In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Loading Dataset

In [4]:
# Read the car-price table from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../../Datasets/car price/CarPrice_Assignment.csv')
# Preview the first rows; head() returns five rows by default.
df.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


# Cleaning Dataset

In [15]:
# List every column label; for a DataFrame, keys() returns the column index.
print(df.keys())

Index(['car_ID', 'symboling', 'CarName', 'fueltype', 'aspiration',
       'doornumber', 'carbody', 'drivewheel', 'enginelocation', 'wheelbase',
       'carlength', 'carwidth', 'carheight', 'curbweight', 'enginetype',
       'cylindernumber', 'enginesize', 'fuelsystem', 'boreratio', 'stroke',
       'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg',
       'price'],
      dtype='object')


In [16]:
# Remove the two columns that are not used as model inputs.
# errors='ignore' keeps this cell re-runnable: once the columns are gone,
# running it again is a no-op instead of raising KeyError.
df = df.drop(columns=['car_ID', 'CarName'], errors='ignore')
df.head(5)

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,1,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,2,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,2,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [35]:
# Keep only the numeric columns so the frame can be turned into a float matrix.
# Selecting by "is numeric" rather than dropping dtype == 'object' also removes
# text columns that pandas stores under a dedicated string dtype instead of
# `object`, which the equality check against 'object' would let through.
df = df.select_dtypes(include='number')

In [36]:
# Show the first five rows of the cleaned frame (five is head()'s default).
df.head()

Unnamed: 0,symboling,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,3,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,5000,21,27,13495.0
1,3,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,5000,21,27,16500.0
2,1,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154,5000,19,26,16500.0
3,2,99.8,176.6,66.2,54.3,2337,109,3.19,3.4,10.0,102,5500,24,30,13950.0
4,2,99.4,176.6,66.4,54.3,2824,136,3.19,3.4,8.0,115,5500,18,22,17450.0


# Converting Data to NumPy

In [37]:
# Collapse the cleaned frame into a single 2-D ndarray and report its shape.
Data = df.to_numpy()
print(np.shape(Data))

In [71]:
# Every column except the last is an input feature; the last column is the
# target. Slicing with -1: keeps the target two-dimensional, i.e. shape (N, 1).
X, y = Data[:, :-1], Data[:, -1:]

print('X.shape = {}, y.shape = {}'.format(np.shape(X), np.shape(y)))

X.shape = (205, 14), y.shape = (205, 1)


# Linear Regression Model

$$A_{(m \times k)} \cdot B_{(k \times p)} = C_{(m \times p)}$$

In [72]:
class Linear_Regression:
    """Linear regression with a bias term, trained by batch gradient descent.

    The weights live in one row vector ``w`` of shape ``(1, features + 1)``.
    The last entry is the bias; it is paired with a column of ones that is
    appended to the inputs before every matrix product.
    """

    def __init__(self, features):
        """Create a model for inputs with ``features`` columns.

        All weights, including the bias, start at zero.
        """
        self.w = np.zeros((1, features + 1))

    @staticmethod
    def _with_bias(X):
        """Return ``X`` (N, f) with a trailing column of ones, shape (N, f + 1)."""
        X = np.asarray(X)
        return np.hstack((X, np.ones((X.shape[0], 1))))

    @staticmethod
    def _as_column(y):
        """Return a flat ``(N,)`` target as an ``(N, 1)`` column.

        Without this, a flat target broadcasts against the ``(N, 1)``
        predictions into an ``(N, N)`` matrix and silently produces a wrong
        error and gradient. Targets that are already 2-D are returned as-is.
        """
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    def _gradient(self, inner_X, y):
        """Gradient of the halved MSE for bias-augmented inputs and a column target."""
        residual = y - np.dot(inner_X, self.w.T)
        # (1, N) . (N, f + 1) => (1, f + 1), the same shape as self.w
        return -np.dot(residual.T, inner_X) / y.shape[0]

    def __call__(self, X):
        """Predict targets for ``X``.

        Args:
            X: array of shape (N, f).

        Returns:
            Array of shape (N, 1) with one prediction per row of ``X``.
        """
        # (N, f + 1) . (f + 1, 1) => (N, 1)
        return np.dot(self._with_bias(X), self.w.T)

    def get_MSE_error(self, X, y):
        """Return half of the mean squared error of the predictions on ``X``.

        Args:
            X: array of shape (N, f).
            y: targets of shape (N, 1) or (N,).
        """
        residual = self._as_column(y) - self(X)
        return np.mean(residual ** 2) / 2

    def get_MSE_derivatives(self, X, y):
        """Return the gradient of ``get_MSE_error`` with respect to ``w``.

        Args:
            X: array of shape (N, f).
            y: targets of shape (N, 1) or (N,).

        Returns:
            Array of shape (1, f + 1), matching ``self.w``.
        """
        return self._gradient(self._with_bias(X), self._as_column(y))

    def fit(self, X, y, alpha, itterations):
        """Run ``itterations`` steps of batch gradient descent, updating ``self.w``.

        Args:
            X: array of shape (N, f).
            y: targets of shape (N, 1) or (N,).
            alpha: step size (learning rate).
            itterations: number of update steps; the spelling is kept so
                existing keyword callers continue to work.
        """
        # The augmented inputs and the column target do not change between
        # steps, so build them once rather than on every iteration.
        inner_X = self._with_bias(X)
        y = self._as_column(y)
        for _ in range(itterations):
            self.w = self.w - alpha * self._gradient(inner_X, y)

In [73]:
# Size the model from the data instead of hard-coding the column count, so
# this cell keeps working if the cleaning steps retain a different set of columns.
model = Linear_Regression(X.shape[1])
print('start error :', model.get_MSE_error(X, y))
# The features are unscaled here (values range from single digits to the
# thousands), which is presumably why such a tiny step size and so many
# iterations are used -- TODO confirm.
model.fit(X, y, 0.00000001, 100000)
print('final error :', model.get_MSE_error(X, y))

start error : 119890739.74557047
final error : 7058695.816436184


# Standardization

In [74]:
# Column-wise z-score: subtract each column's mean and divide by its standard
# deviation. NOTE: a constant column has a standard deviation of zero and
# would cause a division by zero here.
X_normalized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
y_normalized = (y - np.mean(y, axis=0)) / np.std(y, axis=0)

In [91]:
# Fresh model for the standardized data; the feature count is read from the
# array rather than hard-coded so it always matches the inputs.
model = Linear_Regression(X_normalized.shape[1])
print('start error :', model.get_MSE_error(X_normalized, y_normalized))
model.fit(X_normalized, y_normalized, 0.2, 100)
print('final error :', model.get_MSE_error(X_normalized, y_normalized))

start error : 0.49999999999999994
final error : 0.07475166128672582
