In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [27]:
df_ = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USA_Housing.csv')
df_.drop('Address',inplace = True, axis=1)
df_.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5


In [28]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
df = pd.DataFrame(ss.fit_transform(df_.drop('Price',axis=1)), columns = df_.columns[:5])
df['Price'] = df_['Price']
df.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,1.02866,-0.296927,0.021274,0.088062,-1.317599,1059034.0
1,1.000808,0.025902,-0.255506,-0.722301,0.403999,1505891.0
2,-0.684629,-0.112303,1.516243,0.93084,0.07241,1058988.0
3,-0.491499,1.221572,-1.393077,-0.58454,-0.186734,1260617.0
4,-0.807073,-0.944834,0.846742,0.201513,-0.988387,630943.5


In [29]:
X = df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms','Avg. Area Number of Bedrooms', 'Area Population']]
y = df['Price']

## sklearn Linear Regression

In [30]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train,y_train)

print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)

y_pred = model.predict(X_test)

from sklearn.metrics import mean_squared_error, r2_score

print('MSE :',mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [230464.52520198 164159.19982575 120514.71328446   2913.62424452
 151019.35865248] 
Intercept : 1231278.6368716825
MSE : 10068422551.400879
R2 Score : 0.9146818498754016


## Batch Gradient Descent

In [31]:
class GDRegressor:

  def __init__(self, learning_rate, epochs):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None
    self.cost_history = []


  def fit(self,X_train,y_train):

    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):

        y_hat = np.dot(self.coef_ , X_train.T) + self.intercept_

        cost = np.sum((y_train - y_hat)**2) / X_train.shape[0]
        self.cost_history.append(cost)

        dj_dw = np.dot((y_hat - y_train),X_train) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_ , self.intercept_

  def plot(self):
    x = np.arange(0,len(self.cost_history),1)
    plt.plot(x,self.cost_history)
    plt.xlabel('Epochs')
    plt.ylabel('Cost')

  def predict(self,X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test,y_pred):
    return ((y_test - y_pred)**2).mean()

  @staticmethod
  def r2_score(y_test,y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = GDRegressor(0.2,100)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)


print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)
print('MSE :',model.mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [230464.46372345 164159.16804683 120513.81840009   2914.54445281
 151019.37592615] 
Intercept : 1231278.632218545
MSE : 10068428022.426792
R2 Score : 0.9146818035148314


## Stochastic Gradient Descent



In [32]:
class SGDRegressor:

  def __init__(self, learning_rate, epochs):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None

  def fit(self, X_train, y_train):
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):
      indices = np.arange(X_train.shape[0])
      np.random.shuffle(indices)

      for i in indices:

        index = np.random.randint(0, X_train.shape[0])
        y_hat = np.dot(self.coef_ , X_train[index].T) + self.intercept_

        dj_dw = np.dot((y_hat - y_train[index]), X_train[index]) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train[index] - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_, self.intercept_

  def predict(self, X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()

  @staticmethod
  def r2_score(y_test, y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = SGDRegressor(0.1, 200)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Coefficients:', model.coef_, '\nIntercept:', model.intercept_)
print('MSE:', model.mean_squared_error(y_test, y_pred))
print('R2 Score:', model.r2_score(y_test, y_pred))

Coefficients: [230648.4344975  163953.54480931 120102.80184173   2870.87101457
 150610.95561344] 
Intercept: 1226498.5356411901
MSE: 10118576415.136166
R2 Score: 0.9142568543159035


## Mini Batch Gradient Descent

In [36]:
class MBGDRegressor:

  def __init__(self, learning_rate, epochs, batch_size):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None

  def fit(self, X_train, y_train):
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):
      for i in range(int(X_train.shape[0] // self.batch_size)):

        index = np.random.randint(0, X_train.shape[0], self.batch_size)
        y_hat = np.dot(self.coef_ , X_train[index].T) + self.intercept_

        dj_dw = np.dot((y_hat - y_train[index]), X_train[index]) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train[index] - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_, self.intercept_

  def predict(self, X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()

  @staticmethod
  def r2_score(y_test, y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = MBGDRegressor(0.1, 1000, 20)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Coefficients:', model.coef_, '\nIntercept:', model.intercept_)
print('MSE:', model.mean_squared_error(y_test, y_pred))
print('R2 Score:', model.r2_score(y_test, y_pred))

Coefficients: [230392.49936259 164279.62766928 121113.06786079   2464.11610486
 150843.90486086] 
Intercept: 1230122.0361085334
MSE: 10072576853.65584
R2 Score: 0.9146466470040837
