In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df_ = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USA_Housing.csv')
df_.drop('Address',inplace = True, axis=1)

from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
df = pd.DataFrame(ss.fit_transform(df_), columns = df_.columns[:])
df.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,1.02866,-0.296927,0.021274,0.088062,-1.317599,-0.490081
1,1.000808,0.025902,-0.255506,-0.722301,0.403999,0.775508
2,-0.684629,-0.112303,1.516243,0.93084,0.07241,-0.490211
3,-0.491499,1.221572,-1.393077,-0.58454,-0.186734,0.080843
4,-0.807073,-0.944834,0.846742,0.201513,-0.988387,-1.702518


In [3]:
X = df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms','Avg. Area Number of Bedrooms', 'Area Population']]
y = df['Price']

## sklearn Linear Regression

In [4]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train,y_train)

print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)

y_pred = model.predict(X_test)

from sklearn.metrics import mean_squared_error, r2_score

print('MSE :',mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [0.65272181 0.46493181 0.34132186 0.00825197 0.42771714] 
Intercept : -0.0022488163278688474
MSE : 0.0807624541925124
R2 Score : 0.9146818498754016


## Batch Gradient Descent

In [5]:
class GDRegressor:

  def __init__(self, learning_rate, epochs):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None
    self.cost_history = []


  def fit(self,X_train,y_train):

    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):

        y_hat = np.dot(self.coef_ , X_train.T) + self.intercept_

        cost = np.sum((y_train - y_hat)**2) / X_train.shape[0]
        self.cost_history.append(cost)

        dj_dw = np.dot((y_hat - y_train),X_train) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_ , self.intercept_

  def plot(self):
    x = np.arange(0,len(self.cost_history),1)
    plt.plot(x,self.cost_history)
    plt.xlabel('Epochs')
    plt.ylabel('Cost')

  def predict(self,X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test,y_pred):
    return ((y_test - y_pred)**2).mean()

  @staticmethod
  def r2_score(y_test,y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = GDRegressor(0.2,100)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)


print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)
print('MSE :',model.mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [0.65272164 0.46493172 0.34131939 0.00825452 0.42771719] 
Intercept : -0.002248829203674023
MSE : 0.08076249706969538
R2 Score : 0.9146818045795774


## Stochastic Gradient Descent



In [6]:
class SGDRegressor:

  def __init__(self, learning_rate, epochs):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None

  def fit(self, X_train, y_train):
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):
      indices = np.arange(X_train.shape[0])
      np.random.shuffle(indices)

      for i in indices:

        index = np.random.randint(0, X_train.shape[0])
        y_hat = np.dot(self.coef_ , X_train[index].T) + self.intercept_

        dj_dw = np.dot((y_hat - y_train[index]), X_train[index]) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train[index] - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_, self.intercept_

  def predict(self, X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()

  @staticmethod
  def r2_score(y_test, y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = SGDRegressor(0.1, 100)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Coefficients:', model.coef_, '\nIntercept:', model.intercept_)
print('MSE:', model.mean_squared_error(y_test, y_pred))
print('R2 Score:', model.r2_score(y_test, y_pred))

Coefficients: [0.65252664 0.46614536 0.3396318  0.01174385 0.42746511] 
Intercept: 0.20177707307810516
MSE: 0.11937898235248444
R2 Score: 0.873887017923047


## Mini Batch Gradient Descent

In [7]:
class MBGDRegressor:

  def __init__(self, learning_rate, epochs, batch_size):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.coef_ = None
    self.intercept_ = None
    self.y_pred = None

  def fit(self, X_train, y_train):
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    self.coef_ = np.zeros((X_train.shape[1],))
    self.intercept_ = 0

    for epoch in range(self.epochs):
      for i in range(int(X_train.shape[0] // self.batch_size)):

        index = np.random.randint(0, X_train.shape[0], self.batch_size)
        y_hat = np.dot(self.coef_ , X_train[index].T) + self.intercept_

        dj_dw = np.dot((y_hat - y_train[index]), X_train[index]) / X_train.shape[0]
        dj_db = -2 * np.mean(y_train[index] - y_hat)

        self.coef_ = self.coef_ - self.learning_rate * dj_dw
        self.intercept_ = self.intercept_ - self.learning_rate * dj_db

    return self.coef_, self.intercept_

  def predict(self, X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()

  @staticmethod
  def r2_score(y_test, y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = MBGDRegressor(0.44, 100, 20)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Coefficients:', model.coef_, '\nIntercept:', model.intercept_)
print('MSE:', model.mean_squared_error(y_test, y_pred))
print('R2 Score:', model.r2_score(y_test, y_pred))

Coefficients: [0.65562061 0.46607228 0.34050668 0.00632768 0.42678719] 
Intercept: -0.061108695290278
MSE: 0.08514223658574188
R2 Score: 0.9100550101455451
