The `OrdinaryLeastSquare` class below implements gradient descent (with optional momentum) to minimise the least-squares loss; the gradient is not derived analytically but approximated numerically with finite differences.

In [1]:
# Importing all the important modules
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.preprocessing import scale
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import random
import pandas 
import time


In [None]:
#class declaration 
class OrdinaryLeastSquare:
    """Linear least-squares regressor fitted by gradient descent.

    The loss ``||X @ beta - y||^2`` is minimised iteratively. Its gradient is
    approximated with forward finite differences instead of a closed form.

    Parameters
    ----------
    gama : float, default 0
        Momentum coefficient applied to the previous update step. The
        default of 0 gives gradient descent without momentum.
    """

    def __init__(self, gama=0):
        self.gama = gama

    def fit(self, X, y, x0, alpha, h, tolerance, maxIterations):
        """Fit the coefficients and store them in ``self.beta``.

        Parameters
        ----------
        X : 2-D array of shape (n, d)
            Training features, without an intercept column.
        y : array-like with n values
            Training targets; a column vector is flattened before use.
        x0 : array-like of length d + 1
            Starting coefficients, intercept first. It is not modified.
        alpha : float
            Learning rate.
        h : float
            Step used by the finite-difference gradient approximation.
        tolerance : float
            Training stops once the norm of the approximated gradient falls
            below this value.
        maxIterations : int
            Maximum number of gradient evaluations.
        """
        self.n = X.shape[0]
        self.d = X.shape[1]
        self.h = h
        self.alpha = alpha
        self.x0 = x0
        # Prepend a column of ones so that beta[0] acts as the intercept.
        self.data = np.hstack((np.ones([self.n, 1]), X))
        self.outputs = y

        design = self.data
        # Flatten the targets: with a column vector of shape (n, 1) the
        # residual would otherwise broadcast to an (n, n) matrix.
        targets = np.asarray(y, dtype=float).ravel()

        def loss(beta):
            # Sum of squared residuals for the coefficient vector beta.
            residual = design @ beta - targets
            return residual @ residual

        self.beta = self.gradientDescendent(
            loss, self.x0, self.h, self.alpha, self.gama, tolerance, maxIterations
        )

    def predict(self, X):
        """Return predictions for ``X`` as an array of shape (rows, 1).

        ``X`` must have the same feature columns as the data passed to
        ``fit``; the intercept column is added here.
        """
        design = np.hstack((np.ones([X.shape[0], 1]), X))
        # One matrix-vector product replaces the per-row Python loop.
        return np.asarray(design @ self.beta, dtype=float).reshape(-1, 1)

    def gradientDescendent(self, f, X0, h, alpha, gama, tolerance, maxIterations):
        """Minimise ``f`` by gradient descent with momentum.

        Parameters
        ----------
        f : callable
            Scalar function of a 1-D coefficient array.
        X0 : array-like
            Starting point. A float copy is taken, so it is left untouched.
        h : float
            Finite-difference step passed to ``computeGradient``.
        alpha : float
            Learning rate.
        gama : float
            Momentum coefficient (0 disables momentum).
        tolerance : float
            Convergence threshold on the gradient norm.
        maxIterations : int
            Maximum number of gradient evaluations.

        Returns
        -------
        numpy.ndarray
            The last iterate, whether or not convergence was reached.
        """
        # Work on a float copy: the in-place update below must not overwrite
        # the caller's array, and an integer start must not truncate steps.
        x = np.array(X0, dtype=float)
        vt = np.zeros(len(x))

        for counter in range(maxIterations):
            gradient = self.computeGradient(f, x, h)
            gradient_norm = np.linalg.norm(gradient)

            if gradient_norm < tolerance:
                print('Gradient descent took', counter, 'iterations to converge')
                print('The norm of the gradient is', gradient_norm)
                return x

            if counter == maxIterations - 1:
                print("Gradient descent failed")
                print('The gradient is', gradient)
                return x

            # Momentum update: blend the new step with the previous one.
            vt = alpha * gradient + vt * gama
            x -= vt

        # Only reached when maxIterations <= 0: return the start point
        # instead of falling through with None.
        return x

    def computeGradient(self, f, x, h):
        """Approximate the gradient of ``f`` at ``x`` by forward differences.

        Each component is ``(f(x + h * e_i) - f(x)) / h``. ``x`` itself is
        not modified.
        """
        x = np.asarray(x, dtype=float)
        n = len(x)
        gradient = np.zeros(n)
        # f(x) is the same for every coordinate, so evaluate it once.
        base_value = f(x)

        for counter in range(n):
            xUp = x.copy()
            xUp[counter] += h
            gradient[counter] = (f(xUp) - base_value) / h

        return gradient



In [None]:
#Preprocess the data before training or testing

#importing module to deal with CSV
import pandas as pd


#Reading the csv file from drive
data = pd.read_csv('/content/drive/My Drive/Mount_Pleasant_Real_Estate_Data.csv', sep=',')

#Remove every row that has a Null/NaN value in any column
#NOTE(review): this runs before the unused columns are dropped, so a row is also
#discarded when its only missing value is in a column removed below - confirm intended
data = data.dropna(axis=0)


#Removing the unnecessary columns
drop_features = ["ID","Baths - Full", "Baths - Half", "Misc Exterior",
                 "Amenities", "Fireplace?", "Subdivision", "House Style"]
data = data.drop(columns=drop_features)

#Strip the thousands separators and the dollar sign so the price parses as a float.
#regex=False treats '$' as a literal character; as a regular expression it is the
#end-of-string anchor, and the default of `regex` differs between pandas versions.
data['List Price'] = (
    data['List Price']
    .str.replace(',', '', regex=False)
    .str.replace('$', '', regex=False)
    .astype(float)
)


#Mapping the yes/no to binary 1/0 (any other value becomes NaN)
features = ['Duplex?', 'New Owned?', 'Has Pool?', 'Has Dock?', 'Fenced Yard', 'Screened Porch?', 'Golf Course?']
for feature in features:
    data[feature] = data[feature].map({'Yes': 1, 'No': 0})


#Mean normalization: centre every column on its mean and scale by its range
data_mean = data.mean()

data_max = data.max()
data_min = data.min()

#A constant column has max == min; dividing by 1 instead maps it to 0 rather than NaN
data_range = (data_max - data_min).replace(0, 1)

data = (data - data_mean) / data_range


data = data.to_numpy()


#taking trainX and trainY to train the model
#presumably column 0 is the target (List Price) after the drops above - verify against the CSV
trainX = np.array(data[:,1:], dtype=float)
trainy = np.array(data[:,0], dtype=float)

#Declaring random initial coefficients: one per feature plus one for the intercept
initial_value = (np.random.uniform(size=trainX.shape[1]+1))


In [None]:
#Gradient descent without momentum: gama = 0 switches the momentum term off
model = OrdinaryLeastSquare(gama=0)

#Time only the training call
time_start = time.time()
model.fit(
    trainX,
    trainy,
    initial_value,
    alpha=0.001,
    h=0.001,
    tolerance=0.0001,
    maxIterations=100000,
)
time_end = time.time()

elapsed_seconds = time_end - time_start
print('Time : ', elapsed_seconds, 'seconds')

#Keep the fitted coefficients and the in-sample predictions for inspection
parameters = model.beta
predictions = model.predict(trainX)

#Goodness-of-fit metrics, evaluated on the training data itself
train_r2 = r2_score(trainy, predictions)
train_mse = mean_squared_error(trainy, predictions)
train_mae = mean_absolute_error(trainy, predictions)

print('The r**2 score is', train_r2)
print('The mean squared error is', train_mse)
print('The mean absolute error is', train_mae, '\n')

Gradient descent took 5743 iterations to converge
The norm of the gradient is 9.990350506882552e-05
Time :  2.0678915977478027 seconds
The r**2 score is 0.9062664928934926
The mean squared error is 0.0025743828942834685
The mean absolute error is 0.03573018436428183 



In [None]:
#Gradient descent with momentum: gama = 0.9 carries 90% of the previous step forward
model = OrdinaryLeastSquare(gama=0.9)

#Draw a fresh random starting point: one coefficient per feature plus the intercept
n_coefficients = trainX.shape[1] + 1
initial_value = np.random.uniform(size=n_coefficients)

#Time only the training call
time_start = time.time()
model.fit(
    trainX,
    trainy,
    initial_value,
    alpha=0.001,
    h=0.001,
    tolerance=0.0001,
    maxIterations=100000,
)
time_end = time.time()

elapsed_seconds = time_end - time_start
print('Time : ', elapsed_seconds, 'seconds')

#Keep the fitted coefficients and the in-sample predictions for inspection
parameters = model.beta
predictions = model.predict(trainX)

#Goodness-of-fit metrics, evaluated on the training data itself
train_r2 = r2_score(trainy, predictions)
train_mse = mean_squared_error(trainy, predictions)
train_mae = mean_absolute_error(trainy, predictions)

print('The r**2 score is', train_r2)
print('The mean squared error is', train_mse)
print('The mean absolute error is', train_mae, '\n')

Gradient descent took 467 iterations to converge
The norm of the gradient is 9.948595303788544e-05
Time :  0.30265259742736816 seconds
The r**2 score is 0.9062664927453994
The mean squared error is 0.0025743828983508356
The mean absolute error is 0.035730184883892624 

