In [1]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.datasets import load_boston
from random import seed
from random import randrange
from csv import reader
from math import sqrt
from sklearn import preprocessing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
import sys
import random
from prettytable import PrettyTable

In [2]:
X = load_boston().data
Y = load_boston().target

In [3]:
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)

In [4]:
clf = SGDRegressor()
clf.fit(X, Y)
skmse = mean_squared_error(Y, clf.predict(X))
print(skmse)

22.685385712722375


In [5]:
y_pred_sk = clf.predict(X).tolist()

In [6]:
a = clf.coef_.tolist()

In [7]:
prettytable_data = []
prettytable_data.append(a)

In [8]:
class CustomSGDRegressor:
    
    def __init__(self, learning_rate = 0.001, iterations = 100000):
        self.max_iters = iterations
        self.weights = None
        self.alpha = learning_rate
        
    def fit(self, X, Y):
        # appending the features with ones of shape [n,1] to include the W0
        X = np.append(X, np.ones((X.shape[0], 1)), axis=1)
        self.weights = np.ones((1, X.shape[1]))
        
        mse_or = mean_squared_error(Y, self._predict(X))
        
        for iteration in range(self.max_iters):
            self.weights = self.weights - self.alpha * self._gradient(X[iteration%X.shape[0]], Y[iteration%X.shape[0]])
            mse = mean_squared_error(Y, self._predict(X))
            # printing at every 100th interation
            if iteration % 2000 == 0:
                print("iteration : {0}, MSE : {1}".format(iteration, mse))
                
            if mse_or - mse < 0.01 :
                print('Converged !! \niteration : {0}, MSE : {1}\n'.format(iteration, mse))
                mse_or = mse
                break
            elif mse_or - mse < 0: # reducing the alpha value by 10 when the model overshoots the minima
                self.alpha = self.alpha/10
            
    def _gradient(self, x, y):
        return -2 * x * (y - np.dot(self.weights, x))
        
    def _predict(self, X):
        return np.array([np.dot(self.weights, X[i]) for i in range(X.shape[0])])
    
    def predict(self, X):
        X = np.append(X, np.ones((X.shape[0], 1)), axis=1)
        return np.array([np.dot(self.weights, X[i]) for i in range(X.shape[0])])

In [9]:
clf = CustomSGDRegressor()

In [10]:
clf.fit(X, Y)

iteration : 0, MSE : 595.4584748485299
iteration : 2000, MSE : 28.212488120504464
iteration : 4000, MSE : 23.243975664228152
iteration : 6000, MSE : 22.732775743702348
iteration : 8000, MSE : 22.66915618056451
iteration : 10000, MSE : 23.44950674101431
iteration : 12000, MSE : 22.484530834031222
iteration : 14000, MSE : 22.431096281526347
iteration : 16000, MSE : 22.430580790338944
iteration : 18000, MSE : 22.96131515494494
iteration : 20000, MSE : 23.465865506555872
iteration : 22000, MSE : 22.69886027787685
iteration : 24000, MSE : 22.758531667619458
iteration : 26000, MSE : 22.232552909753498
iteration : 28000, MSE : 22.33665359185766
iteration : 30000, MSE : 22.549217900147823
iteration : 32000, MSE : 22.365185497138025
iteration : 34000, MSE : 22.342564676553284
iteration : 36000, MSE : 22.32850424471496
iteration : 38000, MSE : 22.35233318555242
iteration : 40000, MSE : 22.40794453544401
iteration : 42000, MSE : 22.33837588587905
iteration : 44000, MSE : 22.368043707298597
iterat

In [11]:
y_pred = clf.predict(X).tolist() # storing the predicted values by my custom SGD classifier

### Comparing coefficients of CustomSGD vs Scikit Learn SGD

In [12]:
tmp = clf.weights.reshape((14,)) # reshaping weights as per clf.coef_ of scikit-learn's implementation

In [13]:
b = [tmp[i] for i in range(tmp.shape[0]-1)] # converting to list

In [14]:
coefficients = pd.DataFrame({'scikit SGD coef':a, 'custom SGD coef':b})

In [15]:
coefficients

Unnamed: 0,scikit SGD coef,custom SGD coef
0,-0.729991,-1.127545
1,0.628754,0.805467
2,-0.490527,0.137264
3,0.819534,0.522197
4,-0.978776,-1.956269
5,3.197023,3.071332
6,-0.237259,0.153272
7,-2.350684,-2.997373
8,0.944963,2.339893
9,-0.526823,-1.830586


### Comparing MSE of Scikit learn vs Custom implementations of SGD

In [16]:
print("Custom MSE : {0}".format(mean_squared_error(Y, clf.predict(X))))
print("Scikit Learn MSE : {0}".format(skmse))

Custom MSE : 22.371672778607884
Scikit Learn MSE : 22.685385712722375


### Comparing predictions Scikit Learn vs Custom implementations of SGD

In [17]:
predicted = pd.DataFrame({'scikit predictions':y_pred_sk, 'custom predictions':y_pred})

In [18]:
predicted

Unnamed: 0,scikit predictions,custom predictions
0,30.689994,[30.086010625671886]
1,24.731081,[25.16947638451916]
2,30.826738,[31.23820275712525]
3,29.269687,[29.17357858650119]
4,28.724617,[28.549671987583494]
5,25.456474,[25.483391927904016]
6,22.845453,[22.501519161649387]
7,19.603330,[19.049532370580526]
8,11.571377,[10.377271682918527]
9,19.112299,[18.394247531689658]
