In [1]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the diabetes regression dataset as a (features, target) pair of arrays
# instead of a Bunch object (return_X_y=True).
X,y=load_diabetes(return_X_y=True)

In [3]:
# Feature matrix dimensions: (n_samples, n_features).
X.shape

(442, 10)

In [4]:
# Target vector: one value per sample (1-D).
y.shape

(442,)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

# Stochastic Gradient Descent is fast because it updates the parameters after every single row. It is useful for big datasets, and fewer epochs are required.
## Code from scratch

In [6]:
class my_Stocastic_Gradient_Descent:
  """Linear regression trained with stochastic gradient descent (SGD).

  Every update uses a single, randomly drawn training row (sampling with
  replacement) and the gradient of that row's squared error.

  Parameters
  ----------
  learning_rate : float, default=.01
      Constant step size applied to every update.
  epochs : int, default=100
      Number of passes; each pass performs ``n_samples`` single-row updates.
  random_state : int or None, default=None
      Seed for the row sampling. ``None`` keeps drawing from NumPy's global
      random state (so ``np.random.seed`` still controls it); an integer makes
      ``fit`` reproducible on its own.

  Attributes
  ----------
  intercept_ : float or None
      Learned bias term; ``None`` until ``fit`` is called.
  coef_ : numpy.ndarray of shape (n_features,) or None
      Learned weights; ``None`` until ``fit`` is called.
  """

  def __init__(self,learning_rate=.01,epochs=100,random_state=None):
    self.learning_rate=learning_rate
    self.epochs=epochs
    self.random_state=random_state
    self.intercept_=None
    self.coef_=None

  def fit(self,X_train,y_train):
    """Fit the model and return ``self``.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,)

    Returns
    -------
    self : the fitted estimator (allows ``model.fit(X, y).predict(X)``).

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D or the number of targets differs from the
        number of rows.
    """
    # Coerce to float arrays so lists and DataFrames index by row position.
    X_train=np.asarray(X_train,dtype=float)
    y_train=np.asarray(y_train,dtype=float).ravel()

    if X_train.ndim != 2:
      raise ValueError("X_train must be 2-dimensional, got %d dimension(s)" % X_train.ndim)
    n_samples,n_features=X_train.shape
    if y_train.shape[0] != n_samples:
      raise ValueError("X_train has %d rows but y_train has %d values" % (n_samples,y_train.shape[0]))

    # With no seed, keep using NumPy's global RNG exactly as before.
    if self.random_state is None:
      rng=np.random
    else:
      rng=np.random.RandomState(self.random_state)

    self.coef_=np.ones(n_features)                                           # Start with every beta at 1
    self.intercept_=0                                                        # Start with beta0 at 0

    for _ in range(self.epochs):
      for _ in range(n_samples):
        random_index=rng.randint(0,n_samples)
        x_row=X_train[random_index]

        # Scalar prediction for the sampled row, computed from the weights
        # before either parameter is updated.
        y_hat=np.dot(x_row,self.coef_) + self.intercept_
        residual=y_train[random_index] - y_hat

        # d/d(beta0) of (y - y_hat)^2 is -2 * residual
        intercept_derivative=-2 * residual
        self.intercept_=self.intercept_ - (self.learning_rate * intercept_derivative)

        # d/d(beta) of (y - y_hat)^2 is -2 * residual * x
        coef_derivative=-2 * (residual * x_row)
        self.coef_=self.coef_ - (self.learning_rate * coef_derivative)

    print(self.intercept_,self.coef_)
    return self

  def predict(self,X_test):
    """Return the predicted target for every row of ``X_test``.

    ``X_test`` is array-like of shape (n_samples, n_features); the result is
    ``X_test @ coef_ + intercept_``.
    """
    return np.dot(np.asarray(X_test,dtype=float),self.coef_) + self.intercept_

In [11]:
# From-scratch SGD model: constant step size 0.01, 50 passes over the data.
reg = my_Stocastic_Gradient_Descent(learning_rate=0.01, epochs=50)

In [12]:
# The from-scratch model samples rows through NumPy's global RNG, so seed it
# right before training: the printed weights and the R² score then stay the
# same on every Restart & Run All, and re-running this cell is idempotent.
np.random.seed(2)
reg.fit(X_train,y_train)

150.30412793469233 [  47.01010962  -69.54045326  351.80527139  244.33675118   16.17840038
  -30.79947795 -173.92791291  127.88723037  323.82591341  124.28611384]


In [13]:
# Predict the held-out rows with the from-scratch model and display them.
y_pred=reg.predict(X_test)
y_pred

array([151.2280837 , 189.75196688, 137.70349769, 107.73827114,
       242.32185362, 234.20637499, 106.41462232, 113.93825472,
        91.79009849, 183.42519207, 157.98970803, 170.35028021,
       181.88545784, 148.62541406, 252.03650385,  91.0634396 ,
       184.68404021, 138.38887913, 138.48117665, 136.86562454,
       131.66141365, 187.75220309, 164.38998191, 175.59985428,
       124.89943847, 217.75374298, 192.28166949, 118.80255997,
        62.88897316, 238.27207908, 234.35496147, 117.87546808,
        74.96834943, 108.97887962, 197.77762206, 164.75433095,
       164.53432456, 193.64658366, 113.92287171, 229.59019449,
       135.04145906, 125.31339832, 184.36906124, 184.44812118,
       168.35771216, 145.43232423, 170.77693521, 275.36068718,
       116.03814842, 186.04300255, 233.69867084, 129.73817727,
       142.63157953, 147.35257652, 186.9616266 , 111.82924324,
       152.79982919,  85.16705669, 156.26779973, 143.10090285,
       161.68110466, 168.78969337, 109.14256654, 203.34

In [14]:
# R² of the from-scratch SGD model on the test split.
r2_score(y_test,y_pred)

0.43466870975983596

# Comparing with scikit-learn's SGDRegressor.

In [15]:
from sklearn.linear_model import SGDRegressor

In [21]:
# A constant step size of 0.01 mirrors the from-scratch model's learning rate.
# random_state pins SGDRegressor's per-epoch shuffling so the comparison score
# is reproducible across runs.
sg=SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01,random_state=2)

In [22]:
# Train scikit-learn's SGD regressor on the same training split.
sg.fit(X_train,y_train)



In [23]:
# NOTE: this overwrites the y_pred produced earlier by the from-scratch model.
y_pred=sg.predict(X_test)

In [24]:
# R² of scikit-learn's SGDRegressor on the test split, for comparison with
# the from-scratch model's score.
r2_score(y_test,y_pred)

0.4312613099460495