In [280]:
import pandas as pd 
import numpy as np 

In [281]:
from sklearn.datasets import load_diabetes

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [282]:
# Load the diabetes regression dataset that ships with scikit-learn.
diabetes = load_diabetes()

In [283]:
# Pull the feature matrix and the regression target out of the loaded dataset.
X, y = diabetes.data, diabetes.target

In [284]:
# Sanity-check the dimensions of the feature matrix and the target vector.
X.shape, y.shape

((442, 10), (442,))

In [285]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Use scikit-learn's built-in `LinearRegression` to compute `coef_` and `intercept_`.

In [286]:
# Baseline model: scikit-learn's LinearRegression with default settings.
lr = LinearRegression()

In [287]:
# Fit the baseline on the training split only.
lr.fit(X_train, y_train)

In [288]:
# Inspect the intercept and per-feature coefficients learned by the baseline.
lr.intercept_, lr.coef_

(np.float64(151.34560453985995),
 array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
        -931.48884588,  518.06227698,  163.41998299,  275.31790158,
         736.1988589 ,   48.67065743]))

In [289]:
# R² of the baseline on the held-out test split.
r2_score(y_test, lr.predict(X_test))

0.4526027629719197

#### Now we create our own SGD regressor class to compute `coef_` and `intercept_`.

In [290]:
class MySGDRegressor:
    """Linear regression trained with stochastic gradient descent (SGD).

    The model is ``y_hat = X @ coef_ + intercept_``. Training minimises the
    squared error of one randomly drawn sample at a time (drawn with
    replacement), using a constant step size.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Constant step size applied to every single-sample gradient update.
    epochs : int, default=50
        Number of passes. Each pass performs ``n_samples`` updates.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) used to draw sample indices. ``None`` draws from
        NumPy's global random state, which is what this class always did
        before the parameter existed.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until :meth:`fit` is called.
    intercept_ : float or None
        Learned bias term; ``None`` until :meth:`fit` is called.
    """

    def __init__(self, learning_rate = 0.01, epochs = 50, random_state = None):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.random_state = random_state

    def _make_rng(self):
        """Return the object whose ``randint`` is used to draw sample indices."""
        seed = self.random_state
        if seed is None:
            # Fall back to the global NumPy RNG so np.random.seed(...) set by
            # the caller is still honoured.
            return np.random
        if isinstance(seed, np.random.RandomState):
            return seed
        return np.random.RandomState(seed)

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features (NumPy array, DataFrame, nested list, ...).
        y_train : array-like with ``n_samples`` entries
            Training targets.

        Returns
        -------
        self : MySGDRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or its row count differs from
            ``y_train``.
        """
        # Convert up front: positional indexing below must not depend on a
        # pandas index (DataFrame[int] is a column lookup, Series[int] a
        # label lookup).
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                "X_train must be 2-dimensional, got %d dimension(s)" % X.ndim
            )
        if y.shape[:1] != (X.shape[0],):
            raise ValueError(
                "X_train and y_train have inconsistent numbers of samples"
            )

        n_samples, n_features = X.shape
        rng = self._make_rng()

        # Deterministic starting point: zero bias, all-ones weights.
        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            # One epoch = n_samples single-sample updates.
            for _ in range(n_samples):
                index = rng.randint(0, n_samples)
                x_i = X[index]

                # Both gradients use the prediction made BEFORE either
                # parameter is updated.
                y_hat = np.dot(x_i, self.coef_) + self.intercept_
                residual = y[index] - y_hat

                # d/d(intercept) of (y - y_hat)^2
                intercept_der = -2 * residual
                # d/d(coef) of (y - y_hat)^2
                coef_der = -2 * residual * x_i

                self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)
                self.coef_ = self.coef_ - (self.learning_rate * coef_der)

        # Kept so the notebook still shows the learned parameters after fit().
        print(self.intercept_, self.coef_)
        return self

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for every row of ``X_test``.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("MySGDRegressor is not fitted yet; call fit() first")
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [291]:
# Custom SGD model: constant step size 0.2, 20 passes over the training set.
# NOTE(review): the R² recorded for this configuration further down is
# negative, so the step size may be too large — consider a smaller value.
mysgd = MySGDRegressor(learning_rate=0.2, epochs=20)

In [292]:
# Train the custom regressor; fit() prints the learned intercept and coefficients.
mysgd.fit(X_train, y_train)

100.64594501770915 [  64.86087801 -204.49260341  563.79379825  280.60908062 -101.22523762
 -143.38857917 -206.3604555   158.15494391  421.81493204   44.65345827]


In [293]:
# Predict on the held-out test split with the custom regressor.
y_pred = mysgd.predict(X_test)

In [294]:
# R² of the custom regressor on the test split.
r2_score(y_test, y_pred)

-0.10750548919567615

### Use the built-in `SGDRegressor` class, which uses stochastic gradient descent to compute `coef_` and `intercept_`.

In [324]:
from sklearn.linear_model import SGDRegressor

In [329]:
# scikit-learn's SGD-based linear regressor with a constant learning-rate
# schedule and a fixed seed so the fit is repeatable.
sgdlr = SGDRegressor(
    max_iter=1000,
    tol=0.001,
    learning_rate='constant',
    random_state=42,
)

In [330]:
# Fit scikit-learn's SGD regressor on the same training split.
sgdlr.fit(X_train, y_train)

In [331]:
# Intercept and coefficients learned by scikit-learn's SGD regressor.
sgdlr.intercept_, sgdlr.coef_

(array([150.18905285]),
 array([  58.28970954,  -79.58016775,  340.01094888,  236.90759423,
           7.25371506,  -26.80872891, -174.99835524,  146.97405679,
         271.32101614,  146.07321141]))

In [332]:
# R² of scikit-learn's SGD regressor on the test split.
r2_score(y_test, sgdlr.predict(X_test))

0.4408180251735596