In [142]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [121]:
# Load the diabetes regression dataset as a (features, target) pair of arrays
# and report both shapes, one per line.
X, y = load_diabetes(return_X_y=True)
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [122]:
# Inspect the feature matrix; NumPy elides the middle rows/columns in the repr.
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [123]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [124]:
# Ordinary least squares baseline using scikit-learn's LinearRegression.
# fit() returns the estimator itself, so construction and fitting can be chained.
reg = LinearRegression().fit(X_train, y_train)
print("weights - ", reg.coef_)
print("Bias - ", reg.intercept_)

weights -  [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
Bias -  151.88331005254167


In [125]:
# Evaluate the OLS baseline on the held-out split; the cell's value is its R^2.
y_pred = reg.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.4399338661568969

In [126]:
#stochastic gradient descent from scratch

class SGDRegressor:
    """Linear regression fitted with per-sample stochastic gradient descent.

    Every update draws one training row uniformly at random (with replacement)
    and takes a gradient step on that single row's squared error.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epoch : int, default=100
        Number of passes; each pass performs ``X.shape[0]`` single-row updates.
    random_state : int or None, default=None
        Seed for the row-sampling order. ``None`` keeps drawing from NumPy's
        global generator (so ``np.random.seed`` is still honoured); an integer
        makes ``fit`` reproducible independently of global state.

    Attributes
    ----------
    coef_ : ndarray of shape (1, n_features), or None before ``fit``
        Learned weights.
    intercept_ : ndarray of shape (1, 1), or None before ``fit``
        Learned bias term.
    """

    def __init__(self, learning_rate=0.01, epoch=100, random_state=None):
        self.intercept_ = None
        self.coef_ = None
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the model and print the learned weights and bias.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples target values

        Returns
        -------
        self : SGDRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the sample count.
        """
        # Coerce first so lists / pandas objects work with the integer row
        # indexing and reshape below.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        n_samples, n_features = X.shape

        # Bias starts at zero and weights at one. The bias is kept as a (1, 1)
        # array from the start so its type does not change after the first step.
        self.intercept_ = np.zeros((1, 1))
        self.coef_ = np.ones((1, n_features))

        # Both np.random (module) and RandomState expose randint(low, high).
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for _ in range(self.epoch):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                x_row = X[idx].reshape(1, -1)                       # (1, n_features)
                y_hat = np.dot(x_row, self.coef_.T) + self.intercept_  # (1, 1)
                residual = y[idx] - y_hat                           # (1, 1)

                # Gradients of (y - y_hat)^2 w.r.t. bias and weights.
                grad_intercept = -2 * residual
                grad_coef = -2 * residual * x_row                   # (1, n_features)

                self.intercept_ = self.intercept_ - (self.learning_rate * grad_intercept)
                self.coef_ = self.coef_ - (self.learning_rate * grad_coef)

        print("Weights - ", self.coef_)
        print("Bias - ", self.intercept_)
        return self

    def predict(self, x):
        """Return predictions of shape (n_samples, 1) for rows of ``x``."""
        return np.dot(np.asarray(x), self.coef_.T) + self.intercept_

In [139]:
# The regressor samples training rows through NumPy's global generator, so
# seed it here; otherwise the fitted weights and the R^2 reported afterwards
# change on every re-run of the notebook.
np.random.seed(2)
sgd = SGDRegressor(learning_rate=0.01, epoch=85)
# Trailing semicolon: fit() already prints the weights and bias, so suppress
# any echo of the call's return value.
sgd.fit(X_train, y_train);

Weights -  [[  36.05002521 -121.48886455  433.11091186  293.31207681  -13.55603081
   -79.76543904 -191.24537674  118.75984833  392.77503486  115.8631307 ]]
Bias -  [[150.83001763]]


In [140]:
# Predict the held-out split with the from-scratch SGD model.
# Note: this rebinds y_pred, which previously held the OLS predictions.
y_pred = sgd.predict(X_test)

In [141]:
# R^2 of the SGD model on the held-out split.
r2_score(y_true=y_test, y_pred=y_pred)

0.45056208631225847