In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

In [3]:
# Load the diabetes regression dataset directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [4]:
# Dimensions of the feature matrix: (rows, columns).
X.shape

(442, 10)

In [5]:
# Dimensions of the target vector: one value per row of X.
y.shape

(442,)

In [6]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [9]:
from sklearn.linear_model import LinearRegression
# scikit-learn's LinearRegression with default settings; fitted below as the reference model.
lr = LinearRegression()

In [10]:
# Fit the reference model on the training split only.
lr.fit(X_train, y_train)

In [11]:
# Predictions of the reference model on the held-out test split.
y_pred = lr.predict(X_test)

In [12]:
from sklearn.metrics import r2_score

In [13]:
# R^2 of the reference model on the test split (true values first, predictions second).
print(r2_score(y_test, y_pred))

0.5271558947230806


In [14]:
# Learned coefficients of the reference model, one per feature column.
lr.coef_

array([   2.72308829, -255.94291747,  522.84096403,  353.09406901,
       -827.60149738,  543.34104068,  115.94257227,  214.6877495 ,
        694.94897032,   32.73339672])

In [15]:
# Learned intercept (bias term) of the reference model.
lr.intercept_

np.float64(152.22190213007212)

In [29]:
# Number of training rows; this is the population that mini-batches are sampled from.
X_train.shape[0]

353

In [30]:
# Full shape of the training feature matrix: (rows, columns).
X_train.shape

(353, 10)

In [31]:
# Number of feature columns; the from-scratch model keeps one coefficient per column.
X_train.shape[1]

10

In [121]:
# `random` is otherwise only imported by a later cell in file order, so import it
# here to keep this cell runnable on a fresh kernel (Restart & Run All).
import random

# Demo of the batch-sampling step: draw 10 distinct row positions of X_train
# (random.sample picks without replacement).
idx = random.sample(range(X_train.shape[0]),10)
idx

[172, 40, 100, 144, 219, 105, 211, 317, 153, 139]

# Mini-batch gradient descent (GD) implemented from scratch

In [42]:
import random

class MBGDRegressor:
    """Linear regression fitted with mini-batch gradient descent.

    Every epoch performs ``n_samples // batch_size`` updates. Each update
    draws ``batch_size`` distinct row positions at random, so rows are unique
    within a batch, but batches inside one epoch are drawn independently and
    may overlap.

    Parameters
    ----------
    batch_size : int
        Number of rows used per update. Must be at least 1. Values larger
        than the training set are clamped to the training-set size in ``fit``.
    learning_rate : float, default=0.01
        Step size applied to both the intercept and coefficient gradients.
    epochs : int, default=100
        Number of epochs to run.
    random_state : int or None, default=None
        Seed for a private ``random.Random`` used to draw the batches.
        With ``None`` the module-level ``random`` generator is used, so
        ``random.seed(...)`` controls the draws.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Learned coefficients; ``None`` until ``fit`` is called.
    intercept_ : float
        Learned intercept; ``None`` until ``fit`` is called.
    """

    def __init__(self,batch_size,learning_rate=0.01,epochs=100,random_state=None):

        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state

    def fit(self,X_train,y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` have different numbers of rows.

        Notes
        -----
        Prints the final intercept and coefficients when training finishes.
        """
        # Coerce to arrays so that list-based row selection (X[idx]) also works
        # for plain lists and other array-likes.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        n_samples = X_train.shape[0]

        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y_train.shape[0] != n_samples:
            raise ValueError("X_train and y_train must have the same number of rows")

        # A batch larger than the data would give zero updates per epoch and
        # leave the initial weights untouched; fall back to full-batch updates.
        batch_size = min(self.batch_size, n_samples)

        # Private generator when seeded; otherwise the global `random` state.
        rng = random if self.random_state is None else random.Random(self.random_state)

        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for _epoch in range(self.epochs):

            for _step in range(n_samples // batch_size):

                idx = rng.sample(range(n_samples), batch_size)
                X_batch = X_train[idx]

                # Both gradients use the prediction made before this step's updates.
                y_hat = np.dot(X_batch, self.coef_) + self.intercept_
                residual = y_train[idx] - y_hat

                intercept_der = -2 * np.mean(residual)
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)

                # NOTE: this gradient is summed over the batch while the intercept
                # gradient above is averaged, so the effective coefficient step
                # grows with batch_size. Left unchanged so existing learning-rate
                # settings keep their meaning.
                coef_der = -2 * np.dot(residual, X_batch)
                self.coef_ = self.coef_ - (self.lr * coef_der)

        print(self.intercept_, "\n", self.coef_)

    def predict(self,X_test):
        """Return ``X_test @ coef_ + intercept_`` for each row of ``X_test``."""
        return np.dot(X_test,self.coef_) + self.intercept_

In [91]:
# From-scratch regressor: batches of 10 rows, 1000 epochs, step size 0.05.
mbgdr = MBGDRegressor(
    batch_size=10,
    learning_rate=0.05,
    epochs=1000,
)

In [92]:
# Train on the training split; fit prints the learned intercept and coefficients.
mbgdr.fit(X_train, y_train)

151.76179021412213 
 [  11.57396143 -251.71106026  532.91259164  339.11381909 -453.09049646
  232.63376736  -70.83340423  148.51944943  542.89503222   51.81073844]


In [93]:
# Predictions of the from-scratch model on the held-out test split.
y_pred1 = mbgdr.predict(X_test)

In [94]:
# R^2 of the from-scratch model on the test split, comparable to the reference model's score.
r2_score(y_test, y_pred1)

0.5296657911620593

# Mini-batch gradient descent using scikit-learn's SGDRegressor

In [122]:
from sklearn.linear_model import SGDRegressor

In [151]:
# SGD-based linear regressor with a constant learning-rate schedule; eta0 is the step size.
# No random_state is set here, so results vary from run to run.
reg = SGDRegressor(learning_rate='constant', eta0=0.05)

In [166]:
# Emulate mini-batch training with scikit-learn: draw a fresh batch of distinct
# rows on every iteration and feed it to partial_fit.
batch_size = 20
n_rows = X_train.shape[0]

for step in range(1000):
    idx = random.sample(range(n_rows), batch_size)
    reg.partial_fit(X_train[idx], y_train[idx])

In [167]:
# Coefficients learned by the incrementally trained SGDRegressor.
reg.coef_

array([  24.10409684, -221.13236592,  519.5012033 ,  345.05916952,
        -75.94434671,  -53.19512207, -207.42409619,  126.24927535,
        389.0453157 ,   44.5191699 ])

In [168]:
# Intercept learned by the SGDRegressor (displayed as a one-element array).
reg.intercept_

array([148.57204688])

In [169]:
# Predictions of the SGDRegressor on the held-out test split.
y_pred2 = reg.predict(X_test)

In [170]:
# R^2 of the SGDRegressor on the test split, comparable to the two scores above.
r2_score(y_test, y_pred2)

0.5207913456385873