# Import libraries

In [37]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [38]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import r2_score

In [39]:
import time
import random

# Load dataset

In [40]:
# return_X_y=True hands back the (features, target) pair directly instead of a Bunch container
X, y = load_diabetes(return_X_y=True)

# Train test split

In [41]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply LinearRegression

In [42]:
# Ordinary least squares fit on the training split; it is the reference that the
# gradient-descent models later in the notebook are compared against
reg1 = LinearRegression()
reg1.fit(X_train, y_train)

In [43]:
reg1.intercept_

151.34560453985995

In [44]:
reg1.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [45]:
r2_score(y_test, reg1.predict(X_test))

0.4526027629719195

# Create a custom Mini Batch Regressor Class

In [46]:
class MiniBatchRegressor:
  """Linear regression trained with mini-batch gradient descent on squared error.

  Parameters
  ----------
  lr : float, default=0.01
      Step size applied to every gradient update.
  epochs : int, default=100
      Number of passes. Each pass performs ``n_samples // batch_size`` updates.
  batch_size : int, default=10
      Number of rows drawn (without replacement) for each update. A value
      larger than the training set is clamped to the training-set size so
      that at least one update per epoch always happens.
  random_state : int or None, default=None
      Seed for batch sampling. ``None`` draws from the global ``random``
      module (results vary from run to run unless the caller seeds it);
      an int makes every call to ``fit`` repeatable.

  Attributes
  ----------
  coef_ : ndarray of shape (n_features,), or None before ``fit``
  intercept_ : float, or None before ``fit``
  """

  def __init__(self, lr=0.01, epochs=100, batch_size=10, random_state=None) -> None:
    self.coef_ = None
    self.intercept_ = None
    self.lr = lr
    self.epochs = epochs
    self.batch_size = batch_size
    self.random_state = random_state

  def fit(self, X_train, y_train):
    """Learn ``coef_`` and ``intercept_`` from the training data.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,)

    Raises
    ------
    ValueError
        If the inputs are empty or have mismatched shapes, or if
        ``batch_size`` is smaller than 1.
    """
    # Plain ndarrays guarantee positional fancy indexing (lists have no
    # .shape, and pandas objects would index by label instead).
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    if X_train.ndim != 2:
      raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
    n_samples = X_train.shape[0]
    if n_samples == 0:
      raise ValueError("X_train must contain at least one sample")
    if y_train.shape != (n_samples,):
      raise ValueError("y_train must be 1-dimensional with one value per row of X_train")
    if self.batch_size < 1:
      raise ValueError("batch_size must be at least 1")

    # Without the clamp an oversized batch_size yields zero batches per epoch
    # and the model would silently stay at its initial values.
    batch_size = min(int(self.batch_size), n_samples)
    batches_per_epoch = n_samples // batch_size

    # The module itself and a random.Random instance both expose .sample().
    sampler = random if self.random_state is None else random.Random(self.random_state)

    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1])

    for _ in range(self.epochs):
      for _ in range(batches_per_epoch):
        idx = sampler.sample(range(n_samples), batch_size)
        X_batch = X_train[idx]
        y_batch = y_train[idx]

        # Both gradients use the prediction made before either parameter moves.
        residual = y_batch - (np.dot(X_batch, self.coef_) + self.intercept_)

        intercept_der = -2 * np.mean(residual)
        # NOTE: this gradient is summed over the batch while the intercept
        # gradient above is averaged, so the effective coefficient step grows
        # with batch_size. Kept as-is: results at the default lr rely on it.
        coef_der = -2 * np.dot(residual, X_batch)

        self.intercept_ = self.intercept_ - (self.lr * intercept_der)
        self.coef_ = self.coef_ - (self.lr * coef_der)

  def predict(self, X_test):
    """Return predictions ``X_test @ coef_ + intercept_`` for each row.

    Raises
    ------
    RuntimeError
        If called before ``fit``.
    """
    if self.coef_ is None:
      raise RuntimeError("MiniBatchRegressor must be fitted before calling predict")
    return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [47]:
# Time one full training run of the custom regressor.
# batch_size is set to one tenth of the training rows.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start = time.perf_counter()
reg2 = MiniBatchRegressor(epochs=100, batch_size=int(X_train.shape[0]/10))
reg2.fit(X_train, y_train)
print(time.perf_counter() - start)

0.109161376953125


In [48]:
reg2.intercept_

152.51540816818206

In [49]:
reg2.coef_

array([  42.83463143, -176.68536843,  467.62172556,  306.75675386,
        -43.20391515,  -91.12293545, -206.97952631,  149.78060569,
        345.23572043,  130.41823323])

In [50]:
r2_score(y_test, reg2.predict(X_test))

0.4620428470893253

# Compare both results

In [51]:
reg2.intercept_ - reg1.intercept_

1.1698036283221143

In [52]:
reg2.coef_ - reg1.coef_

array([   4.93061008,   65.27899388,  -74.80703295,  -40.94709005,
        888.28493074, -609.18521244, -370.3995093 , -125.53729588,
       -390.96313847,   81.74757579])

# Built-in SGDRegressor

In [57]:
# learning_rate='constant' keeps the step size fixed at eta0 for every update
reg3 = SGDRegressor(learning_rate='constant',eta0=0.1)

In [58]:
batch_size = 35
# Feed the estimator 100 randomly drawn mini-batches, one incremental
# partial_fit call per batch (rows are sampled without replacement within a batch).
for step in range(100):
    batch_rows = random.sample(range(len(X_train)), batch_size)
    reg3.partial_fit(X_train[batch_rows], y_train[batch_rows])

In [59]:
reg3.intercept_

array([155.35852072])

In [60]:
reg3.coef_

array([  58.66655969, -103.33288922,  370.31340533,  249.84429695,
         -9.72638321,  -45.87669729, -182.23478208,  136.29265654,
        278.81272236,  133.17780537])

In [61]:
r2_score(y_test, reg3.predict(X_test))

0.44610085323702475

# Code troubleshooting

In [53]:
batch_size = 10
int(X_train.shape[0]/batch_size)

35

In [54]:
range(X_train.shape[0])

range(0, 353)

In [55]:
random.sample(range(X_train.shape[0]), batch_size)

[270, 27, 122, 29, 189, 22, 49, 24, 81, 240]

In [56]:
int(X_train.shape[0]/20)

17