In [115]:
import pandas as pd
import numpy as np

In [116]:
from sklearn.datasets import load_diabetes

# Pull the diabetes regression data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)
# Cell output: the shape of each array, features first.
tuple(arr.shape for arr in (X, y))

((442, 10), (442,))

In [117]:
from sklearn.model_selection import train_test_split

# Keep 20% of the rows aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)
# Cell output: number of feature columns in the training matrix.
X_train.shape[1]

10

# Linear Regression model (baseline)

In [118]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline. fit() hands back the fitted estimator,
# so construction and training can be chained into one expression.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

In [119]:
from sklearn.metrics import r2_score

# Cell output: R^2 of the baseline on the held-out rows, expressed in percent.
100 * r2_score(y_true=y_test, y_pred=y_pred)

43.99338661568968

In [120]:
# Show the fitted intercept and then the coefficient vector, one per line.
print(reg.intercept_, reg.coef_, sep="\n")

151.88331005254167
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]


# Mini-batch gradient descent regressor class

In [121]:
import random

# Scratch demo: draw 10 distinct integers from 1..99 (sampling without
# replacement), the same call the mini-batch picker relies on.
random.sample(range(1, 100), k=10)

[58, 97, 70, 1, 53, 55, 60, 23, 89, 61]

In [122]:
class MBGDRegressor:
  """Linear regression trained with mini-batch gradient descent.

  Each epoch performs ``n_samples // batch_size`` updates; every update draws
  a fresh random batch of distinct rows (batches are sampled independently,
  so an epoch is not guaranteed to visit every row).

  Parameters
  ----------
  batch_size : int
      Number of rows per update. Values larger than the number of training
      rows are clamped to the dataset size at fit time.
  learning_rate : float, default 0.01
      Step size applied to both the intercept and the coefficient gradients.
  epochs : int, default 100
      Number of passes (each consisting of ``n_samples // batch_size`` updates).
  random_state : int or None, default None
      Seed for a private batch sampler. ``None`` uses the global ``random``
      module, so ``random.seed(...)`` still controls the batches.

  Attributes
  ----------
  coef_ : numpy.ndarray of shape (n_features,) or None before fitting
  intercept_ : float or None before fitting
  """

  def __init__(self, batch_size, learning_rate=0.01, epochs=100, random_state=None):
    self.coef_ = None
    self.intercept_ = None
    self.lr = learning_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.random_state = random_state

  def fit(self, X_train, y_train):
    """Fit the model and return ``self``.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,) (a single column is flattened)

    Raises
    ------
    ValueError
        If the inputs are empty, have mismatched lengths, ``X_train`` is not
        2-D, or ``batch_size`` is smaller than 1.
    """
    # Coerce to arrays so lists / DataFrames can be fancy-indexed by row.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float).reshape(-1)

    if X_train.ndim != 2:
      raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
    n_samples, n_features = X_train.shape
    if n_samples == 0:
      raise ValueError("cannot fit on an empty training set")
    if y_train.shape[0] != n_samples:
      raise ValueError("X_train and y_train must have the same number of rows")

    batch_size = int(self.batch_size)
    if batch_size < 1:
      raise ValueError("batch_size must be a positive integer")
    # A batch larger than the data used to yield zero updates per epoch
    # (the model silently stayed at its initial weights); clamp instead.
    batch_size = min(batch_size, n_samples)
    batches_per_epoch = n_samples // batch_size

    # Private generator when seeded; otherwise the module-level one.
    sampler = random if self.random_state is None else random.Random(self.random_state)

    self.coef_ = np.ones(n_features)
    self.intercept_ = 0

    for _epoch in range(self.epochs):
      for _batch in range(batches_per_epoch):
        idx = sampler.sample(range(n_samples), batch_size)
        X_batch = X_train[idx]
        y_batch = y_train[idx]

        # Intercept step: gradient is averaged over the batch.
        y_hat = self.intercept_ + np.dot(X_batch, self.coef_)
        intercept_der = -2 * np.mean(y_batch - y_hat)
        self.intercept_ = self.intercept_ - (self.lr * intercept_der)

        # Coefficient step: predictions are refreshed with the just-updated
        # intercept. NOTE: this gradient is summed (not averaged) over the
        # batch, so its effective step is batch_size times larger than the
        # intercept's. Kept as-is: existing learning rates are tuned to it.
        y_hat = self.intercept_ + np.dot(X_batch, self.coef_)
        coef_der = -2 * np.dot((y_batch - y_hat), X_batch)
        self.coef_ = self.coef_ - (self.lr * coef_der)

    print(self.intercept_, self.coef_)
    return self

  def predict(self, X_test):
    """Return ``intercept_ + X_test @ coef_`` for array-like ``X_test``."""
    return self.intercept_ + np.dot(np.asarray(X_test, dtype=float), self.coef_)

In [123]:
# Train the hand-written regressor with batches of one tenth of the training rows.
mbgd = MBGDRegressor(
    batch_size=X_train.shape[0] // 10,
    learning_rate=0.01,
    epochs=50,
)
mbgd.fit(X_train, y_train)
y_pred = mbgd.predict(X_test)
# Cell output: held-out R^2 in percent.
100 * r2_score(y_test, y_pred)

152.85080043304566 [  53.57126908  -58.94279718  343.71713983  253.6181038    21.91943316
  -27.5601191  -165.07028195  121.22732638  319.73316047  138.27465743]


42.997138705495

# scikit-learn SGDRegressor trained on mini-batches

In [124]:
from sklearn.linear_model import SGDRegressor

# Constant step size of 0.1; the model is trained incrementally below.
sgd = SGDRegressor(eta0=0.1, learning_rate="constant")
batch_size = 35
row_ids = range(X_train.shape[0])

# 100 incremental updates, each on a freshly drawn batch of distinct rows.
for i in range(100):
    idx = random.sample(row_ids, k=batch_size)
    sgd.partial_fit(X_train[idx], y_train[idx])

In [125]:
# Score the incrementally trained model on the held-out rows.
y_pred = sgd.predict(X_test)
# Cell output: R^2 as a fraction (not scaled to percent here).
r2_score(y_true=y_test, y_pred=y_pred)


0.4071381877182161

In [126]:
# Show the learned intercept and then the coefficient vector, one per line.
print(sgd.intercept_, sgd.coef_, sep="\n")


[164.7109468]
[  69.55576538  -68.55249052  351.68459016  243.70622785    7.33590113
  -48.08596179 -164.86811807  117.30962084  326.91362192  127.15105178]
