In [73]:
from sklearn.datasets import load_diabetes
import random
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Build model

In [170]:
class my_MBGDRegressor:
    """Linear regression fitted with mini-batch gradient descent on squared error.

    Parameters
    ----------
    batch_size : int
        Number of rows drawn (without replacement) for each update. A value
        larger than the training set is clamped to the training-set size so
        that ``fit`` never degenerates into a silent no-op.
    learning_rate : float
        Step size applied to every gradient update.
    epochs : int
        Number of passes; each pass performs ``n_samples // batch_size`` updates.
    random_state : int or None, default=None
        Seed for the batch sampler. ``None`` draws from the global ``random``
        module, so ``random.seed(...)`` still controls the result.

    Attributes
    ----------
    coef_ : numpy.ndarray or None
        Fitted weights, one per feature (``None`` before ``fit``).
    intercept_ : float or None
        Fitted bias term (``None`` before ``fit``).
    """

    def __init__(self, batch_size, learning_rate, epochs, random_state=None):
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or is empty, if the number of targets does
            not match the number of rows, or if ``batch_size`` is below 1.
        """
        # Coerce to arrays so lists / DataFrames support the fancy indexing below.
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError("X_train and y_train have inconsistent lengths")
        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        # Clamp oversized batches: otherwise n_samples // batch_size would be 0
        # and the weights would silently stay at their initial values.
        batch_size = min(int(self.batch_size), n_samples)
        updates_per_epoch = n_samples // batch_size

        sampler = random if self.random_state is None else random.Random(self.random_state)

        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(updates_per_epoch):
                idx = sampler.sample(range(n_samples), batch_size)
                X_batch = X[idx]

                # Both gradients use the residual computed before either update.
                residual = y[idx] - (self.intercept_ + np.dot(X_batch, self.coef_))

                intercept_der = -2 * np.mean(residual)
                # NOTE: the coefficient gradient is summed (not averaged) over the
                # batch, so its effective step scales with batch_size. Kept as-is
                # so existing learning-rate settings keep producing the same updates.
                coef_der = -2 * np.dot(residual, X_batch)

                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                self.coef_ = self.coef_ - (self.lr * coef_der)

    def predict(self, X_test):
        """Return ``intercept_ + X_test @ coef_`` for each row of ``X_test``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError("my_MBGDRegressor must be fitted before calling predict")
        return self.intercept_ + np.dot(np.asarray(X_test, dtype=float), self.coef_)

    def my_r2_score(self, y_test, y_pred):
        """Coefficient of determination, ``1 - SS_res / SS_tot``.

        When ``y_test`` is constant (``SS_tot == 0``) the ratio is undefined;
        this returns 1.0 for a perfect prediction and 0.0 otherwise instead of
        dividing by zero.
        """
        y_true = np.asarray(y_test, dtype=float)
        y_hat = np.asarray(y_pred, dtype=float)

        ss_res = np.sum((y_true - y_hat) ** 2)
        ss_tot = np.sum((y_true - y_true.mean()) ** 2)

        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1 - (ss_res / ss_tot)


# Load data

In [35]:
# Load the diabetes dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y = True)

In [64]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=11,
)

# Testing Model

## My model

In [86]:
# fit() draws its mini-batches from the global `random` module, so seed it
# here; without a seed every run produces different coefficients and R2.
random.seed(11)

# Batch size is one tenth of the training rows; learning rate 0.01, 50 epochs.
mbgd = my_MBGDRegressor(X_train.shape[0] // 10, 0.01, 50)
mbgd.fit(X_train, y_train)

In [87]:
# Predict on the held-out test rows with the custom mini-batch model.
y_pred = mbgd.predict(X_test)

In [88]:
# Show the fitted weights, then the bias, one per line.
print(mbgd.coef_, mbgd.intercept_, sep="\n")

[  12.85733272 -101.31780578  330.97473383  223.58995744   11.56031343
  -37.11159559 -153.93287496  123.26110525  288.97113394  147.18108541]
153.0606409084559


In [89]:
# Score the custom model with its own R2 implementation.
print(
    "R2 Score : ",
    mbgd.my_r2_score(y_test, y_pred),
)

R2 Score :  0.5427464423783381


## Using sklearn (LR)

In [65]:
# Fit scikit-learn's LinearRegression on the same training split for comparison.
# NOTE: `lr` here names the estimator, not a learning rate.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [66]:
# NOTE: rebinds y_pred, replacing the custom model's predictions from earlier.
y_pred = lr.predict(X_test)

In [67]:
# Score the LinearRegression predictions with scikit-learn's r2_score.
print(
    "R2 Score : ",
    r2_score(y_test, y_pred),
)

R2 Score :  0.5771689611166477


In [70]:
# Show the LinearRegression weights, then the bias, one per line.
print(lr.coef_, lr.intercept_, sep="\n")

[ -67.16308558 -244.0456999   509.15636468  305.70665091 -449.72246545
  217.82622156  -49.52742566   56.3929511   663.25035342   82.20307496]
152.88437994403927


## Using sklearn (SGD)

In [171]:
from sklearn.linear_model import SGDRegressor

In [165]:
# Constant step size of 0.2. random_state pins the estimator's internal
# shuffling so the fitted weights are reproducible across runs.
sgd = SGDRegressor(learning_rate='constant', eta0=0.2, random_state=11)

In [166]:
batch_size = 35
n_updates = 100  # number of partial_fit calls, one random batch each

# Dedicated seeded sampler: the drawn batches are reproducible and the global
# `random` state is left untouched.
batch_rng = random.Random(11)

# NOTE: every partial_fit call updates the same `sgd` object, so re-running
# this cell continues training instead of starting over; re-create `sgd` first
# for a fresh fit.
for _ in range(n_updates):
    idx = batch_rng.sample(range(X_train.shape[0]), batch_size)
    sgd.partial_fit(X_train[idx], y_train[idx])

In [167]:
# Show the SGDRegressor weights, then the bias, one per line.
print(sgd.coef_, sgd.intercept_, sep="\n")

[ -25.15208945 -178.66883659  432.47031905  270.07483429   -1.33863671
  -97.63796269 -155.62907902  109.14172933  380.78236203  147.65791323]
[134.64854188]


In [168]:
# NOTE: rebinds y_pred again, now holding the SGDRegressor predictions.
y_pred = sgd.predict(X_test)

In [169]:
# Score the SGDRegressor predictions with scikit-learn's r2_score.
print(
    "R2 Score : ",
    r2_score(y_test, y_pred),
)

R2 Score :  0.5410356863419605
