<a href="https://colab.research.google.com/github/mounikarevanuru/mlfoundations/blob/main/algorithms/linear_regression/linear_regression_mini_batch_gradient_descent_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import random

In [28]:
class LinearRegressionMiniBatchGD:
    """Linear regression trained with mini-batch gradient descent.

    Each update minimises the mean squared error of a randomly drawn
    mini-batch with respect to the coefficients and the intercept.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to every gradient update.
    epochs : int, default 1000
        Number of outer passes. Each pass performs
        ``n_samples // batch_size`` updates.
    batch_size : int, default 50
        Number of rows drawn for each update. During ``fit`` it is capped
        at the number of training rows.

    Attributes
    ----------
    coef_ : numpy.ndarray or None
        One weight per feature; ``None`` until ``fit`` is called.
    intercept_ : float or None
        Bias term; ``None`` until ``fit`` is called.
    loss_ : float or None
        RMSE on the data passed to ``fit``, computed once after the last
        epoch; ``None`` until ``fit`` is called.
    """

    def __init__(self, lr=0.01, epochs=1000, batch_size=50):
        self.lr = lr
        self.epochs = epochs
        self.coef_ = None
        self.intercept_ = None
        self.loss_ = None
        self.batch_size = batch_size

    def _rmse_loss(self, X, y):
        """Return the root-mean-squared error of the predictions for X."""
        y_pred = self.predict(X)
        return np.sqrt(np.mean((y - y_pred) ** 2))

    def fit(self, X, y):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X : array-like, indexed as (n_samples, n_features)
        y : array-like with one target per row of X

        Raises
        ------
        ValueError
            If X has no rows, X and y disagree on the number of rows, or
            ``batch_size`` is smaller than 1.
        """
        # Plain ndarrays guarantee that X[idx] / y[idx] select rows by
        # position (a pandas Series would look the indices up as labels).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        batch_size = int(self.batch_size)
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1.")
        # Without this cap a batch_size larger than the dataset would give
        # zero updates per epoch and the model would silently stay at its
        # initial values.
        batch_size = min(batch_size, n_samples)
        updates_per_epoch = n_samples // batch_size

        self.coef_ = np.ones(X.shape[1])
        self.intercept_ = 0.0

        row_ids = range(n_samples)
        for _ in range(self.epochs):
            for _ in range(updates_per_epoch):
                # Rows are unique within a batch; separate batches are
                # drawn independently of each other.
                idx = random.sample(row_ids, batch_size)
                X_batch = X[idx]
                y_batch = y[idx]

                residual = y_batch - (X_batch.dot(self.coef_) + self.intercept_)

                # Gradients of mean((y - y_hat) ** 2) over the batch.
                intercept_der = -2 * np.mean(residual)
                coef_der = (-2 / batch_size) * X_batch.T.dot(residual)

                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                self.coef_ = self.coef_ - (self.lr * coef_der)

        self.loss_ = self._rmse_loss(X, y)

    def predict(self, X):
        """Return ``X @ coef_ + intercept_`` for every row of X.

        The model must have been fitted first; ``coef_`` and
        ``intercept_`` are ``None`` before ``fit`` runs.
        """
        return np.asarray(X).dot(self.coef_) + self.intercept_

    def r2_score(self, X, y):
        """Return the coefficient of determination of the predictions for X.

        Computed as ``1 - SS_res / SS_tot``. The ratio is undefined when
        every value in y is identical (``SS_tot`` is zero).
        """
        y = np.asarray(y)
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_total = np.sum((y - np.mean(y)) ** 2)

        return 1 - ss_res / ss_total


In [29]:
# Load the diabetes dataset via scikit-learn's bundled loader.
diabetes = load_diabetes()
#diabetes

In [30]:
# Feature matrix and target vector taken from the loaded dataset object.
X, y = diabetes.data, diabetes.target

In [31]:
# Hold out 20% of the rows for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [32]:
# fit() draws its mini-batches from the stdlib `random` module, so seed it
# here; otherwise every rerun of this cell produces a different model.
random.seed(42)
# lr=0.6 overrides the class default of 0.01; epochs and batch_size keep
# their defaults.
model = LinearRegressionMiniBatchGD(lr = 0.6)
model.fit(X_train, y_train)

In [33]:
# Show the model's predictions for the held-out test rows.
print(model.predict(X_test))

[135.45117107 176.97408786 134.61058656 289.95050197 117.10745196
  88.76255781 252.34202651 182.25131996  80.59197922 106.64206239
  90.80761497 159.35405865  60.25868214 201.37874043  95.10712156
 128.80044168 217.08473954 242.53856965 193.14162114 210.4619271
 203.4299797   84.24560036  68.44340183 183.90310812 152.35084019
 158.11845129 185.63079426 173.56344877  46.18510101 106.10960209
 175.8558012   87.84343412 127.05991362 176.91139695 168.32870714
 186.54251419 118.44690372 113.6631078  141.20624686  56.82064586
  70.81568363 103.62808399 158.38639044 145.30491041 170.91864281
  61.23355421  74.94086882 103.54109519  54.81621534 157.6751501
 153.06050316  61.89711349 109.56175995 104.17030122 166.00121333
 155.969329    90.40418052 203.82630208 114.07450706  65.32436822
 180.43963212 198.97441869 137.83731199 101.16050444 122.55583492
 198.94844584 163.12726242 157.75169054 115.5700226  137.03179514
 176.82138421 190.85823881 231.4608544  139.39064614  78.49606985
 147.0567060

In [34]:
# Learned weights on the first line(s), learned intercept on the next.
print(model.coef_, model.intercept_, sep="\n")

[  38.46560444 -240.4109665   550.38866418  344.51605844 -205.60657047
  -54.69945417 -166.63267676  174.73513394  453.080792     56.78564519]
147.25945351258164


In [35]:
# R^2 on the held-out test split; as the cell's last expression its value
# is shown as the cell output.
model.r2_score(X_test, y_test)

np.float64(0.4457006210229085)

In [36]:
# loss_ is the RMSE that fit() computed on the data it was given (the
# training split here), not an error measured on the test split.
print("RMSE loss:", model.loss_)

RMSE loss: 53.919309756940585
