In [None]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
X,Y = load_diabetes(return_X_y=True)
# Pin the split seed so the train/test partition (and the R² reported below)
# is the same after every Restart & Run All.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [None]:
# Show the shapes of the training features and training targets from the split.
X_train.shape, Y_train.shape

((353, 10), (353,))

In [None]:
class LRscratch():
  """Ordinary least-squares linear regression solved in closed form.

  Attributes:
    intercept_: fitted bias term; None until fit() has been called.
    coeff_: fitted weights, one per feature; None until fit() has been called.
  """

  def __init__(self):
    self.intercept_ = None
    self.coeff_ = None

  def fit(self, X_train, Y_train):
    """Estimate the intercept and coefficients that minimise squared error.

    Args:
      X_train: 2-D array-like of features, one row per sample.
      Y_train: 1-D array-like of targets, one entry per row of X_train.
    """
    # Prepend a column of ones so the first entry of beta is the intercept.
    design = np.insert(np.asarray(X_train), 0, 1, axis=1)
    # Solve the least-squares problem directly rather than forming
    # inv(X^T X): explicit inversion raises (or returns garbage) when features
    # are collinear, whereas lstsq returns the minimum-norm solution.
    beta, *_ = np.linalg.lstsq(design, np.asarray(Y_train), rcond=None)
    self.intercept_ = beta[0]
    self.coeff_ = beta[1:]

  def predict(self, X_test):
    """Return predictions for X_test using the fitted parameters.

    Args:
      X_test: 2-D array-like with the same number of columns used in fit().

    Raises:
      RuntimeError: if called before fit().
    """
    if self.coeff_ is None:
      raise RuntimeError("LRscratch.predict called before fit")
    return np.asarray(X_test).dot(self.coeff_) + self.intercept_

In [None]:
# Fit the closed-form model on the training split, then predict the targets
# of the held-out test split.
lr_scratch = LRscratch()
lr_scratch.fit(X_train, Y_train)
y_pred = lr_scratch.predict(X_test)

In [None]:
# R² of the closed-form model on the held-out split; left as the cell's last
# expression so the notebook displays it directly.
r2_score(Y_test, y_pred)

0.6032165803407301


Using Gradient Descent

In [None]:
from sklearn.datasets import load_diabetes, make_regression
import numpy as np

In [None]:
# Seed the generator so the synthetic data (and the fitted slope/intercept
# printed below) is identical on every run.
X, Y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)

In [None]:
# Show the shape of the generated feature matrix.
X.shape

(100, 1)

In [None]:
class LR_GD():
  """Single-feature linear regression (y = m*x + b) fitted by batch gradient descent.

  Attributes:
    m: current slope estimate (starts at 20).
    b: current intercept estimate (starts at -100).
    lr: learning rate applied to each gradient step.
    epochs: number of full-batch updates performed by fit().
  """

  def __init__(self, lr, epochs):
    self.m = 20
    self.b = -100
    self.lr = lr
    self.epochs = epochs

  def fit(self, X, Y):
    """Run `epochs` gradient-descent steps on the mean squared error.

    Args:
      X: array-like holding one feature value per sample (flattened to 1-D).
      Y: array-like of targets, one per sample (flattened to 1-D).

    Prints the final slope and intercept.
    """
    x = np.asarray(X, dtype=float).ravel()
    y = np.asarray(Y, dtype=float).ravel()
    for _ in range(self.epochs):
      # Both gradients are taken at the same (m, b) before either is updated.
      residual = y - self.m * x - self.b
      # Average over samples rather than summing: with a sum the effective
      # step size grows with the number of samples, so a learning rate that
      # is fine for a few points overshoots and diverges on larger data.
      slope_b = -2 * np.mean(residual)
      slope_m = -2 * np.mean(residual * x)
      self.m = self.m - self.lr * slope_m
      self.b = self.b - self.lr * slope_b
    print(self.m, self.b)

In [None]:
# Bind the instance to its own name: assigning it to `LR_GD` would replace the
# class with an instance, so re-running this cell would fail.
lr_gd = LR_GD(lr=0.01, epochs=1000)
lr_gd.fit(X, Y)

25715.03904209265 -243436.7359413026


For Higher Dimensions

In [None]:
from sklearn.datasets import make_regression
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# Seed the generator so the synthetic data, and therefore the coefficients
# compared below, is identical on every run.
X, y = make_regression(n_samples=100, n_features=10, noise=10, random_state=42)

In [None]:
class GD_hd():
    """Multi-feature linear regression fitted by batch gradient descent.

    Attributes:
        coeff: weight vector, one entry per feature; None until fit() sizes it.
        intercept: bias term (starts at 0).
        lr: learning rate applied to each gradient step.
        epochs: number of full-batch updates performed by fit().
    """

    def __init__(self, lr, epochs):
        # The weight vector is created in fit() from the data actually passed
        # in, so the model does not depend on a notebook-global X.
        self.coeff = None
        self.intercept = 0
        self.lr = lr
        self.epochs = epochs

    def fit(self, X, y):
        """Run `epochs` gradient-descent steps on the mean squared error.

        Args:
            X: 2-D array-like of features, one row per sample.
            y: 1-D array-like of targets, one per row of X.

        Prints the final intercept and coefficient vector.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        # Start every weight at 1; keep existing weights if fit() is called
        # again on data with the same number of features.
        if self.coeff is None or self.coeff.shape[0] != n_features:
            self.coeff = np.ones(n_features)

        for _ in range(self.epochs):
            residual = y - (X.dot(self.coeff) + self.intercept)
            slope_intercept = -2 * np.mean(residual)
            # One partial derivative per feature. Averaging this vector down
            # to a single number would give every coefficient the same update
            # and make them all converge to one shared value.
            slope_coeff = -2 * X.T.dot(residual) / n_samples
            self.intercept = self.intercept - (self.lr * slope_intercept)
            self.coeff = self.coeff - (self.lr * slope_coeff)
        print(self.intercept, self.coeff)

    def predict(self, X):
        """Return predictions for X using the current intercept and weights.

        Raises:
            RuntimeError: if called before fit().
        """
        if self.coeff is None:
            raise RuntimeError("GD_hd.predict called before fit")
        return np.dot(X, self.coeff) + self.intercept


In [None]:
# Train for 100 epochs with learning rate 0.01; fit() prints the learned
# intercept followed by the coefficient vector.
gd_higher_dimension = GD_hd(lr=0.01, epochs=100)
gd_higher_dimension.fit(X, y)

1.421224103408971 [67.18006362 67.18006362 67.18006362 67.18006362 67.18006362 67.18006362
 67.18006362 67.18006362 67.18006362 67.18006362]


In [None]:
# Fit scikit-learn's LinearRegression on the same X, y.
actual = LinearRegression()
actual.fit(X, y)

In [None]:
# Per-feature coefficients learned by the scikit-learn model.
actual.coef_

array([67.66265584, 17.22042122, 60.26642582, 76.69458477, 71.60549187,
       19.90460882, 35.57277609, 41.54021466, 30.70872065, 46.77825276])

In [None]:
# Intercept learned by the scikit-learn model.
actual.intercept_

1.3175347866250195

Stochastic Gradient Descent

In [None]:
from sklearn.datasets import make_regression
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# Seed both the data generator and the split so the SGD results below are
# identical on every run.
X, y = make_regression(n_samples=100, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
class SGD_hd():
    """Multi-feature linear regression fitted by stochastic gradient descent.

    Attributes:
        coeff: weight vector, one entry per feature; None until fit() sizes it.
        intercept: bias term (starts at 120).
        lr: learning rate applied to each single-sample step.
        epochs: number of passes; each pass performs one update per row of X.
        random_state: optional seed for the sample draws; None uses numpy's
            global random state.
    """

    def __init__(self, lr, epochs, random_state=None):
        # The weight vector is created in fit() from the data actually passed
        # in, so the model does not depend on a notebook-global X.
        self.coeff = None
        self.intercept = 120
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X, y):
        """Run `epochs * len(X)` single-sample gradient steps.

        Each step draws one row index uniformly at random (with replacement)
        and updates the parameters using that sample's squared-error gradient.

        Args:
            X: 2-D array-like of features, one row per sample.
            y: 1-D array-like of targets, one per row of X.

        Prints the final intercept and coefficient vector.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        # Start every weight at 1; keep existing weights if fit() is called
        # again on data with the same number of features.
        if self.coeff is None or self.coeff.shape[0] != n_features:
            self.coeff = np.ones(n_features)

        # With no seed, draw from numpy's global state; otherwise use a
        # private, seeded generator so runs are repeatable.
        if self.random_state is None:
            sampler = np.random
        else:
            sampler = np.random.RandomState(self.random_state)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = sampler.randint(0, n_samples)
                error = y[idx] - (np.dot(X[idx], self.coeff) + self.intercept)
                slope_intercept = -2 * error
                slope_coeff = -2 * error * X[idx]
                self.intercept = self.intercept - (self.lr * slope_intercept)
                self.coeff = self.coeff - (self.lr * slope_coeff)
        print(self.intercept, self.coeff)

    def predict(self, X):
        """Return predictions for X using the current intercept and weights.

        Raises:
            RuntimeError: if called before fit().
        """
        if self.coeff is None:
            raise RuntimeError("SGD_hd.predict called before fit")
        return np.dot(X, self.coeff) + self.intercept


In [None]:
# Train on the training split for 50 epochs with learning rate 0.01; fit()
# prints the learned intercept followed by the coefficient vector.
sgd = SGD_hd(lr=0.01, epochs=50)
sgd.fit(X_train, y_train)

-5.210164887162972e-15 [64.13357022 44.19297706 77.7251166  43.1538982  46.18471905 66.17820262
 49.50640728 97.89301551 60.20380651 56.04580355]


In [None]:
# Predict targets for the held-out test split.
y_pred = sgd.predict(X_test)

In [None]:
# R² on the held-out test split.
# NOTE(review): the data for this section was generated without a `noise`
# argument; assuming make_regression's default is noise-free, the targets are
# an exact linear function of X and a score of ~1.0 is expected — confirm.
r2_score(y_test, y_pred)

1.0