In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

In [158]:
class BaseOptimizers():
  """Gradient-based optimizers for a least-squares linear model.

  Offers batch gradient descent, mini-batch gradient descent and
  stochastic gradient descent. Every optimizer draws its initial weights
  from a standard normal distribution with shape ``(n_features, 1)``,
  records the weights held *before* each update in ``self.all_weights``
  and returns the weights after the last update.

  Note that ``loss`` is the summed squared error while ``gradient`` is
  ``X.T @ (X @ w - y)``, i.e. the gradient of *half* that loss; the
  missing factor of two is absorbed by the learning rate.
  """

  def __init__(self, optimizer = 'GD'):
    """Remember the name of the optimizer chosen by the caller."""
    self.optimizer = optimizer

  @staticmethod
  def _align_targets(y_hat, y):
    """Give ``y`` the shape of ``y_hat`` when both hold the same number of values.

    Without this, subtracting a 1-D target of shape ``(n,)`` from
    predictions of shape ``(n, 1)`` broadcasts to an ``(n, n)`` matrix
    and silently corrupts both the loss and the gradient. Targets whose
    size differs from the predictions are returned untouched so ordinary
    broadcasting (e.g. a scalar target) keeps working.
    """
    y = np.asarray(y)
    if y.ndim != y_hat.ndim and y.size == y_hat.size:
      return y.reshape(y_hat.shape)
    return y

  def loss(self, X, w, y):
    """Return the summed squared error of the predictions ``X @ w`` against ``y``.

    Raises AssertionError when the last axis of ``X`` does not match the
    first axis of ``w``.
    """
    assert X.shape[-1] == w.shape[0], 'Incompatible shapes'
    y_hat = X @ w
    residual = y_hat - self._align_targets(y_hat, y)
    return np.sum(np.square(residual))

  def lr_schedule(self, t, t0 = 200, t1 = 100000):
    """Return the decaying learning rate ``t0 / (t + t1)`` for update step ``t``.

    ``t0`` and ``t1`` default to the values the optimizers in this class use.
    """
    return t0 / (t + t1)

  def gradient(self, X, w, y):
    """Return ``X.T @ (X @ w - y)``, the gradient of half the summed squared error.

    Raises AssertionError when the last axis of ``X`` does not match the
    first axis of ``w``.
    """
    assert X.shape[-1] == w.shape[0], 'Incompatible shapes'
    y_hat = X @ w
    return X.T @ (y_hat - self._align_targets(y_hat, y))

  def gradient_descent(self, X, y,
                       verbose, epochs, lr):
    """Run ``epochs`` full-batch updates with the constant learning rate ``lr``.

    When ``verbose`` is truthy the loss is printed before each update.
    Returns the final weights.
    """
    w0 = np.random.normal(0, 1, size=(X.shape[1],1))
    self.all_weights = []
    for epoch in range(epochs):
      if verbose:
        print('The Current Loss is :', self.loss(X, w0, y))
      self.all_weights.append(w0)
      # Rebinding (not in-place update) keeps the stored history entries distinct.
      w0 = w0 - lr*(self.gradient(X, w0, y))
    return w0

  def mini_batch_gd(self, X, y,
                    verbose, epochs, batch_size):
    """Run mini-batch gradient descent with the ``lr_schedule`` learning rate.

    The rows are reshuffled at the start of every epoch and consumed in
    consecutive slices of ``batch_size`` rows; the last slice of an epoch
    may be shorter. When ``verbose`` is truthy the loss on the full data
    is printed after each epoch. Returns the final weights.
    """
    w0 = np.random.normal(0, 1, size=(X.shape[1],1))
    t = 0  # global update counter that drives the learning-rate decay
    self.all_weights = []
    for epoch in range(epochs):
      random_indices = np.random.permutation(X.shape[0])
      X_shuffled = X[random_indices]
      y_shuffled = y[random_indices]
      for i in range(0, X.shape[0], batch_size):
        t = t + 1
        X_temp = X_shuffled[i:i+batch_size]
        y_temp = y_shuffled[i:i+batch_size]
        lr = self.lr_schedule(t)
        self.all_weights.append(w0)
        w0 = w0 - lr*(self.gradient(X_temp, w0, y_temp))
      if verbose:
          print(f'Epoch {epoch} Loss is :', self.loss(X, w0, y))
    return w0

  def stochastic_gd(self, X,
                    y, verbose, epochs):
    """Run stochastic gradient descent with the ``lr_schedule`` learning rate.

    Each epoch performs ``X.shape[0]`` single-row updates; the row for
    every update is drawn uniformly at random with replacement. When
    ``verbose`` is truthy the loss on the full data is printed after each
    epoch. Returns the final weights.
    """
    w0 = np.random.normal(0, 1, size=(X.shape[1],1))
    t = 0  # global update counter that drives the learning-rate decay
    self.all_weights = []
    for epoch in range(epochs):
      for i in range(X.shape[0]):
        random_index = np.random.randint(X.shape[0])
        t = t + 1
        # Slicing (rather than indexing) keeps the sample two-dimensional.
        X_temp = X[random_index:random_index+1]
        y_temp = y[random_index:random_index+1]
        lr = self.lr_schedule(t)
        self.all_weights.append(w0)
        w0 = w0 - lr*(self.gradient(X_temp, w0, y_temp))
      if verbose:
          print(f'Epoch {epoch} Loss is :', self.loss(X, w0, y))
    return w0

In [159]:
class LinearRegression(BaseOptimizers):
  """Linear regression with a bias term, fitted by a gradient-based optimizer.

  Parameters
  ----------
  optimizer : str
      'GD' selects batch gradient descent and 'MBGD' mini-batch gradient
      descent; any other value falls back to stochastic gradient descent.
  random_seed : int or None
      Seed applied to NumPy's global random generator while ``train``
      runs, making weight initialisation and sampling repeatable. The
      generator's previous state is restored afterwards. ``None`` leaves
      the generator untouched.
  """

  def __init__(self,
               optimizer = 'GD',
               random_seed = 42):
    super().__init__(optimizer)
    self.random_seed = random_seed

  def add_dummy_feature(self, X):
    """Return ``X`` with a leading column of ones (the bias feature)."""
    matrix_dummy = np.hstack((np.ones((X.shape[0], 1),
                                            dtype = X.dtype),
                                            X))
    return matrix_dummy

  def preprocess(self, X):
    """Apply the feature transformations shared by ``train`` and ``predict``."""
    X = self.add_dummy_feature(X)
    return X

  def train(self,
            X_train,
            y_train,
            epochs = 200,
            batch_size = 100,
            learning_rate = 0.001,
            verbose = False):
    """Fit the model and store the result on the instance.

    ``learning_rate`` is used only by 'GD' and ``batch_size`` only by
    'MBGD'; the other optimizers take their step size from
    ``lr_schedule``. After the call ``self.optimized_weights`` holds the
    final weights and ``self.weights`` the per-update weight history.

    Raises AssertionError for 'MBGD' when ``batch_size`` is not positive
    or is not smaller than the number of training rows.
    """
    X_train = self.preprocess(X_train)

    # Seed only for the duration of training so callers' own use of the
    # global generator is not disturbed. With no seed the state is left
    # alone entirely; restoring it would make every run identical.
    seeded = self.random_seed is not None
    if seeded:
      saved_state = np.random.get_state()
      np.random.seed(self.random_seed)
    try:
      # Compare by value: identity checks against string literals depend
      # on interning and are not guaranteed to succeed.
      if self.optimizer == 'GD':
        self.optimized_weights = self.gradient_descent(X_train, y_train,
                                                      verbose, epochs,
                                                      learning_rate)
      elif self.optimizer == 'MBGD':
        # The batch-size constraints only make sense for mini-batches.
        assert batch_size > 0, 'batch size must be positive'
        assert batch_size < X_train.shape[0], 'batch size must be smaller than the number of data points'
        self.optimized_weights = self.mini_batch_gd(X_train, y_train,
                                                    verbose, epochs=epochs,
                                                    batch_size = batch_size)
      else:
        self.optimized_weights = self.stochastic_gd(X_train, y_train,
                                                    verbose, epochs=epochs)
    finally:
      if seeded:
        np.random.set_state(saved_state)
    self.weights = self.all_weights

  def predict(self, X):
    """Return predictions for ``X`` using the weights learned by ``train``.

    The input goes through the same ``preprocess`` step as the training
    data. Raises AssertionError when the resulting feature count does not
    match the learned weights.
    """
    X = self.preprocess(X)
    assert X.shape[-1] == self.optimized_weights.shape[0], 'Incompatible Shapes'
    return X @ self.optimized_weights

In [160]:
from sklearn.datasets import make_regression
# Fix the seed so the synthetic data and the train/test split are the same on every run.
SEED = 42
X, y = make_regression(n_samples = 10000, random_state = SEED)
# The optimizers keep weights as (n_features, 1) columns, so make the target a column too.
y = y.reshape(-1,1)
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = SEED)

In [151]:
import time
# Number of epochs used by the mini-batch and stochastic gradient descent runs below.
e = 10

In [164]:
# Time batch gradient descent. perf_counter is a monotonic, high-resolution
# clock intended for measuring intervals, unlike the wall clock time.time().
start = time.perf_counter()
model_gd = LinearRegression(optimizer = 'GD')
model_gd.train(x_train, y_train, epochs = 10, verbose = True, learning_rate = 0.0001)
print(time.perf_counter() - start)

The Current Loss is : 257986103.63728854
The Current Loss is : 13082377.031345716
The Current Loss is : 1089717.733753635
The Current Loss is : 107598.11621560548
The Current Loss is : 11453.56003601751
The Current Loss is : 1269.9388190657373
The Current Loss is : 144.51802903685052
The Current Loss is : 16.75490393676003
The Current Loss is : 1.970597825032935
The Current Loss is : 0.23448419739197524
0.06466317176818848


In [165]:
# Time mini-batch gradient descent. perf_counter is a monotonic, high-resolution
# clock intended for measuring intervals, unlike the wall clock time.time().
start = time.perf_counter()
model_mbgd = LinearRegression(optimizer = 'MBGD')
model_mbgd.train(x_train, y_train, batch_size = 100, epochs = e, verbose = True)
print(time.perf_counter() - start)

Epoch 0 Loss is : 1.78015282543965e-05
Epoch 1 Loss is : 2.4507867883945274e-18
Epoch 2 Loss is : 1.1526821658053038e-23
Epoch 3 Loss is : 1.116941108782542e-23
Epoch 4 Loss is : 1.0512088676956352e-23
Epoch 5 Loss is : 1.0856336235156324e-23
Epoch 6 Loss is : 1.069286808433116e-23
Epoch 7 Loss is : 1.0599778840174116e-23
Epoch 8 Loss is : 1.0752533563490111e-23
Epoch 9 Loss is : 1.060598643555592e-23
0.09479856491088867


In [168]:
# Time stochastic gradient descent. perf_counter is a monotonic, high-resolution
# clock intended for measuring intervals, unlike the wall clock time.time().
start = time.perf_counter()
model_sgd = LinearRegression(optimizer = 'SGD')
model_sgd.train(x_train, y_train, epochs = e, verbose = True)
print(time.perf_counter() - start)

Epoch 0 Loss is : 0.0015262366480492296
Epoch 1 Loss is : 8.688210971476476e-14
Epoch 2 Loss is : 1.705728411008878e-21
Epoch 3 Loss is : 1.3138694677457339e-21
Epoch 4 Loss is : 1.2844493463757655e-21
Epoch 5 Loss is : 1.2564962356337583e-21
Epoch 6 Loss is : 1.2444885972753088e-21
Epoch 7 Loss is : 1.2193577770824084e-21
Epoch 8 Loss is : 1.2267829178621826e-21
Epoch 9 Loss is : 1.223819860002979e-21
1.3747100830078125
