In [1]:
import numpy as np

In [2]:
class BatchGradientDescentRegressor:
    '''
    Fits a linear regression model with Batch Gradient Descent on the MSE loss.
    It is required to scale the input features before fitting the model.

    A column of ones is prepended to the inputs internally, so weights[0] is the
    bias term and weights[1:] hold one coefficient per input feature.

    Params:
    eta (float): Learning rate
    epochs (int): maximum number of epochs
    tol (float): training stops once the absolute change in MSE between two
        consecutive epochs drops below this value
    random_state (int): seed used for the random weight initialization
    verbose (bool): if True (default), print the training MSE after every epoch

    Attributes:
    weights (np.ndarray | None): learned parameters, None until fit() is called
    loss (float | None): MSE recorded in the most recent epoch, None until fit() is called
    loss_history (list): MSE recorded in each epoch of the most recent fit() call
    '''
    def __init__(self, eta: float = 0.01, epochs: int = 1000, tol: float = 0.001,
                 random_state: int = 42, verbose: bool = True):
        self.eta = eta
        self.epochs = epochs
        self.tol = tol
        self.random_state = random_state
        self.verbose = verbose
        self.weights = None
        self.__z = None
        self.__af = None
        self.__errors = None
        self.loss = None
        self.loss_history = []  # one entry per completed epoch, no placeholder values


    @staticmethod
    def __with_bias(x) -> np.ndarray:
        '''Validate that x is a non-empty 2-D matrix and prepend a column of 1s (bias feature).'''
        x = np.asarray(x, dtype=float)
        if x.ndim != 2:
            raise ValueError(f"x must be a 2-D array of shape (n_samples, n_features), got shape {x.shape}")
        n_row = x.shape[0]
        if n_row == 0:
            raise ValueError("x must contain at least one row")
        return np.hstack((np.ones((n_row, 1)), x))


    def __initialise_weights(self, n_col: int): # random weight initialization
        # A private RandomState produces the same draws as np.random.seed(...) followed by
        # np.random.randn(...), but leaves the process-wide NumPy generator untouched.
        rng = np.random.RandomState(self.random_state)
        self.weights = rng.randn(n_col)


    def __sum_function(self, x: np.ndarray):
        self.__z = np.dot(x, self.weights.reshape(-1, 1)) # X.W, shape (n_row, 1)
        self.__af = self.__z.ravel() # flat predictions, shape (n_row,)


    def __weight_update(self, x: np.ndarray, y: np.ndarray): # update weights
        n_row = len(self.__af)
        self.__errors = self.__af - y.ravel() # y_hat - y
        # Step along the mean of error * feature over all rows.
        self.weights = self.weights - (self.eta * (1 / n_row) * np.dot(self.__errors, x))


    def __loss_update(self): # update loss
        # The errors come from the predictions made *before* this epoch's weight update.
        self.loss = np.mean(self.__errors ** 2)


    def fit(self, x: np.ndarray, y: np.ndarray):
        '''
        Train the model on x (n_samples, n_features) and y (n_samples values).

        Runs at most `epochs` full-batch updates and stops early once the absolute
        change in MSE between two consecutive epochs is below `tol`.

        Returns:
        self, so calls can be chained.

        Raises:
        ValueError: if x is not a non-empty 2-D array or y does not hold one value per row of x.
        '''
        x = self.__with_bias(x) # appending array of 1s to x as bias feature
        y = np.asarray(y, dtype=float).ravel()
        n_row, n_col = x.shape
        if y.shape[0] != n_row:
            # Without this check a length-1 y would silently broadcast against every row.
            raise ValueError(f"y must hold one target per row of x: got {y.shape[0]} targets for {n_row} rows")

        self.__initialise_weights(n_col=n_col) # random weight initialization
        self.loss_history = [] # start a fresh history on every fit() call

        for epoch in range(1, self.epochs + 1):
            self.__sum_function(x=x) # prediction
            self.__weight_update(x=x, y=y) # weight update
            self.__loss_update() # update loss
            self.loss_history.append(self.loss) # add loss to loss_history list
            if self.verbose:
                print(f'Epoch {epoch} / {self.epochs}\tTraining MSE: {np.round(self.loss, 5)}')

            # The convergence check needs two real losses; the first epoch has nothing to compare with.
            if len(self.loss_history) > 1 and np.abs(self.loss_history[-1] - self.loss_history[-2]) < self.tol:
                if self.verbose:
                    print("Exiting as reduction in MSE < tol")
                break
        return self


    def predict(self, x: np.ndarray):
        '''
        Predict targets for x (n_samples, n_features).

        Returns:
        np.ndarray of shape (n_samples, 1); call .ravel() for a flat vector.
        '''
        x = self.__with_bias(x) # appending array of 1s to x as bias feature
        return np.dot(x, self.weights.reshape(-1, 1)) # prediction

    def __repr__(self):
        return f"BatchGradientDescentRegressor(eta={self.eta}, epochs={self.epochs}, tol={self.tol}, random_state={self.random_state})"


In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [4]:
# Synthetic regression data: x1 counts up, x2 counts down, x3 is uniform random
# integers, and the target y is the row-wise sum of the three features.
np.random.seed(42)
data = pd.DataFrame(
    {
        'x1': np.arange(100000),
        'x2': np.arange(100000, 0, -1),
        'x3': np.random.randint(low=0, high=100000, size=100000),
    }
).assign(y=lambda frame: frame.sum(axis=1))

In [5]:
# Preview the synthetic dataset built in the previous cell.
# NOTE(review): data.head() would keep the rendered output short.
data

Unnamed: 0,x1,x2,x3,y
0,0,100000,15795,115795
1,1,99999,860,100860
2,2,99998,76820,176820
3,3,99997,54886,154886
4,4,99996,6265,106265
...,...,...,...,...
99995,99995,5,57885,157885
99996,99996,4,50577,150577
99997,99997,3,87556,187556
99998,99998,2,12117,112117


In [6]:
# Separate the feature matrix (every column except the target) from the target vector,
# then echo both shapes as a sanity check.
x = data.drop('y', axis=1).to_numpy()
y = data['y'].to_numpy()
(x.shape, y.shape)

((100000, 3), (100000,))

In [7]:
# Hold out 30% of the rows as a test set; random_state=42 makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [8]:
# Fit the min-max scaler on the training features only and scale them; the same fitted
# scaler is applied to the test features later, so test data never influences the scaling.
# NOTE(review): x_train is overwritten, so the unscaled training features are no longer
# available after this cell.
minmax_scaler = MinMaxScaler()
x_train = minmax_scaler.fit_transform(x_train)

In [9]:
# Learning rate 1 for at most 2000 epochs; tol and random_state keep their defaults (0.001 and 42).
regressor = BatchGradientDescentRegressor(eta=1, epochs=2000)

In [10]:
# fit() prints the training MSE once per epoch and returns the regressor itself,
# which is why its repr is echoed as the cell result.
regressor.fit(x_train, y_train)

Epoch 1 / 2000	Training MSE: 23286462149.17448
Epoch 2 / 2000	Training MSE: 13549309154.724
Epoch 3 / 2000	Training MSE: 7904065834.50988
Epoch 4 / 2000	Training MSE: 4628342869.60223
Epoch 5 / 2000	Training MSE: 2725136965.03407
Epoch 6 / 2000	Training MSE: 1617280355.64333
Epoch 7 / 2000	Training MSE: 970603067.65962
Epoch 8 / 2000	Training MSE: 591586841.4301
Epoch 9 / 2000	Training MSE: 368130752.32659
Epoch 10 / 2000	Training MSE: 235267643.90196
Epoch 11 / 2000	Training MSE: 155320898.93699
Epoch 12 / 2000	Training MSE: 106418677.19266
Epoch 13 / 2000	Training MSE: 75845184.00972
Epoch 14 / 2000	Training MSE: 56191521.87947
Epoch 15 / 2000	Training MSE: 43126945.43992
Epoch 16 / 2000	Training MSE: 34108058.05786
Epoch 17 / 2000	Training MSE: 27630961.66758
Epoch 18 / 2000	Training MSE: 22797746.68576
Epoch 19 / 2000	Training MSE: 19064999.1607
Epoch 20 / 2000	Training MSE: 16097731.45845
Epoch 21 / 2000	Training MSE: 13684397.27206
Epoch 22 / 2000	Training MSE: 11687320.25699
Epo

BatchGradientDescentRegressor(eta=1, epochs=2000, tot=0.001, random_state=42)

In [11]:
# Learned parameters: index 0 is the bias term, followed by one coefficient per feature (x1, x2, x3).
regressor.weights

array([66668.07920447, 33334.0473283 , 33334.04458626, 99993.76078236])

In [12]:
# Scale the test features with the scaler that was fitted on the training set.
# NOTE(review): this overwrites x_test, so re-running the cell scales the data a second
# time; use Restart & Run All rather than re-executing this cell on its own.
x_test = minmax_scaler.transform(x_test)

In [13]:
# predict() returns a column vector of shape (n, 1); ravel() flattens it to 1-D so it
# lines up with y_test.
pred = regressor.predict(x_test).ravel()

In [14]:
# Predicted targets for the test set.
pred

array([137734.03560006, 168307.96257965, 143560.02013075, ...,
       133411.04511243, 112388.09590683, 108151.10523137])

In [15]:
# True targets for the test set, for side-by-side comparison with pred.
y_test

array([137734, 168308, 143560, ..., 133411, 112388, 108151], dtype=int64)

In [16]:
# Test MSE: mean squared error between the true and predicted test targets, rounded to 5 decimals.
np.round(mean_squared_error(y_test, pred), 5)

0.00487