In [292]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_X_y, column_or_1d
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

In [209]:
# Fetch the diabetes dataset bundle, then pull out the feature matrix and target.
diab_ = load_diabetes()
X = diab_['data']
y = diab_['target']

In [210]:
# Display the dimensions of the feature matrix and of the target vector.
(X.shape, y.shape)

((442, 10), (442,))

In [411]:
class CustomLinearRegression(BaseEstimator, RegressorMixin):
    '''Linear regression solved by the normal equation or mini-batch gradient descent.

    Parameters
    ----------
    fit_intercept : bool, default=True
        When True a column of ones is prepended to X so an intercept is learned.
    normalize : bool, default=False
        Stored on the estimator only; neither `fit` nor `predict` uses it.
    copy_X : bool, default=True
        Stored on the estimator only; neither `fit` nor `predict` uses it.
    solver : {'ols', 'gd'}, default='ols'
        'ols' solves with the pseudo-inverse, 'gd' runs mini-batch gradient descent.
    batch_size : int, default=10
        Rows per mini-batch (used by the 'gd' solver only).
    epochs : int, default=100
        Number of passes over the mini-batches (used by the 'gd' solver only).
    lr : float, default=0.01
        Gradient-descent step size (used by the 'gd' solver only).
    verbose : bool, default=False
        Stored on the estimator only; neither `fit` nor `predict` uses it.

    Attributes
    ----------
    params_ : ndarray or None
        Flat weight vector (intercept first when `fit_intercept` is True);
        None until `fit` has been called.
    coef_ : ndarray
        Feature weights, without the intercept.
    intercept_ : float
        Learned intercept, or 0 when `fit_intercept` is False.
    errors : list of float
        MSE of every mini-batch step; empty after an 'ols' fit.
    grads : list of ndarray
        Last gradient of each epoch; empty after an 'ols' fit.
    '''

    # Solvers understood by `fit`.
    _SOLVERS = ('ols', 'gd')

    def __init__(self, fit_intercept=True,
                 normalize=False,
                 copy_X=True,
                 solver='ols',
                 batch_size=10,
                 epochs=100,
                 lr=0.01,
                 verbose=False):
        # `predict` uses None here as the "not fitted yet" marker.
        self.params_ = None
        self.copy_X = copy_X
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.solver = solver
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.verbose = verbose
        # Fail early on a bad solver, as the constructor always has.
        self._check_solver()

    def _check_solver(self):
        '''Raise ValueError when `self.solver` is not one of the supported solvers.'''
        if self.solver not in self._SOLVERS:
            raise ValueError(
                f'Invalid value for solver parameter: {self.solver!r}. '
                'Can only take `ols` and `gd` as inputs.'
            )

    def _create_mini_batches(self, X, y):
        '''Shuffle the rows of (X, y) together and cut them into mini-batches.

        Every row ends up in exactly one batch; the last batch is shorter when
        the number of rows is not a multiple of `batch_size`.

        Returns a list of `(X_mini, y_mini)` tuples where `y_mini` is 1-D.
        Raises ValueError when `batch_size` is smaller than 1.
        '''
        if self.batch_size < 1:
            raise ValueError('`batch_size` must be a positive integer.')
        # Glue the target on as the last column so one shuffle keeps rows aligned.
        data = np.hstack([X, np.reshape(y, (-1, 1))])
        np.random.shuffle(data)
        mini_batches = []
        # Stepping by batch_size (instead of counting full batches) keeps the
        # trailing partial batch and never returns an empty list for non-empty data.
        for start in range(0, len(data), self.batch_size):
            chunk = data[start:start + self.batch_size]
            mini_batches.append((chunk[:, :-1], chunk[:, -1]))
        return mini_batches

    def mse_loss(self, predictions, labels):
        '''Return the mean squared error between `predictions` (flattened) and `labels`.'''
        return np.mean((predictions.ravel() - labels) ** 2)

    def random_weight_vector(self, dim):
        '''Return a (dim, 1) column vector drawn from a standard normal distribution.'''
        return np.random.normal(loc=0, scale=1, size=(dim, 1))

    def fit(self, X, y):
        '''Fit the model on `X` (2-D) and `y` (1-D) and return `self`.

        Raises ValueError for an unsupported `solver`, including one that was
        set through `set_params` after construction.
        '''
        # Re-validate: set_params (used by GridSearchCV) bypasses __init__.
        self._check_solver()
        # check_X_y enforces a 2-D X, a 1-D y, finite values, matching lengths...
        X, y = check_X_y(X, y)
        y = y.ravel()
        if self.fit_intercept:
            # Leading column of ones -> params_[0] becomes the intercept.
            X = np.insert(X, 0, 1, axis=1)

        # Reset the diagnostics so a refit never exposes data from an earlier run.
        self.errors = []
        self.grads = []

        if self.solver == 'ols':
            # Normal equation through the Moore-Penrose pseudo-inverse.
            self.params_ = np.linalg.pinv(X) @ y
        else:
            # Start from random standard-normal weights.
            self.params_ = self.random_weight_vector(dim=X.shape[1])
            # NOTE: batches are built once and reused, in the same order, every epoch.
            batches = self._create_mini_batches(X, y.reshape(-1, 1))
            for _ in range(self.epochs):
                for x_mini, y_mini in batches:
                    predictions = x_mini @ self.params_
                    self.errors.append(self.mse_loss(predictions, y_mini))
                    # d(MSE)/dw = -2/n * (y - Xw)^T X
                    residuals = y_mini - predictions.ravel()
                    gradient = (-2 / len(x_mini)) * (residuals @ x_mini)
                    self.params_ -= self.lr * gradient.reshape(-1, 1)
                # Keep the last gradient of the epoch as a convergence trace.
                self.grads.append(gradient)

        self.params_ = self.params_.ravel()
        if self.fit_intercept:
            self.intercept_ = self.params_[0]
            self.coef_ = self.params_[1:]
        else:
            # No intercept column was added, so every weight is a coefficient.
            self.intercept_ = 0
            self.coef_ = self.params_
        return self

    def predict(self, X):
        '''Return predictions for `X`; raises ValueError when `fit` was not called.'''
        if self.params_ is None:
            raise ValueError("You must call `fit` before `predict`.")
        # Work on a plain array so the result type does not depend on fit_intercept.
        X = np.asarray(X)
        if self.fit_intercept:
            X = np.insert(X, 0, 1, axis=1)
        return X @ self.params_


In [416]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression

# Search space for the grid search. Each key must appear only once: a repeated
# key in a dict literal silently keeps just the last value.
# NOTE: the 'ols' solver does not use `lr` or `epochs`, so every 'ols'
# combination fits the same closed-form model and scores identically.
params = {
    'solver': ['ols', 'gd'],
    'lr': [0.1, 0.01, 0.001],
    'epochs': [100, 200, 300, 400, 500],
}
regressor = CustomLinearRegression()
clf = GridSearchCV(regressor, param_grid=params)


In [417]:
# The 'gd' solver draws its initial weights and batch order from NumPy's global
# RNG; seeding here makes the search give the same scores on every re-run.
np.random.seed(108)
clf.fit(X, y)

In [429]:
import pandas as pd

# Tabulate the cross-validation results, best mean test score first.
(
    pd.DataFrame(clf.cv_results_)
    .sort_values(by='mean_test_score', ascending=False)
)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_epochs,param_lr,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000627,0.000518,0.000311,0.000121,100,0.1,ols,"{'epochs': 100, 'lr': 0.1, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
16,0.00018,2e-05,0.000162,5e-06,300,0.001,ols,"{'epochs': 300, 'lr': 0.001, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
2,0.000186,3.5e-05,0.000164,5e-06,100,0.01,ols,"{'epochs': 100, 'lr': 0.01, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
28,0.000184,2.6e-05,0.000173,1e-05,500,0.001,ols,"{'epochs': 500, 'lr': 0.001, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
4,0.000186,2.9e-05,0.000171,8e-06,100,0.001,ols,"{'epochs': 100, 'lr': 0.001, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
20,0.000181,2.2e-05,0.000166,7e-06,400,0.01,ols,"{'epochs': 400, 'lr': 0.01, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
6,0.000184,3.2e-05,0.000165,7e-06,200,0.1,ols,"{'epochs': 200, 'lr': 0.1, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
8,0.000186,2.6e-05,0.00017,9e-06,200,0.01,ols,"{'epochs': 200, 'lr': 0.01, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
26,0.000181,2.3e-05,0.000168,8e-06,500,0.01,ols,"{'epochs': 500, 'lr': 0.01, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
10,0.000177,2.1e-05,0.000168,7e-06,200,0.001,ols,"{'epochs': 200, 'lr': 0.001, 'solver': 'ols'}",0.429556,0.522599,0.482681,0.426498,0.550248,0.482316,0.049269,1
