In [1]:
# Import Libraries

from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.image

from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor, LinearRegression
from sklearn.metrics import r2_score

### Gradient Descent

`Batch gradient descent (BGD) updates the model parameters after processing the entire dataset in each iteration.`

#### Advantage
> **Stable Convergence:** <br>`Since BGD computes the exact gradient over the entire dataset, it updates weights smoothly and follows the optimal path.`<br>
> **Leverages Matrix Operations:** <br>`BGD uses vectorized operations, which run efficiently on modern hardware (e.g., GPUs).`<br>
> **More Accurate Gradient Estimates:** <br>`Because BGD considers the entire dataset, it moves towards the minimum accurately, without noisy updates.`

#### Disadvantage
> **Slow for Large Datasets:** <br>`When dealing with millions of samples, computing the gradient over the entire dataset per iteration can be very slow.`<br>
> **Higher Memory Usage:** <br>`Since it needs to hold the entire dataset in memory, it may not work well for massive datasets.`<br>
> **May get Stuck in Local Minima:** <br>`If the cost function has multiple local minima, BGD may get stuck in one of them because its exact, noise-free updates cannot jump out of it.`

In [5]:
# Synthetic regression problem: 100 samples, 5 features (3 informative), noisy single target

X, y = make_regression(
    n_samples=100,
    n_features=5,
    n_informative=3,
    n_targets=1,
    noise=50,
    random_state=1,
)

In [6]:
# Hold out 30% of the samples for evaluation (fixed seed so the split is reproducible)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

### Batch Gradient Descent from Scratch

In [7]:
# Write a class for gradient descent

class GradientDescent:
    '''
    Linear regression fitted with batch gradient descent on the mean squared error.

    training: method for learning the intercept and one coefficient per feature,
    testing: method returning predictions computed from the learned parameters.
    '''
    def __init__(self, learning_rate=0.005, epochs= 100):
        '''
        Input: learning_rate (step size applied to every gradient update) and
               epochs (number of full-batch updates performed by training)
        '''
        # Both parameters stay None until training() has been called.
        self.intercept_ = None
        self.coeff_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def training(self, X_train, y_train):
        '''
        Input: independent (X_train) and dependent (y_train) variables.
               X_train is array-like of shape (n_samples, n_features); a 1-D
               input is treated as a single feature column.
               y_train is array-like holding n_samples values; a column
               vector of shape (n_samples, 1) is flattened.
        Output: returns the updated intercept and coefficients
        Raises: ValueError if X_train and y_train disagree on the sample count
        '''
        X_train = np.asarray(X_train, dtype=float)
        if X_train.ndim == 1:
            X_train = X_train.reshape(-1, 1)

        # Flatten the target: an (n, 1) column would otherwise broadcast against
        # the (n,) predictions into an (n, n) residual and corrupt the gradients.
        y_train = np.asarray(y_train, dtype=float).ravel()

        n_samples = X_train.shape[0]
        if y_train.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} samples but y_train has {y_train.shape[0]}"
            )

        # Initialize bias and weights
        self.intercept_ = 0
        self.coeff_ = np.ones(X_train.shape[1])

        for _ in range(self.epochs):
            # Compute predictions: X . w + b
            y_pred = np.dot(X_train, self.coeff_) + self.intercept_
            residual = y_train - y_pred

            # Gradients of the mean squared error w.r.t. the intercept and the weights
            derivative_intercept = -2/n_samples * np.sum(residual)
            derivative_slope = -2 * np.dot(residual, X_train)/n_samples

            # Step against the gradient
            self.intercept_ = self.intercept_ - (self.learning_rate * derivative_intercept)
            self.coeff_ = self.coeff_ - (self.learning_rate * derivative_slope)

        return self.intercept_, self.coeff_

    def testing(self, X):
        '''
        Input: independent variable(s) X, with one column per coefficient
               learned by training (training must have been called first)
        Output: predictions
        '''
        y_test_pred = np.dot(X, self.coeff_) + self.intercept_

        return y_test_pred

In [14]:
# Instantiate the from-scratch model with its default learning rate and epoch count
l = GradientDescent()
l

<__main__.GradientDescent at 0x225167ba2a0>

In [15]:
# Fit on the training split; the call returns the learned (intercept, coefficients)
l.training(X_train, y_train)

(np.float64(1.7863947287375876),
 array([10.12534506, 36.47214664,  2.37708351,  2.80728833, 18.94088823]))

In [16]:
# Predictions for the training split (the whole array is displayed; slice it to shorten the output)
l.testing(X_train)

array([  23.71425707,  -93.29659886,  -78.27177675,   41.03360564,
        -21.6274495 ,    2.89790796,   -6.96099471,    5.18547049,
         -1.52195201,   61.32220427,   91.76395613,   30.01136028,
          6.6443924 ,   12.2947879 , -113.45938075,  -16.83616648,
         23.89305106,    6.14015025,   70.05949402,    8.25109385,
        101.65814375,   28.96541775,   19.21521755,   17.89450308,
         -8.45260525,  -79.24912344,   49.79574253, -102.64510339,
         36.36367016,   37.27994902,   32.95171094,   90.28036514,
         13.08177102,    1.65148714,   -8.68443334,    1.27847747,
        -44.65461728,    2.57792686,   18.31727148,   -8.54715417,
         52.6945299 ,  -51.5999605 ,    0.60529351,  -13.16462527,
         58.32737569,  -39.24212007,   36.04347098,   75.56406946,
         70.06007108,  -25.75881547,   31.77779468,   13.52188401,
         88.04363647,  -37.61606037,   -6.33747263,  -69.71329616,
         32.27773392,  -16.60283242,   -7.60827143,   18.06444

In [17]:
# R^2 of the from-scratch model on the held-out test split

r2_score(y_test, l.testing(X_test))

0.5162244485558467

#### By using Linear Regression SKLearn

In [18]:
# scikit-learn's LinearRegression, used as a reference model for comparison
lr = LinearRegression()
lr

In [19]:
# Fit the reference model on the same training split
lr.fit(X_train, y_train)

In [20]:
# Coefficients and intercept learned by the reference model
lr.coef_, lr.intercept_

(array([16.07541589, 53.85723097,  5.00123167, -4.80877118, 30.73767307]),
 np.float64(0.12855701343173997))

In [21]:
# R^2 of the reference model on the held-out test split
r2_score(y_test, lr.predict(X_test))

0.4881068480437871

### Batch GD from Scratch with convergence condition

In [105]:
class BatchGradientDescent:
    '''
    A class for performing gradient descent for linear regression.

    Training ends at the first of: the epoch budget being used up, every
    gradient component falling below `tol` in absolute value (convergence),
    or the divergence check made right after the 10th update firing.
    '''

    # Number of updates after which the divergence check is evaluated.
    _DIVERGENCE_CHECK_EPOCH = 10

    def __init__(self, learning_rate=0.005, epochs=100, tol=0.1):
        '''
        Initializes the model parameters.

        learning_rate: step size applied to every gradient update.
        epochs: maximum number of full-batch updates.
        tol: convergence threshold on the absolute value of every gradient component.
        '''
        self.intercept_ = 0
        self.coeff_ = 1
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.tol = tol

    def training(self, X_train, y_train):
        '''
        Trains the model using gradient descent.

        X_train: array-like of shape (n_samples, n_features); a 1-D input is
                 treated as a single feature column.
        y_train: array-like holding n_samples values; a column vector of
                 shape (n_samples, 1) is flattened.

        Returns the tuple (intercept, coefficients). If the divergence check
        fires, a message string asking for a different learning rate is
        returned instead.
        Raises ValueError if X_train and y_train disagree on the sample count.
        '''
        X_train = np.asarray(X_train, dtype=float)
        if X_train.ndim == 1:
            X_train = X_train.reshape(-1, 1)

        # Flatten the target: an (n, 1) column would otherwise broadcast against
        # the (n,) predictions into an (n, n) residual and corrupt the gradients.
        y_train = np.asarray(y_train, dtype=float).ravel()

        n_samples = X_train.shape[0]
        if y_train.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} samples but y_train has {y_train.shape[0]}"
            )

        # Initialize parameters
        # NOTE(review): starting the intercept at sum(y) / 2 is unusual; the mean of y
        # may have been intended - confirm before changing, it alters the fitted values.
        self.intercept_ = np.sum(y_train) / 2
        self.coeff_ = np.ones(X_train.shape[1])

        epoch = 0
        while epoch < self.epochs:
            # Predictions
            y_pred = np.dot(X_train, self.coeff_) + self.intercept_
            residual = y_train - y_pred

            # Compute derivatives of the mean squared error
            derivative_intercept = -2 / n_samples * np.sum(residual)
            derivative_slope = -2 * np.dot(residual, X_train) / n_samples

            # Check convergence: every gradient component is already small
            if (abs(derivative_intercept) < self.tol) and (np.abs(derivative_slope) < self.tol).all():
                break

            # Remember the pre-update parameters for the divergence check. The
            # coefficients are rebound to a NEW array below (no in-place `-=`),
            # so `previous_coeff` keeps the old values instead of aliasing the
            # updated ones.
            previous_intercept = self.intercept_
            previous_coeff = self.coeff_

            # Update parameters
            self.intercept_ = self.intercept_ - self.learning_rate * derivative_intercept
            self.coeff_ = self.coeff_ - self.learning_rate * derivative_slope

            epoch += 1

            # Early stopping check: if the intercept and every coefficient grew in
            # magnitude during the 10th update, treat the run as diverging.
            if epoch == self._DIVERGENCE_CHECK_EPOCH:
                intercept_grew = abs(previous_intercept) < abs(self.intercept_)
                coeffs_grew = (np.abs(previous_coeff) < np.abs(self.coeff_)).all()
                if intercept_grew and coeffs_grew:
                    return 'Please change the learning rate as the model will not be able to converge.'

        return self.intercept_, self.coeff_

    def testing(self, X_train):
        '''
        Predicts output values after training.

        X_train: feature matrix with one column per learned coefficient.
        Returns X_train . coeff_ + intercept_.
        '''
        y_test_pred = np.dot(X_train, self.coeff_) + self.intercept_
        return y_test_pred


In [106]:
# Create an instance of the convergence-aware model with its default hyperparameters

g = BatchGradientDescent()
g

<__main__.GradientDescentFinal_2 at 0x22538973d40>

In [107]:
# Fit on the training split; returns (intercept, coefficients), or a message string if the divergence check fires
g.training(X_train, y_train)

(np.float64(91.4089262411558),
 array([ 7.79570072, 23.0930828 , -0.39741181, -9.03666692, 28.33001461]))

In [108]:
# Predictions for the training split (the whole array is displayed; slice it to shorten the output)
g.testing(X_train) 

array([115.69420352,  30.41271807,   6.32071444,  97.04589134,
        42.21716169,  94.81056745,  48.47122667,  81.35830768,
       112.34461577, 136.99969565, 176.63007997, 107.82475046,
        89.26896382,  91.21164838,  36.64821249,  87.7895936 ,
        87.69051281, 129.81260703, 122.02139578,  82.13368685,
       160.84406729,  85.07391672,  94.14856032,  80.03692564,
        85.04709689,  17.43009504, 109.48915241,  -2.31945439,
       120.05599042,  98.97489621, 158.3632039 , 140.72579901,
        98.26573348,  82.72378564,  77.21649403,  97.25151192,
        53.46536157,  57.77917227,  85.78900976,  76.62187064,
       157.41816364,  19.44577036,  95.08760533,  89.8864382 ,
       142.22722182,  74.02236492, 131.53693023, 141.61489795,
       113.99174529,  49.88935723, 111.08282367, 103.23436896,
       147.86201413,  84.10810651,  52.49749993,  34.96783144,
        98.40384566,  77.33712138,  82.6174921 , 103.21839205,
       116.06507955,  92.52994428, 124.94588757,  96.72