In [30]:
import numpy as np
import pandas as pd
#import statsmodels as sm
from sympy import *

In [102]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [132]:
class GradientDescent:
    def __init__(self, **kwargs):
        self.model = kwargs.get('model')
        self.data = kwargs.get('data')
        self.regularization = kwargs.get('regularization')
        self.n_iter = kwargs.get('n_iter') if kwargs.get('n_iter') else 5
        self.theta_init = kwargs.get('theta_init')
        self.alpha = kwargs.get('alpha') if kwargs.get('alpha') else 0.01
        self.lambda_ = kwargs.get('lambda_') if kwargs.get('lambda_') else 0.01
    
    def run(self):
        self.X = self.data.iloc[:,:-1]
        self.y = self.data.iloc[:,-1]
        self.X.insert(0, 'X0', np.ones(self.X.shape[0]))
        #display(self.X)
        
        if not self.theta_init:
            n = self.X.shape[1] if len(X.shape) == 2 else 1
            self.theta_init = np.zeros(n)
            
        if self.model == 'Linear Regression':
            self.run_linear_reg()
        else:
            self.run_log_reg()
            
    def run_log_reg(self):
        print('Running Gradient descent for Logictic Regression')
        m, n = self.X.shape
        display(Eq(MatrixSymbol(f'theta_{0}', n,1), Matrix(self.theta_init)))
        theta = np.array(self.theta_init)
        for i in range(self.n_iter):
            print(f'\nIteration : {i+1}')
            h = self.sigmoid(self.X @ theta)
            display(Eq(MatrixSymbol('\nh_{\u03B8}(x)', n, 1), Matrix(h)))
            theta = self.gd_update(h,m, theta)
            display(Eq(MatrixSymbol(f'theta_{i+1}', n,1), Matrix(theta)))
            print(f'\nLoss J(\u03B8) = {self.logistic_regression_loss(theta)}')
            print('\n===============================================')
        return theta
    
    def gd_update(self, h, m, theta):
        if not self.regularization:
            return theta - (self.alpha/m) * (self.X.T @ (h - self.y))
        
        elif self.regularization in ['L1', 'Lasso']:
            theta[0] = theta[0] - (self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0])
            theta[1:] = theta[1:] - (self.alpha/m)* (self.X.iloc[:,1:].T @ (h - self.y)
            + self.lambda_*np.sign(theta[1:]))
            
        
        elif self.regularization in ['L2', 'Ridge']:
            theta[0] = theta[0] - (self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0])

            theta[1:] = theta[1:] - (self.alpha/m)* (self.X.iloc[:,1:].T @ (h - self.y) 
            + self.lambda_*theta[1:])
        return theta
    

    # Sigmoid function
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))

    # Loss function for logistic regression
    def logistic_regression_loss(self, theta):
        m, n = self.X.shape
        h = self.sigmoid(self.X @ theta)
        if not self.regularization:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h))
        
        elif self.regularization in ['L1', 'Lasso']:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h)) 
            + (self.lambda_/m) * np.sum(np.abs(theta[1:]))

        
        elif self.regularization in ['L2', 'Ridge']:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h)) 
            + (self.lambda_/(2*m)) * np.sum(theta[1:]**2)
        
        return loss
        
    def run_linear_reg(self):
        print('Running Gradient descent for Linear Regression')
        m = self.X.shape[0]
        n = self.X.shape[1] if len(self.X.shape) == 2 else 1
        #print(m, self.alpha)
        display(Eq(MatrixSymbol(f'theta_{0}', n,1), Matrix(self.theta_init)))
        theta = np.array(self.theta_init)
        for i in range(self.n_iter):
            print(f'\nIteration : {i+1}')
            h = self.X @ theta
            display(Eq(MatrixSymbol('\nh_{\u03B8}(x)', n, 1), Matrix(h)))
            #print(h)
            theta = self.gd_update(h,m, theta)
            
            display(Eq(MatrixSymbol(f'theta_{i+1}', n,1), Matrix(theta)))
            print(f'\nLoss J(\u03B8) = {self.linear_regression_loss(theta)}')
            print('\n===============================================')
        return theta
    
    def linear_regression_loss(self, theta):
        m = self.X.shape[0]
        n = self.X.shape[1] if len(self.X.shape) == 2 else 1
        h = self.X @ theta
        if not self.regularization:
            loss = (1/(2*m)) * np.sum((h - self.y)**2)
        
        elif self.regularization in ['L1', 'Lasso']:
            loss = (1/(2*m)) * np.sum((h - self.y)**2) + (self.lambda_/m) * np.sum(np.abs(theta[1:]))
            
        
        elif self.regularization in ['L2', 'Ridge']:
            loss = (1/(2*m)) * np.sum((h - self.y)**2) + (self.lambda_/(2*m)) * np.sum(theta[1:]**2)
        
        
        
        return loss


## Linear Regression Without Regularization

In [122]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02, theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression


Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.95],
[1.85],
[ 2.3]]))

Eq(theta_1, Matrix([
[             4.9976],
[-0.0523333333333334],
[-0.0286666666666667]]))


Loss J(θ) = 0.3855455837037076


Iteration : 2


Eq(
h_{θ}(x), Matrix([
[0.299266666666658],
[ 1.39593333333333],
[ 1.70759999999999]]))

Eq(theta_2, Matrix([
[ 5.00651466666667],
[0.271328888888893],
[0.885617777777789]]))


Loss J(θ) = 3508.5574833406354


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[103.064803555557],
[82.6391591111121],
[66.2834480000008]]))

Eq(theta_3, Matrix([
[ 3.35819859555553],
[-49.8393413925932],
[ -135.87051531852]]))


Loss J(θ) = 79130421.54427132


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[-15328.0702819972],
[-12112.2665617009],
[-9644.05296229346]]))

Eq(theta_4, Matrix([
[250.619063968833],
[ 7475.1997438156],
[20402.8810238375]]))


Loss J(θ) = 1784786544865.047


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[2302170.71248126],
[1819361.09456636],
[1448679.47280869]]))

Eq(theta_5, Matrix([
[-36884.0912017399],
[-1122659.74249128],
[-3064172.75885587]]))


Loss J(θ) = 4.02558580833604e+16



## Linear Regression With L1 Regularization

In [123]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L1',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression


Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.95],
[1.85],
[ 2.3]]))

Eq(theta_1, Matrix([
[             4.9976],
[             -0.039],
[-0.0153333333333334]]))


Loss J(θ) = 0.7031366948148087


Iteration : 2


Eq(
h_{θ}(x), Matrix([
[2.09926666666666],
[2.79593333333333],
[          2.9076]]))

Eq(theta_2, Matrix([
[  4.97718133333333],
[-0.595337777777774],
[ -1.51438222222221]]))


Loss J(θ) = 9537.335772164506


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[ -167.29786311111],
[-131.056840888888],
[-103.745885333333]]))

Eq(theta_3, Matrix([
[7.68945192888887],
[82.0138141629624],
[223.967529125924]]))


Loss J(θ) = 215108725.00458696


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[ 25274.925860225],
[19975.4371360769],
[15906.1556243732]]))

Eq(theta_4, Matrix([
[-399.989072208945],
[ -12324.985200777],
[ -33639.483766681]]))


Loss J(θ) = 4851777056120.387


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[-3795722.84776751],
[-2999683.32042612],
[-2388518.57109638]]))

Eq(theta_5, Matrix([
[60826.2074563911],
[ 1850995.5945686],
[5052083.50207747]]))


Loss J(θ) = 1.0943182488777405e+17



## Linear Regression With L2 Regularization

In [124]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L2',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression


Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.95],
[1.85],
[ 2.3]]))

Eq(theta_1, Matrix([
[             4.9976],
[-0.0519333333333334],
[-0.0282666666666667]]))


Loss J(θ) = 0.36294800888889234


Iteration : 2


Eq(
h_{θ}(x), Matrix([
[0.353266666666658],
[ 1.43793333333333],
[ 1.74359999999999]]))

Eq(theta_2, Matrix([
[ 5.00563466666667],
[0.245621333333337],
[0.813594666666678]]))


Loss J(θ) = 2973.939619005096


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[94.9618480000013],
[76.2337413333343],
[61.1899546666674]]))

Eq(theta_3, Matrix([
[ 3.48799770666665],
[-45.8913659733339],
[-125.097795840002]]))


Loss J(θ) = 67083991.48113447


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[-14112.4893953602],
[-11151.6198188268],
[-8879.12073189345]]))

Eq(theta_4, Matrix([
[231.141130680536],
[6883.02982734018],
[18786.6166344763]]))


Loss J(θ) = 1513347972520.222


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[2119798.84853522],
[1675236.21757229],
[1333919.03401947]]))

Eq(theta_5, Matrix([
[-33961.8546034993],
[-1033817.35062785],
[-2821687.49225714]]))


Loss J(θ) = 3.4139621735077836e+16



## Logictic Regression Without Regularization

In [127]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 0.02, theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Logictic Regression


Eq(theta_{0}, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.721115178022863],
[0.864127102990906],
[0.908877038985144]]))

Eq(theta_1, Matrix([
[ 5.01497253786667],
[0.443609866832486],
[ 1.20617127746222]]))


Loss J(θ) = -inf


Iteration : 2


  result = getattr(ufunc, method)(*inputs, **kwargs)


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_2, Matrix([
[ 5.02657253786667],
[0.811276533499152],
[ 2.14750461079555]]))


Loss J(θ) = -inf


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_3, Matrix([
[5.03817253786667],
[1.17894320016582],
[3.08883794412888]]))


Loss J(θ) = -inf


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_4, Matrix([
[5.04977253786667],
[1.54660986683249],
[4.03017127746222]]))


Loss J(θ) = -inf


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_5, Matrix([
[5.06137253786667],
[1.91427653349915],
[4.97150461079555]]))


Loss J(θ) = -inf



## Logistic Regression With L1 Regularization

In [128]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Logictic Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L1',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Logictic Regression


Eq(theta_{0}, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.721115178022863],
[0.864127102990906],
[0.908877038985144]]))

Eq(theta_1, Matrix([
[ 5.01497253786667],
[0.456943200165819],
[ 1.21950461079555]]))


Loss J(θ) = -inf


Iteration : 2


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_2, Matrix([
[ 5.02657253786667],
[0.811276533499152],
[ 2.14750461079555]]))


Loss J(θ) = -inf


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_3, Matrix([
[5.03817253786667],
[1.16560986683249],
[3.07550461079555]]))


Loss J(θ) = -inf


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_4, Matrix([
[5.04977253786667],
[1.51994320016582],
[4.00350461079555]]))


Loss J(θ) = -inf


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_5, Matrix([
[5.06137253786667],
[1.87427653349915],
[4.93150461079555]]))


Loss J(θ) = -inf



## Logistic Regression With L2 Regularization

In [133]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L2',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Logictic Regression


Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))


Iteration : 1


Eq(
h_{θ}(x), Matrix([
[0.721115178022863],
[0.864127102990906],
[0.908877038985144]]))

Eq(theta_1, Matrix([
[ 5.01497253786667],
[0.444009866832486],
[ 1.20657127746222]]))


Loss J(θ) = -inf


Iteration : 2


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_2, Matrix([
[ 5.02657253786667],
[0.805756401941386],
[ 2.13181699376272]]))


Loss J(θ) = -inf


Iteration : 3


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_3, Matrix([
[5.03817253786667],
[ 1.1626796499155],
[3.04472610051255]]))


Loss J(θ) = -inf


Iteration : 4


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_4, Matrix([
[5.04977253786667],
[1.51484392124996],
[3.94546308583905]]))


Loss J(θ) = -inf


Iteration : 5


Eq(
h_{θ}(x), Matrix([
[1.0],
[1.0],
[1.0]]))

Eq(theta_5, Matrix([
[5.06137253786667],
[1.86231266896663],
[4.83419024469453]]))


Loss J(θ) = -inf

