In [30]:
import numpy as np
import pandas as pd
#import statsmodels as sm
from sympy import *

In [102]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [252]:
class GradientDescent:
    def __init__(self, **kwargs):
        self.model = kwargs.get('model')
        self.data = kwargs.get('data')
        self.regularization = kwargs.get('regularization')
        self.n_iter = kwargs.get('n_iter') if kwargs.get('n_iter') else 5
        self.theta_init = kwargs.get('theta_init')
        self.alpha = kwargs.get('alpha') if kwargs.get('alpha') else 0.01
        self.lambda_ = kwargs.get('lambda_') if kwargs.get('lambda_') else 0.01
    
    def run(self):
        self.X = self.data.iloc[:,:-1]
        self.y = self.data.iloc[:,-1]
        self.X.insert(0, 'X0', np.ones(self.X.shape[0]))
        #display(self.X)
        
        if not self.theta_init:
            n = self.X.shape[1] if len(X.shape) == 2 else 1
            self.theta_init = np.zeros(n)
            
        if self.model == 'Linear Regression':
            self.run_linear_reg()
        else:
            self.run_log_reg()
            
    def run_log_reg(self):
        if not self.regularization:
            print('Running Gradient descent for Logistic Regression')
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), 
                       Symbol('-1/(n)\u2211_{i=1}^n[y^{i}log(h_{\u03B8}(x^{i})) + (1-y^{i})log(1-h_{\u03B8}(x^{i}))]')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_j'), Symbol('\u03B8_j-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i}')))
        
        elif self.regularization in ['L1', 'Lasso']:
            print('Running Gradient descent for Logistic Regression with L1 Regularization')        
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), 
                       Symbol('-1/(n)\u2211_{i=1}^n[y^{i}log(h_{\u03B8}(x^{i})) + (1-y^{i})log(1-h_{\u03B8}(x^{i}))] + \u03BB/(n)\u2211_{j=1}^d|\u03B8_j|')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_0'), Symbol('\u03B8_0-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})')))
            display(Eq(Symbol('\u03B8_j'), 
                       Symbol('\u03B8_j-\u03B1/(n)[1/2\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i} + \u03BBsign(\u03B8_j)]')))
        

        
        elif self.regularization in ['L2', 'Ridge']:
            print('Running Gradient descent for Logistic Regression with L2 Regularization')
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), 
                       Symbol('-1/(n)\u2211_{i=1}^n[y^{i}log(h_{\u03B8}(x^{i})) + (1-y^{i})log(1-h_{\u03B8}(x^{i}))] + \u03BB/(2n)\u2211_{j=1}^d\u03B8_j^2')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_0'), Symbol('\u03B8_0-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})')))
            display(Eq(Symbol('\u03B8_j'), 
                       Symbol('\u03B8_j-\u03B1/(n)[\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i} + \u03BB\u03B8_j]')))
        
        
            
        print('\nWhere:')
        display(Eq(Symbol('\nh_{\u03B8}(x)'), Pow(Symbol('1+ e^{-\u03B8^{T}x}'),-1)))
        print('\n===============================================')
            
        m = self.X.shape[0]
        n = self.X.shape[1] if len(self.X.shape) == 2 else 1
        #print(m, self.alpha)
        display(Eq(MatrixSymbol(f'theta_{0}', n,1), Matrix(self.theta_init)))
        print('\n===============================================')
        theta = np.array(self.theta_init).astype(float)
        for i in range(self.n_iter):
            print(f'\nIteration : {i+1}')
            h = self.sigmoid(self.X @ theta)
            #print(h)
            display(Eq(MatrixSymbol('h_{\u03B8}(x)', m, 1), Matrix(h)))
            #df = pd.DataFrame({'h_{\u03B8}(x) = ' : h, 'y(actual)'})
            theta = self.gd_update(h,m, theta)
            display(Eq(MatrixSymbol(f'theta_{i+1}', n,1), Matrix(theta)))
            print(f'\nLoss J(\u03B8) = {self.logistic_regression_loss(theta)}')
            print('\n===============================================')
        return theta
    
    def gd_update(self, h, m, theta):
        if not self.regularization:
            return theta - (self.alpha/m) * (self.X.T @ (h - self.y))
        
        elif self.regularization in ['L1', 'Lasso']:
            #print(theta[0])
            theta[0] = theta[0] - (self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0])
            theta[1:] = theta[1:] - (self.alpha/m)* (self.X.iloc[:,1:].T @ (h - self.y)
            + self.lambda_*np.sign(theta[1:]))
            #print((self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0]))
        
        elif self.regularization in ['L2', 'Ridge']:
            theta[0] = theta[0] - (self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0])
            #print((self.alpha/m) * np.sum((h - self.y) * self.X.iloc[:, 0]))

            theta[1:] = theta[1:] - (self.alpha/m)* (self.X.iloc[:,1:].T @ (h - self.y) 
            + self.lambda_*theta[1:])
        return theta
    

    # Sigmoid function
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))

    # Loss function for logistic regression
    def logistic_regression_loss(self, theta):
        m, n = self.X.shape
        h = self.sigmoid(self.X @ theta)
        if not self.regularization:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h))
        
        elif self.regularization in ['L1', 'Lasso']:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h)) 
            + (self.lambda_/m) * np.sum(np.abs(theta[1:]))

        
        elif self.regularization in ['L2', 'Ridge']:
            loss = (1/m) * (-self.y.T @ np.log(h) - (1 - self.y).T @ np.log(1 - h)) 
            + (self.lambda_/(2*m)) * np.sum(theta[1:]**2)
        
        return loss
        
    def run_linear_reg(self):        
        if not self.regularization:
            print('Running Gradient descent for Linear Regression')
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), Symbol('1/(2n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})^2')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_j'), Symbol('\u03B8_j-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i}')))
        
        elif self.regularization in ['L1', 'Lasso']:
            print('Running Gradient descent for Linear Regression with L1 Regularization')        
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), 
                       Symbol('1/(2n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})^2 + \u03BB/(n)\u2211_{j=1}^d|\u03B8_j|')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_0'), Symbol('\u03B8_0-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})')))
            display(Eq(Symbol('\u03B8_j'), 
                       Symbol('\u03B8_j-\u03B1/(n)[\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i} + \u03BBsign(\u03B8_j)]')))
        

        
        elif self.regularization in ['L2', 'Ridge']:
            print('Running Gradient descent for Linear Regression with L2 Regularization')
            print('\nCost Function:')
            display(Eq(Symbol('J(\u03B8)'), 
                       Symbol('1/(2n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})^2 + \u03BB/(2n)\u2211_{j=1}^d\u03B8_j^2')))
            
            print('\nTheta Update:')
            display(Eq(Symbol('\u03B8_0'), Symbol('\u03B8_0-\u03B1/(n)\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})')))
            display(Eq(Symbol('\u03B8_j'), 
                       Symbol('\u03B8_j-\u03B1/(n)[\u2211_{i=1}^n(h_{\u03B8}(x^{i}) - y^{i})x^{i} + \u03BB\u03B8_j]')))
        
        
            
        print('\nWhere:')
        display(Eq(Symbol('\nh_{\u03B8}(x)'), Symbol('\u03B8^{T}x')))
        print('\n===============================================')
            
        m = self.X.shape[0]
        n = self.X.shape[1] if len(self.X.shape) == 2 else 1
        #print(m, self.alpha)
        display(Eq(MatrixSymbol(f'theta_{0}', n,1), Matrix(self.theta_init)))
        print('\n===============================================')
        theta = np.array(self.theta_init).astype(float)
        for i in range(self.n_iter):
            print(f'\nIteration : {i+1}')
            h = self.X @ theta
            display(Eq(MatrixSymbol('h_{\u03B8}(x)', m, 1), Matrix(h)))
            #print(h)
            theta = self.gd_update(h,m, theta)
            
            display(Eq(MatrixSymbol(f'theta_{i+1}', n,1), Matrix(theta)))
            print(f'\nLoss J(\u03B8) = {self.linear_regression_loss(theta)}')
            print('\n===============================================')
        return theta
    
    def linear_regression_loss(self, theta):
        m = self.X.shape[0]
        n = self.X.shape[1] if len(self.X.shape) == 2 else 1
        h = self.X @ theta
        if not self.regularization:
            loss = (1/(2*m)) * np.sum((h - self.y)**2)
        
        elif self.regularization in ['L1', 'Lasso']:
            loss = (1/(2*m)) * np.sum((h - self.y)**2) + (self.lambda_/m) * np.sum(np.abs(theta[1:]))
            
        
        elif self.regularization in ['L2', 'Ridge']:
            loss = (1/(2*m)) * np.sum((h - self.y)**2) + (self.lambda_/(2*m)) * np.sum(theta[1:]**2)
        
        
        
        return loss


## Linear Regression Without Regularization

In [253]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [80,80,100], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02, theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression

Cost Function:


Eq(J(θ), 1/(2n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})^2)


Theta Update:


Eq(θ_j, θ_j-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i})


Where:


Eq(
h_{θ}(x), θ^{T}x)




Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[1.55],
[1.85],
[ 1.1]]))

Eq(theta_1, Matrix([
[            5.0016],
[0.0476666666666667],
[ 0.179333333333333]]))


Loss J(θ) = 209.23321865037045


Iteration : 2


Eq(h_{θ}(x), Matrix([
[         21.0166],
[20.5399333333333],
[24.3649333333333]]))

Eq(theta_2, Matrix([
[ 4.59372355555556],
[-12.1848488888889],
[        -35.47144]]))


Loss J(θ) = 5962734.246445694


Iteration : 3


Eq(h_{θ}(x), Matrix([
[-3259.59118755556],
[-3137.74269866667],
[-3908.09574311111]]))

Eq(theta_3, Matrix([
[73.3281877511111],
[2053.93035994074],
[5984.59312805926]]))


Loss J(θ) = 170032554508.70718


Iteration : 4


Eq(h_{θ}(x), Matrix([
[550728.341030418],
[530189.037431011],
[  660150.5517919]]))

Eq(theta_4, Matrix([
[-11533.7597472711],
[-346843.331477372],
[-1010602.28724597]]))


Loss J(θ) = 4848626323247809.0


Iteration : 5


Eq(h_{θ}(x), Matrix([
[-92999233.3411328],
[-89530800.0263591],
[-111477062.428665]]))

Eq(theta_5, Matrix([
[1948513.57716044],
[58570191.2392465],
[170656793.217193]]))


Loss J(θ) = 1.3826280085267869e+20



## Linear Regression With L1 Regularization

In [254]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L1',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression with L1 Regularization

Cost Function:


Eq(J(θ), 1/(2n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})^2 + λ/(n)∑_{j=1}^d|θ_j|)


Theta Update:


Eq(θ_0, θ_0-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i}))

Eq(θ_j, θ_j-α/(n)[∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i} + λsign(θ_j)])


Where:


Eq(
h_{θ}(x), θ^{T}x)




Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.95],
[1.85],
[ 2.3]]))

Eq(theta_1, Matrix([
[             4.9976],
[             -0.039],
[-0.0153333333333334]]))


Loss J(θ) = 0.7031366948148087


Iteration : 2


Eq(h_{θ}(x), Matrix([
[2.09926666666666],
[2.79593333333333],
[          2.9076]]))

Eq(theta_2, Matrix([
[  4.97718133333333],
[-0.595337777777774],
[ -1.51438222222221]]))


Loss J(θ) = 9537.335772164506


Iteration : 3


Eq(h_{θ}(x), Matrix([
[ -167.29786311111],
[-131.056840888888],
[-103.745885333333]]))

Eq(theta_3, Matrix([
[7.68945192888887],
[82.0138141629624],
[223.967529125924]]))


Loss J(θ) = 215108725.00458696


Iteration : 4


Eq(h_{θ}(x), Matrix([
[ 25274.925860225],
[19975.4371360769],
[15906.1556243732]]))

Eq(theta_4, Matrix([
[-399.989072208945],
[ -12324.985200777],
[ -33639.483766681]]))


Loss J(θ) = 4851777056120.387


Iteration : 5


Eq(h_{θ}(x), Matrix([
[-3795722.84776751],
[-2999683.32042612],
[-2388518.57109638]]))

Eq(theta_5, Matrix([
[60826.2074563911],
[ 1850995.5945686],
[5052083.50207747]]))


Loss J(θ) = 1.0943182488777405e+17



## Linear Regression With L2 Regularization

In [255]:
df = pd.DataFrame({'X1': [35,25,30], 'X2': [100,80,60], 'Y': [1.81,1.22,1.71]})
GradientDescent(model = 'Linear Regression', data = df, alpha = 0.02,  lambda_ = 2, regularization = 'L2',
                theta_init = [5,-0.03, -0.03]).run()

Running Gradient descent for Linear Regression with L2 Regularization

Cost Function:


Eq(J(θ), 1/(2n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})^2 + λ/(2n)∑_{j=1}^dθ_j^2)


Theta Update:


Eq(θ_0, θ_0-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i}))

Eq(θ_j, θ_j-α/(n)[∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i} + λθ_j])


Where:


Eq(
h_{θ}(x), θ^{T}x)




Eq(theta_0, Matrix([
[    5],
[-0.03],
[-0.03]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.95],
[1.85],
[ 2.3]]))

Eq(theta_1, Matrix([
[             4.9976],
[-0.0519333333333334],
[-0.0282666666666667]]))


Loss J(θ) = 0.36294800888889234


Iteration : 2


Eq(h_{θ}(x), Matrix([
[0.353266666666658],
[ 1.43793333333333],
[ 1.74359999999999]]))

Eq(theta_2, Matrix([
[ 5.00563466666667],
[0.245621333333337],
[0.813594666666678]]))


Loss J(θ) = 2973.939619005096


Iteration : 3


Eq(h_{θ}(x), Matrix([
[94.9618480000013],
[76.2337413333343],
[61.1899546666674]]))

Eq(theta_3, Matrix([
[ 3.48799770666665],
[-45.8913659733339],
[-125.097795840002]]))


Loss J(θ) = 67083991.48113447


Iteration : 4


Eq(h_{θ}(x), Matrix([
[-14112.4893953602],
[-11151.6198188268],
[-8879.12073189345]]))

Eq(theta_4, Matrix([
[231.141130680536],
[6883.02982734018],
[18786.6166344763]]))


Loss J(θ) = 1513347972520.222


Iteration : 5


Eq(h_{θ}(x), Matrix([
[2119798.84853522],
[1675236.21757229],
[1333919.03401947]]))

Eq(theta_5, Matrix([
[-33961.8546034993],
[-1033817.35062785],
[-2821687.49225714]]))


Loss J(θ) = 3.4139621735077836e+16



## Logistic Regression Without Regularization

In [247]:
df = pd.DataFrame({'X1': [0,0,1,1], 'X2': [0,1,0,1], 'Y': [0,0,0,1]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 1, theta_init = [0,0,0], n_iter = 7,
               ).run()

Running Gradient descent for Logistic Regression

Cost Function:


Eq(J(θ), -1/(n)∑_{i=1}^n[y^{i}log(h_{θ}(x^{i})) + (1-y^{i})log(1-h_{θ}(x^{i}))])


Theta Update:


Eq(θ_j, θ_j-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i})


Where:


Eq(
h_{θ}(x), 1/1+ e^{-θ^{T}x})




Eq(theta_0, Matrix([
[0],
[0],
[0]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.5],
[0.5],
[0.5],
[0.5]]))

Eq(theta_1, Matrix([
[-0.25],
[    0],
[    0]]))


Loss J(θ) = 0.6384394198788437


Iteration : 2


Eq(h_{θ}(x), Matrix([
[0.437823499114202],
[0.437823499114202],
[0.437823499114202],
[0.437823499114202]]))

Eq(theta_2, Matrix([
[-0.437823499114202],
[ 0.031088250442899],
[ 0.031088250442899]]))


Loss J(θ) = 0.6042874826272245


Iteration : 3


Eq(h_{θ}(x), Matrix([
[0.392259707869409],
[ 0.39969520501907],
[ 0.39969520501907],
[0.407177215696006]]))

Eq(theta_3, Matrix([
[-0.58753033251509],
[ 0.07937014526413],
[ 0.07937014526413]]))


Loss J(θ) = 0.578570796042201


Iteration : 4


Eq(h_{θ}(x), Matrix([
[0.357201711121256],
[0.375624919523644],
[0.375624919523644],
[ 0.39441529535122]]))

Eq(theta_4, Matrix([
[-0.713247043895031],
[ 0.136860091545414],
[ 0.136860091545414]]))


Loss J(θ) = 0.5568786632543374


Iteration : 5


Eq(h_{θ}(x), Matrix([
[0.328881759577646],
[0.359764378924268],
[0.359764378924268],
[0.391853714625561]]))

Eq(theta_5, Matrix([
[-0.823313101907967],
[ 0.198955568157957],
[ 0.198955568157957]]))


Loss J(θ) = 0.537520220631412


Iteration : 6


Eq(h_{θ}(x), Matrix([
[0.305060833244023],
[0.348791048273636],
[0.348791048273636],
[0.395224832554881]]))

Eq(theta_6, Matrix([
[-0.922780042494512],
[ 0.262951597950827],
[ 0.262951597950827]]))


Loss J(θ) = 0.51978984545148


Iteration : 7


Eq(h_{θ}(x), Matrix([
[0.284391780280939],
[ 0.34077815015503],
[ 0.34077815015503],
[0.402062941792137]]))

Eq(theta_7, Matrix([
[ -1.0147827980903],
[0.327241324964035],
[0.327241324964035]]))


Loss J(θ) = 0.50335475399999



## Logistic Regression With L1 Regularization

In [248]:
df = pd.DataFrame({'X1': [0,0,1,1], 'X2': [0,1,0,1], 'Y': [0,0,0,1]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 1,  lambda_ = 1, regularization = 'L1', n_iter = 7,
                theta_init = [0,0,0]).run()

Running Gradient descent for Logistic Regression with L1 Regularization

Cost Function:


Eq(J(θ), -1/(n)∑_{i=1}^n[y^{i}log(h_{θ}(x^{i})) + (1-y^{i})log(1-h_{θ}(x^{i}))] + λ/(n)∑_{j=1}^d|θ_j|)


Theta Update:


Eq(θ_0, θ_0-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i}))

Eq(θ_j, θ_j-α/(n)[1/2∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i} + λsign(θ_j)])


Where:


Eq(
h_{θ}(x), 1/1+ e^{-θ^{T}x})




Eq(theta_0, Matrix([
[0],
[0],
[0]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.5],
[0.5],
[0.5],
[0.5]]))

Eq(theta_1, Matrix([
[-0.25],
[    0],
[    0]]))


Loss J(θ) = 0.6384394198788437


Iteration : 2


Eq(h_{θ}(x), Matrix([
[0.437823499114202],
[0.437823499114202],
[0.437823499114202],
[0.437823499114202]]))

Eq(theta_2, Matrix([
[-0.437823499114202],
[ 0.031088250442899],
[ 0.031088250442899]]))


Loss J(θ) = 0.6042874826272245


Iteration : 3


Eq(h_{θ}(x), Matrix([
[0.392259707869409],
[ 0.39969520501907],
[ 0.39969520501907],
[0.407177215696006]]))

Eq(theta_3, Matrix([
[-0.58753033251509],
[-0.17062985473587],
[-0.17062985473587]]))


Loss J(θ) = 0.6180379628686322


Iteration : 4


Eq(h_{θ}(x), Matrix([
[0.357201711121256],
[0.319045842680267],
[0.319045842680267],
[0.283170253217801]]))

Eq(theta_4, Matrix([
[-0.657146244939988],
[ 0.178816121289613],
[ 0.178816121289613]]))


Loss J(θ) = 0.5590782822005457


Iteration : 5


Eq(h_{θ}(x), Matrix([
[0.341380959088345],
[0.382646519669874],
[0.382646519669874],
[0.425676293646806]]))

Eq(theta_5, Matrix([
[ -0.790233817958713],
[-0.0232645820395571],
[-0.0232645820395571]]))


Loss J(θ) = 0.5761548121121137


Iteration : 6


Eq(h_{θ}(x), Matrix([
[0.312118466372464],
[0.307145509311833],
[0.307145509311833],
[0.302216975061922]]))

Eq(theta_6, Matrix([
[-0.847390432973226],
[ 0.324394796867004],
[ 0.324394796867004]]))


Loss J(θ) = 0.5212328513821869


Iteration : 7


Eq(h_{θ}(x), Matrix([
[0.299980560116869],
[0.372152021001108],
[0.372152021001108],
[0.450512342255997]]))

Eq(theta_7, Matrix([
[-0.971089669066996],
[ 0.118728706052728],
[ 0.118728706052728]]))


Loss J(θ) = 0.5393051091599013



## Logistic Regression With L2 Regularization

In [250]:
df = pd.DataFrame({'X1': [0,0,1,1], 'X2': [0,1,0,1], 'Y': [0,0,0,1]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 1,  lambda_ = 1, regularization = 'L2',
                theta_init = [0,0,0]).run()

Running Gradient descent for Logistic Regression with L2 Regularization

Cost Function:


Eq(J(θ), -1/(n)∑_{i=1}^n[y^{i}log(h_{θ}(x^{i})) + (1-y^{i})log(1-h_{θ}(x^{i}))] + λ/(2n)∑_{j=1}^dθ_j^2)


Theta Update:


Eq(θ_0, θ_0-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i}))

Eq(θ_j, θ_j-α/(n)[∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i} + λθ_j])


Where:


Eq(
h_{θ}(x), 1/1+ e^{-θ^{T}x})




Eq(theta_0, Matrix([
[0],
[0],
[0]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.5],
[0.5],
[0.5],
[0.5]]))

Eq(theta_1, Matrix([
[-0.25],
[    0],
[    0]]))


Loss J(θ) = 0.6384394198788437


Iteration : 2


Eq(h_{θ}(x), Matrix([
[0.437823499114202],
[0.437823499114202],
[0.437823499114202],
[0.437823499114202]]))

Eq(theta_2, Matrix([
[-0.437823499114202],
[ 0.031088250442899],
[ 0.031088250442899]]))


Loss J(θ) = 0.6042874826272245


Iteration : 3


Eq(h_{θ}(x), Matrix([
[0.392259707869409],
[ 0.39969520501907],
[ 0.39969520501907],
[0.407177215696006]]))

Eq(theta_3, Matrix([
[ -0.58753033251509],
[0.0715980826534052],
[0.0715980826534052]]))


Loss J(θ) = 0.5794751723455329


Iteration : 4


Eq(h_{θ}(x), Matrix([
[0.357201711121256],
[ 0.37380390061215],
[ 0.37380390061215],
[0.390708709933118]]))

Eq(theta_4, Matrix([
[-0.711409888084759],
[ 0.112570409353737],
[ 0.112570409353737]]))


Loss J(θ) = 0.5601913486026288


Iteration : 5


Eq(h_{θ}(x), Matrix([
[0.329287381341743],
[0.354609247609945],
[0.354609247609945],
[0.380772872571612]]))

Eq(theta_5, Matrix([
[-0.81622957536807],
[0.150582276969913],
[0.150582276969913]]))


Loss J(θ) = 0.5447525535380514



In [251]:
df = pd.DataFrame({'X1': [-4,0,0,-4,0,0], 'X2': [0,2,-2,0,1,-1], 'Y': [0,0,0,0,1,1]})
GradientDescent(model = 'Logistic Regression', data = df, alpha = 0.5, theta_init = [1,1,1], n_iter = 3,
               ).run()

Running Gradient descent for Logistic Regression

Cost Function:


Eq(J(θ), -1/(n)∑_{i=1}^n[y^{i}log(h_{θ}(x^{i})) + (1-y^{i})log(1-h_{θ}(x^{i}))])


Theta Update:


Eq(θ_j, θ_j-α/(n)∑_{i=1}^n(h_{θ}(x^{i}) - y^{i})x^{i})


Where:


Eq(
h_{θ}(x), 1/1+ e^{-θ^{T}x})




Eq(theta_0, Matrix([
[1],
[1],
[1]]))



Iteration : 1


Eq(h_{θ}(x), Matrix([
[0.0474258731775668],
[ 0.952574126822433],
[ 0.268941421369995],
[0.0474258731775668],
[ 0.880797077977882],
[               0.5]]))

Eq(theta_1, Matrix([
[0.941902968956213],
[ 1.03161724878504],
[0.854328125926437]]))


Loss J(θ) = 0.6642119957731525


Iteration : 2


Eq(h_{θ}(x), Matrix([
[0.0397506794282286],
[ 0.934045449417086],
[ 0.317181854480769],
[0.0397506794282286],
[ 0.857689529190199],
[ 0.521879728930573]]))

Eq(theta_2, Matrix([
[0.882711475549956],
[  1.0581177017372],
[0.723533376748748]]))


Loss J(θ) = 0.6235782528900514


Iteration : 3


Eq(h_{θ}(x), Matrix([
[0.0339030460451886],
[ 0.911313414569012],
[ 0.362540332556025],
[0.0339030460451886],
[ 0.832889380613594],
[ 0.539710712020612]]))

Eq(theta_3, Matrix([
[0.823189814562488],
[ 1.08071973243399],
[0.607639640697169]]))


Loss J(θ) = 0.5903612091303095

