In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings('ignore')

# Gradient descent

A gradient is basically the slope of a function; the degree of change of a parameter with the amount of change in another parameter. Gradient Descent can be described as an looping construct which is used to find the values of the parameters of a function that minimizes the cost function. The parameters are initially defined a particular value and from that, Gradient Descent is run in a loop to find the optimal values of the parameters, to find the minimum possible value of the given cost function.

### Types of Gradient Descent:

    1. Batch Gradient Descent
    2. Stochastic Gradient Descent

## Batch Gradient Descent
Parameters are updated after computing the gradient of error with respect to the entire training set

In [2]:
def gradient_descent(x,y):
    n = len(x)
    learning_rate = 0.01 #0.1
    intercept = slope = 0
    iterations = 15000 #1000
    for i in range(iterations):
        y_pred = x*slope + intercept
        mse = mean_squared_error(y, y_pred)
        r2 = r2_score(y, y_pred)
        
        md = (2/n)*sum(x*(y-y_pred)) #slope
        bd = (2/n)*sum(y-y_pred) #intercept

        slope = slope + (learning_rate * md)
        intercept = intercept + (learning_rate * bd)
        
        print("slope: ",slope,"intercept: ",intercept,"mse: "
              ,mse ,"r2", r2, "iteration: ",i)

In [3]:
#testing gradient descent function
x = np.array([10,12,13,14,20,21])
y1 = np.array([12,12,13,15,16,26])
gradient_descent(x,y1)


slope:  5.03 intercept:  0.31333333333333335 mse:  269.0 r2 -10.419811320754718 iteration:  0
slope:  -14.345666666666666 intercept:  -0.8886000000000002 mse:  3890.8565666666673 r2 -164.1778731132076 iteration:  1
slope:  60.28830222222222 intercept:  3.7462053333333323 mse:  57630.48295214518 r2 -2445.577106458994 iteration:  2
slope:  -227.1990201185185 intercept:  -14.101876106666666 mse:  854997.0590517033 r2 -36296.044959742125 iteration:  3
slope:  880.1901399529875 intercept:  54.65320078435553 mse:  12685995.864658995 r2 -538555.4282166555 iteration:  4
slope:  -3385.428163388425 intercept:  -210.18357188389444 mse:  188229512.38211393 r2 -7990874.525655781 iteration:  5
slope:  13045.559697887466 intercept:  809.9618819036443 mse:  2792872340.717798 r2 -118565334.21915182 iteration:  6
slope:  -50245.937406473044 intercept:  -3119.591931767335 mse:  41439496253.611626 r2 -1759223896.5589845 iteration:  7
slope:  193550.33430434347 intercept:  12016.894462143257 mse:  61486227

slope:  -2.5850096241640314e+57 intercept:  -1.6049449844617582e+56 mse:  1.0967786244583356e+116 r2 -4.656135669870293e+114 iteration:  97
slope:  9.957351908829306e+57 intercept:  6.182182787719572e+56 mse:  1.6273552194572883e+117 r2 -6.908583478828111e+115 iteration:  98
slope:  -3.835531446747725e+58 intercept:  -2.381351659452274e+57 mse:  2.414603048635074e+118 r2 -1.0250673319677202e+117 iteration:  99
slope:  1.4774311095649848e+59 intercept:  9.172869713979948e+57 mse:  3.582689146640127e+119 r2 -1.520952939611375e+118 iteration:  100
slope:  -5.6910045291410494e+59 intercept:  -3.533352096724919e+58 mse:  5.315847475927237e+120 r2 -2.2567277020445822e+119 iteration:  101
slope:  2.192151792460911e+60 intercept:  1.3610328532632727e+59 mse:  7.887436847213714e+121 r2 -3.3484401709869546e+120 iteration:  102
slope:  -8.444079523364723e+60 intercept:  -5.242643181184724e+59 mse:  1.1703055872183973e+123 r2 -4.968278436304518e+121 iteration:  103
slope:  3.252625080166698e+61 in

slope:  -1.168029517769932e+96 intercept:  -7.251899949325624e+94 mse:  2.239248465940484e+193 r2 -9.50624348748319e+191 iteration:  163
slope:  4.499202184632717e+96 intercept:  2.793402358275885e+95 mse:  3.322505196077884e+194 r2 -1.4104974889009885e+193 iteration:  164
slope:  -1.7330743778507026e+97 intercept:  -1.0760072242787785e+96 mse:  4.929797182345347e+195 r2 -2.0928384264673648e+194 iteration:  165
slope:  6.67573199848939e+97 intercept:  4.1447360537589056e+96 mse:  7.314631226987681e+196 r2 -3.105267973721186e+195 iteration:  166
slope:  -2.5714648075822094e+98 intercept:  -1.5965354662784443e+97 mse:  1.0853150344284327e+198 r2 -4.607469485781083e+196 iteration:  167
slope:  9.905177826386825e+98 intercept:  6.149789665793753e+97 mse:  1.610346013904897e+199 r2 -6.836374587332111e+197 iteration:  168
slope:  -3.815434202445665e+99 intercept:  -2.3688739606682594e+98 mse:  2.3893654858149786e+200 r2 -1.014353272279944e+199 iteration:  169
slope:  1.46968973281951e+100 in

slope:  -1.0151496242189268e+164 intercept:  -6.302720433372829e+162 mse:  inf r2 -inf iteration:  279
slope:  3.9103150541393375e+164 intercept:  2.427782270186243e+163 mse:  inf r2 -inf iteration:  280
slope:  -1.5062374508973047e+165 intercept:  -9.351718537635494e+163 mse:  inf r2 -inf iteration:  281
slope:  5.801965384052573e+165 intercept:  3.6022439360036353e+164 mse:  inf r2 -inf iteration:  282
slope:  -2.234893462361497e+166 intercept:  -1.3875697094874157e+165 mse:  inf r2 -inf iteration:  283
slope:  8.608718697003694e+166 intercept:  5.344862071786823e+165 mse:  inf r2 -inf iteration:  284
slope:  -3.3160434200667766e+167 intercept:  -2.058819126066e+166 mse:  inf r2 -inf iteration:  285
slope:  1.2773264350704622e+168 intercept:  7.930487516655649e+166 mse:  inf r2 -inf iteration:  286
slope:  -4.920209463653405e+168 intercept:  -3.054791528579133e+167 mse:  inf r2 -inf iteration:  287
slope:  1.8952446689862096e+169 intercept:  1.1766932692952664e+168 mse:  inf r2 -inf 

slope:  -1.309090531532314e+233 intercept:  -8.127700040840717e+231 mse:  inf r2 -inf iteration:  397
slope:  5.042563470996392e+233 intercept:  3.130756990594551e+232 mse:  inf r2 -inf iteration:  398
slope:  -1.942374934853734e+234 intercept:  -1.2059548562206516e+233 mse:  inf r2 -inf iteration:  399
slope:  7.481949229292598e+234 intercept:  4.645289045464963e+233 mse:  inf r2 -inf iteration:  400
slope:  -2.882016405031891e+235 intercept:  -1.7893464423322128e+234 mse:  inf r2 -inf iteration:  401
slope:  1.1101409945892214e+236 intercept:  6.892489701610105e+234 mse:  inf r2 -inf iteration:  402
slope:  -4.276217948363513e+236 intercept:  -2.654958993009873e+235 mse:  inf r2 -inf iteration:  403
slope:  1.6471817571850425e+237 intercept:  1.0226794031940865e+236 mse:  inf r2 -inf iteration:  404
slope:  -6.344877117971818e+237 intercept:  -3.939319456424922e+236 mse:  inf r2 -inf iteration:  405
slope:  2.4440208535918054e+238 intercept:  1.5174098286619029e+237 mse:  inf r2 -inf

slope:  2.9536853571341924e+300 intercept:  1.8338432690144028e+299 mse:  inf r2 -inf iteration:  512
slope:  -1.1377475833751504e+301 intercept:  -7.063889667768461e+299 mse:  inf r2 -inf iteration:  513
slope:  4.382557405274715e+301 intercept:  2.720981562684142e+300 mse:  inf r2 -inf iteration:  514
slope:  -1.6881432833766927e+302 intercept:  -1.0481110284393683e+301 mse:  inf r2 -inf iteration:  515
slope:  6.5026592504638355e+302 intercept:  4.0372810422594967e+301 mse:  inf r2 -inf iteration:  516
slope:  -2.5047978891379156e+303 intercept:  -1.5551442329977197e+302 mse:  inf r2 -inf iteration:  517
slope:  9.648379568685272e+303 intercept:  5.990352319075982e+302 mse:  inf r2 -inf iteration:  518
slope:  -3.716516558286582e+304 intercept:  -2.3074593433361356e+303 mse:  inf r2 -inf iteration:  519
slope:  1.4315870587065313e+305 intercept:  8.88823951839033e+303 mse:  inf r2 -inf iteration:  520
slope:  -inf intercept:  -3.423713703317342e+304 mse:  inf r2 -inf iteration:  521

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [4]:
#Batch Gradient Descent for the advertising
df = pd.read_csv("advertising.csv")
gradient_descent(df["TV"], df["sales"])

slope:  48.210834 intercept:  0.28045000000000003 mse:  223.71625 r2 -7.259557207100876 iteration:  0
slope:  -27823.867830846222 intercept:  -141.22554016890007 mse:  67072797.712930396 r2 -2476312.677483211 iteration:  1
slope:  16085770.93374725 intercept:  81687.70113097852 mse:  22417771491462.234 r2 -827659439532.1025 iteration:  2
slope:  -9299614799.306402 intercept:  -47225785.22295222 mse:  7.492704456171994e+18 r2 -2.76629083008732e+17 iteration:  3
slope:  5376356297617.321 intercept:  27302490913.302197 mse:  2.504290851968839e+24 r2 -9.24578950123472e+22 iteration:  4
slope:  -3108215518865136.0 intercept:  -15784300976752.58 mse:  8.370105491200564e+29 r2 -3.0902254589928192e+28 iteration:  5
slope:  1.7969426088809902e+18 intercept:  9125326993697314.0 mse:  2.797545096598775e+35 r2 -1.03284780452033e+34 iteration:  6
slope:  -1.0388606324155365e+21 intercept:  -5.275595850873839e+18 mse:  9.350250813124799e+40 r2 -3.452093063948003e+39 iteration:  7
slope:  6.005931454

slope:  2.830563994319214e+178 intercept:  1.4374316629307278e+176 mse:  inf r2 -inf iteration:  64
slope:  -1.636424828872125e+181 intercept:  -8.310177292396945e+178 mse:  inf r2 -inf iteration:  65
slope:  9.460610061894152e+183 intercept:  4.804335984242039e+181 mse:  inf r2 -inf iteration:  66
slope:  -5.469431969258326e+186 intercept:  -2.777515260787584e+184 mse:  inf r2 -inf iteration:  67
slope:  3.1620250565908685e+189 intercept:  1.605755935723763e+187 mse:  inf r2 -inf iteration:  68
slope:  -1.8280513433032613e+192 intercept:  -9.283304979505168e+189 mse:  inf r2 -inf iteration:  69
slope:  1.0568454246709174e+195 intercept:  5.36692715407348e+192 mse:  inf r2 -inf iteration:  70
slope:  -6.109906353229611e+197 intercept:  -3.102764278532476e+195 mse:  inf r2 -inf iteration:  71
slope:  3.532300445626639e+200 intercept:  1.7937911008965688e+198 mse:  inf r2 -inf iteration:  72
slope:  -2.0421174592273277e+203 intercept:  -1.0370386612732318e+201 mse:  inf r2 -inf iteration

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

# Stocastic Gradient Descent

The main problem with Batch Gradient Descent is the fact that it uses the whole training set to compute the gradients at every step, which makes it very slow when the training set is large. At the opposite extreme, Stochastic Gradient Descent just picks a random instance in the training set at every step and computes the gradients based only on that single instance. Obviously this makes the algorithm much faster since it has very little data to manipulate at every iteration.

In [5]:
from sklearn.linear_model import SGDRegressor

sgd_reg = SGDRegressor()
x_train, x_test, y_train, y_test = train_test_split(df[["TV"]], df["sales"], 
                                                    random_state=1, 
                                                    test_size=0.3)
sgd_reg.fit(x_train, y_train)

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
             eta0=0.01, fit_intercept=True, l1_ratio=0.15,
             learning_rate='invscaling', loss='squared_loss', max_iter=1000,
             n_iter_no_change=5, penalty='l2', power_t=0.25, random_state=None,
             shuffle=True, tol=0.001, validation_fraction=0.1, verbose=0,
             warm_start=False)

In [6]:
sgd_reg.intercept_, sgd_reg.coef_

(array([2.08618293e+10]), array([2.15946878e+11]))

In [7]:
y_pred = sgd_reg.predict(x_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:",r2_score( y_test, y_pred))

MSE: 1.4702371851855662e+27
R2: -5.910098000759567e+25


In [8]:
# Multiple Linear Regression using Stochastic Gradient Descent
sgd_reg = SGDRegressor()
x_train, x_test, y_train, y_test = train_test_split(df[["TV", "radio"]], 
                                                    df["sales"], 
                                                    random_state=1
                                                    , test_size=0.3)
sgd_reg.fit(x_train, y_train)
sgd_reg.intercept_, sgd_reg.coef_

(array([1.92305195e+10]), array([3.88005569e+10, 4.56456873e+10]))

In [9]:
y_pred = sgd_reg.predict(x_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:",r2_score( y_test, y_pred))

MSE: 6.267368515245572e+25
R2: -2.519373234822734e+24
