In [1]:
import pandas as pd
from sklearn.datasets import load_boston
import random
import numpy as np
import math

In [2]:
boston_data = load_boston()

In [3]:
data = pd.DataFrame(boston_data.data, columns = boston_data.feature_names)
data["Price"] = boston_data.target

In [4]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


# Regression

In [29]:
y = data["Price"]
intercept = pd.DataFrame([1] * data.shape[0], columns = ["intercept"])
X = pd.DataFrame(data["CRIM"])
X = pd.concat([intercept, X], axis = 1)


Minimize Objective function (Cost) = $$ \sum\limits_{i=1}^n (\theta {X_i}^T - y_i)^2$$

In [48]:
def regression_cost(thetas_temp):
    
    # the cost (or objective function) is the root mean squared error of the prediction
    y_preds = np.dot(thetas_temp, X.T)
    error = sum((y_preds - y)**2)
    return error

Stochastic gradient descent is performed on the derivative of the cost function w.r.t each coefficient theta "j":
$$ \sum\limits_{i=1}^n (\theta {X_i}^T - y_i){X_{i,j}}^T$$

In [None]:
thetas_init = np.array([100,100])

# if alpha is set too high, the cost may end up increasing in  gradient descent
alpha = 0.02
precision = 0.001
thetas_old = thetas_init
thetas_new = thetas_old
n = X.shape[0]

error_old = float("inf")
error_new = regression_cost(thetas_new)

In [199]:
def update_theta(thetas_old_temp):
    
   # for each theta, substract it by the value of the derivative at the given theta, weighted by 
# the learning rate alpha and size of sample
    y_preds = np.dot(thetas_old_temp, X.T)
    thetas_new_temp = [0]*2
    for i in range(len(thetas_old_temp)):
        thetas_new_temp[i] = thetas_old_temp[i] - alpha * (np.dot((y_preds - y), X.iloc[:,i].T))/n
    return thetas_new_temp

In [200]:
while abs(error_old - error_new) > precision:
    print(error_new)
    thetas_new = update_theta(thetas_new)
    error_old = error_new
    error_new = regression_cost(thetas_new)


474214179.5577082
261313958.54726046
144385694.0012819
80153872.07003069
44857135.8193305
25448783.63854243
14765219.643814908
8873090.739498975
5612676.951358219
3798109.5262179743
2778233.9933118573
2195492.851736954
1853535.057570188
1644524.2251465197
1509225.6513411512
1415091.2361008488
1344226.5236126094
1286783.72850437
1237334.0130139505
1192875.5848159075
1151739.9770941501
1112991.495103441
1076097.4468582482
1040747.0542562777
1006752.0010361274
973991.8058591872
942383.808255779
911866.6694499605
882391.2944602738
853915.8296351345
826402.8984978666
799818.0671692386
774128.985482355
749304.8996486596
725316.3694615004
702135.0983172234
679733.8256757056
658086.2542826282
637166.9969403066
616951.5344585063
597416.1801752123
578538.0485016605
560295.0260798153
542665.744763551
525629.5559773855
509166.506195117
493257.3133846866
477883.3443230505
463026.592716829
448669.6580824048
434795.7253493916
421388.5451573372
408432.4148191664
395912.1599271824
383813.1165790564
372

36275.79259319401
36275.783165348854
36275.7740546791
36275.76525051423
36275.75674254268
36275.74852079985
36275.74057565622
36275.73289780629
36275.72547825782
36275.718308320844
36275.71137959778
36275.70468397369
36275.69821360647
36275.69196091806
36275.68591858515
36275.68007953084
36275.674436916364
36275.66898413306
36275.66371479455
36275.65862272925
36275.65370197333
36275.6489467635
36275.644351530376
36275.63991089199
36275.63561964729
36275.63147277044
36275.62746540454
36275.62359285607
36275.61985058945
36275.61623422176
36275.61273951735
36275.60936238319
36275.60609886398
36275.6029451374
36275.59989750979
36275.596952411826
36275.59410639393
36275.591356122924
36275.58869837774
36275.58613004543
36275.583648118045
36275.58124968866
36275.57893194823
36275.57669218217
36275.574527767305
36275.572436168455
36275.5704149361
36275.56846170286
36275.566574181124
36275.56475016008
36275.56298750357
36275.561284147065
36275.55963809555
36275.5580474212
36275.556510260976
362

In [201]:
thetas_new

[24.0412187699838, -0.4155309539552428]

Verify gradient descent result:

In [147]:
import statsmodels.api as sm

In [148]:
X2 = X["CRIM"]
X2 = sm.add_constant(X2)
model = sm.OLS(y,X2)
results = model.fit()

In [149]:
results.summary()

0,1,2,3
Dep. Variable:,Price,R-squared:,0.151
Model:,OLS,Adj. R-squared:,0.149
Method:,Least Squares,F-statistic:,89.49
Date:,"Sun, 17 Jan 2021",Prob (F-statistic):,1.17e-19
Time:,23:02:41,Log-Likelihood:,-1798.9
No. Observations:,506,AIC:,3602.0
Df Residuals:,504,BIC:,3610.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,24.0331,0.409,58.740,0.000,23.229,24.837
CRIM,-0.4152,0.044,-9.460,0.000,-0.501,-0.329

0,1,2,3
Omnibus:,139.832,Durbin-Watson:,0.713
Prob(Omnibus):,0.0,Jarque-Bera (JB):,295.404
Skew:,1.49,Prob(JB):,7.1400000000000005e-65
Kurtosis:,5.264,Cond. No.,10.1
