# gradient descent on boston dataset

In [97]:
import numpy as np
import pandas as pd
from sklearn import preprocessing, datasets
from sklearn import cross_validation as cv

In [98]:
def gd_cost(X, Y, coef):
    cost = 0
    N = len(Y)
    cost = np.sum((Y - (X.dot(coef))) **2)
    cost = cost/N
    return cost


def gd_step(X, Y, l_rate, coef):
    Y = np.reshape(Y, (len(Y), 1))
    N = len(Y)
    hox = X.dot(coef)
    loss = hox - Y
    c = X.T
    grad = c.dot(loss)
    grad = grad*(2/N)
    coef = coef - l_rate*(grad)
    return coef

def gd_runner(X, Y, learning_rate=0.02, num_iter=1):
    arb, count = X.shape
    coef = np.random.rand(count,1)
    print("cost before gd = " + str(gd_cost(X, Y, coef)))
    for i in range(num_iter):
        coef = gd_step(X, Y, learning_rate, coef)
    print("cost = " + str(gd_cost(X, Y, coef)))
    return coef

def predict(X_test, coef):
    Y_pred = X_test.dot(coef)
    print(Y_pred.shape)
    return Y_pred    



In [99]:
boston = datasets.load_boston()
df = pd.DataFrame(boston.data)
print(df.head())
df.describe()

        0     1     2    3      4      5     6       7    8      9     10  \
0  0.00632  18.0  2.31  0.0  0.538  6.575  65.2  4.0900  1.0  296.0  15.3   
1  0.02731   0.0  7.07  0.0  0.469  6.421  78.9  4.9671  2.0  242.0  17.8   
2  0.02729   0.0  7.07  0.0  0.469  7.185  61.1  4.9671  2.0  242.0  17.8   
3  0.03237   0.0  2.18  0.0  0.458  6.998  45.8  6.0622  3.0  222.0  18.7   
4  0.06905   0.0  2.18  0.0  0.458  7.147  54.2  6.0622  3.0  222.0  18.7   

       11    12  
0  396.90  4.98  
1  396.90  9.14  
2  392.83  4.03  
3  394.63  2.94  
4  396.90  5.33  


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.593761,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063
std,8.596783,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36
75%,3.647423,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97


In [100]:
X_train, X_test, Y_train, Y_test = cv.train_test_split(df, boston.target, test_size=0.2)

scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [101]:
num_iter = 30
rate = 0.05
arr = np.full((len(Y_train), 1), 1)
X = np.append(arr, X_train, axis=1)
arr = np.full((len(X_test), 1), 1)
X_test1 = np.append(arr, X_test, axis=1)


coef = gd_runner(X, Y_train, rate, num_iter)
Y_pred = predict(X_test1, coef)
print(Y_pred.shape)
for i in Y_pred[:10]:
    print(i)

cost before gd = 236025.53740211346
cost = 54237.89191547447
(102, 1)
(102, 1)
[26.7108202]
[20.87598728]
[7.00654242]
[29.78720258]
[9.00158037]
[20.55327204]
[4.18101906]
[13.50961093]
[14.41582192]
[20.12566946]


In [102]:
from sklearn import linear_model

In [103]:
clf = linear_model.SGDRegressor(alpha=0.05, max_iter=30)
clf.fit(X_train, Y_train)
Y_pred2 = clf.predict(X_test)

for i in Y_pred2[:10]:
    print(i)
    
print("score = " + str(clf.score(X_test, Y_test)))

27.25804223204009
22.68853552705916
7.618069657452875
29.98570163195754
11.244948871327583
21.09949003133345
5.636109299785527
14.050016774097635
14.57836132983837
21.482979330109227
score = 0.7057788837752907


In [104]:
clf.coef_

array([-0.81063568,  0.39460882, -0.1030158 ,  0.62703354, -1.4554976 ,
        2.6844791 ,  0.05508609, -2.03899991,  1.28295367, -0.97698641,
       -2.09734866,  0.72435714, -3.455553  ])

In [105]:
# If we don't perform feature scaling on this dataset, we get nan in all X_test predictions 

# Adding higher degree terms to the hypothesis

In [115]:
boston = datasets.load_boston()
df2 = pd.DataFrame(boston.data)

In [116]:
def add_features(X_old):
    n,m = X_old.shape
    X_new = np.array(X_old)
    for i in range(m):
        arr = X_old[:,i]**2
        arr = np.reshape(arr,(n,1))
        X_new = np.append(X_new, arr, axis=1)
    print(X_new.shape)
    return X_new

In [137]:
data = np.array(df2)
X_new = add_features(data)
scaler = preprocessing.StandardScaler().fit(X_new)
X_new = scaler.transform(X_new)
X_train, X_test, Y_train, Y_test = cv.train_test_split(X_new, boston.target, test_size=0.2)

(506, 26)


In [138]:
clf = linear_model.SGDRegressor(alpha=0.05, max_iter=30)
clf.fit(X_train, Y_train)
Y_pred2 = clf.predict(X_test)

for i in Y_pred2[:10]:
    print(i)
    
print("score = " + str(clf.score(X_test, Y_test)))

23.540542525452278
22.275257312300727
25.552631753003123
27.50013677548943
20.12066673436285
10.226145639177735
22.24414674401155
23.840270690830305
17.80596555606552
20.506087858008478
score = 0.8403898369585207


In [139]:
num_iter = 50
rate = 0.05
arr = np.full((len(Y_train), 1), 1)
X = np.append(arr, X_train, axis=1)
arr = np.full((len(X_test), 1), 1)
X_test1 = np.append(arr, X_test, axis=1)


coef = gd_runner(X, Y_train, rate, num_iter)
Y_pred = predict(X_test1, coef)
print(Y_pred.shape)
for i in Y_pred[:10]:
    print(i)

cost before gd = 237306.1313267127
cost = 57597.14025726429
(102, 1)
(102, 1)
[23.54356026]
[22.1351292]
[25.10988443]
[27.29360181]
[20.37988166]
[9.26519124]
[21.37971198]
[23.84158654]
[18.12297202]
[20.33091598]
