In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
def one_step_for_gradient(X_train, Y_train, alpha, m) :
    slope = np.array([0 for i in range(len(m))])
    
    for i in range(len(X_train)) :
        for j in range(len(slope)) :
            slope[j] += (2)*X_train[i][j]*(m.dot(X_train[i]) - Y_train[i])

    m = m - slope*(alpha/len(X_train))
    return m



def fit(X_train, Y_train, alpha, num_iterations) :
    X_train = X_train.as_matrix()
    ones_column = np.ones((len(X_train), 1), dtype = float)
    np.append(X_train, ones_column, axis = 1)
    m = np.array([0 for i in range(len(X_train[0]))])
    
    for i in range(num_iterations) :
        print("Cost = ", cost(X_train, Y_train, m))
        m = one_step_for_gradient(X_train, Y_train, alpha, m)
        
    return m



def predict(X_test, m) :
    X_test = X_test.as_matrix()
    ones_column = np.ones((len(X_test), 1), dtype = float)
    np.append(X_test, ones_column, axis = 1)
    
    return [m.dot(current_row) for current_row in X_test]



def cost(x, y, m) :
    temp = 0
    
    for i in range(len(x)) :
        temp += (y[i] - m.dot(x[i]))**2
        
    return temp

In [3]:
diabetes = datasets.load_diabetes()
dataframe = pd.DataFrame(diabetes.data)
dataframe.columns = diabetes.feature_names

X_train, X_test, Y_train, Y_test = train_test_split(dataframe, diabetes.target, test_size = 0.2)

In [5]:
m = fit(X_train, Y_train, alpha = 0.007, num_iterations = 100)
Y_pred = predict(X_test, m)

Cost =  10429569.0
Cost =  10429406.5688
Cost =  10429244.153
Cost =  10429081.8113
Cost =  10428919.5046
Cost =  10428757.2242
Cost =  10428594.9591
Cost =  10428432.7558
Cost =  10428270.5999
Cost =  10428108.4592
Cost =  10427946.3752
Cost =  10427784.3565
Cost =  10427622.3619
Cost =  10427460.3827
Cost =  10427298.4518
Cost =  10427136.5639
Cost =  10426974.6914
Cost =  10426812.843
Cost =  10426651.0378
Cost =  10426489.2478
Cost =  10426327.5204
Cost =  10426165.8253
Cost =  10426004.1625
Cost =  10425842.515
Cost =  10425680.8827
Cost =  10425519.2657
Cost =  10425357.6747
Cost =  10425196.1119
Cost =  10425034.583
Cost =  10424873.1192
Cost =  10424711.6601
Cost =  10424550.2441
Cost =  10424388.8433
Cost =  10424227.4706
Cost =  10424066.1264
Cost =  10423904.8195
Cost =  10423743.5278
Cost =  10423582.2792
Cost =  10423421.0678
Cost =  10423259.8922
Cost =  10423098.7318
Cost =  10422937.5867
Cost =  10422776.4567
Cost =  10422615.3918
Cost =  10422454.3509
Cost =  10422293.

In [6]:
print(Y_pred, "\n")
print(Y_test)

[0.04030597003397618, 0.68187095580383539, 0.0096144364482910472, -0.82411598849214518, 0.034517699815497252, -0.18074005418436229, -0.63059492102119874, 0.60471431293697009, 0.37299013405905668, 0.59399683174756657, 0.059339595837827205, -0.72143120905662528, 0.23308296093381464, -0.14912562912568275, -0.70998949964923341, 0.18393350657709018, -0.38084668990334991, 0.50774812662741986, -0.32573381834355264, 0.3269696664318873, 0.19278459255006414, -0.26199365114590883, -0.82611358369268229, 0.22594970628267463, -0.16055806307183695, -0.79045607715156574, 0.72932680869083688, 0.57117383243121633, 0.16929088860965627, -0.20281785906954808, -0.29458321295138395, 0.1538230283408823, -0.61076611752589471, 0.3276668049163583, 0.37840289149804651, 0.13515942834693936, 0.489622113351389, -0.77473381352256798, 0.15347529546567301, -0.65757197794011868, 0.48921396218184743, 0.16279008959919114, 0.31543863841588987, 0.086658350414288585, -0.92243817739848799, -0.060770001604982637, -0.2504380171