In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import fmin, minimize

In [2]:
train_df = pd.read_csv('./train.csv', index_col='ID')

In [3]:
y = train_df['medv'].values
y = y.reshape(-1, 1)

In [4]:
train_df['constant'] = 1
columns = ['constant', 'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']
x = train_df[columns].values

In [5]:
w = np.zeros([x.shape[1], 1])

In [6]:
print(w)

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [7]:
def pred(x, w):
    return np.dot(x, w)

In [8]:
y_pred = pred(x, w)

In [9]:
def loss(_w):
    p = pred(x, _w)
    e = y - p
    se = np.power(e, 2)
    rse = np.sqrt(np.sum(se))
    rmse = rse / y.shape[0]
    return rmse

In [10]:
l = loss(w)

In [11]:
l

1.3449009883922638

In [12]:
min = fmin(loss, w, maxiter=1000)



In [13]:
min

array([-0.00666563, -0.03605829,  0.04495306,  0.01109164,  0.04641392,
       -0.11383605, -0.00518238,  0.05799832, -0.03884004, -0.06830298,
        0.01235407,  0.02862037,  0.03404354,  0.07511245])

In [14]:
w

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [15]:
w.shape

(14, 1)

In [16]:
min.shape

(14,)

In [17]:
loss(min)

9.728897700864076

In [18]:
y_min = pred(x, min)

In [19]:
print(y.shape, y_pred.shape, y_min.shape)

(333, 1) (333, 1) (333,)


In [20]:
out = pd.DataFrame({'y': y[:,0], 'y_pred': y_pred[:,0], 'y_min': pred(x, min)})

In [21]:
out.head(n=15)

Unnamed: 0,y,y_pred,y_min
0,24.0,0.0,22.267535
1,21.6,0.0,21.928179
2,33.4,0.0,19.07714
3,36.2,0.0,19.819029
4,22.9,0.0,22.53217
5,15.0,0.0,24.598919
6,18.9,0.0,23.557982
7,21.7,0.0,21.00886
8,20.4,0.0,21.623121
9,18.2,0.0,22.523735


In [22]:
nms = minimize(loss, w, method='nelder-mead')

In [23]:
nms.x.shape

(14,)

In [24]:
out_2 = pd.DataFrame({'y': y[:,0], 'y_pred': y_pred[:,0], 'y_min': pred(x, nms.x)})

In [25]:
out_2.head()

Unnamed: 0,y,y_pred,y_min
0,24.0,0.0,23.94322
1,21.6,0.0,23.30723
2,33.4,0.0,21.672159
3,36.2,0.0,22.230856
4,22.9,0.0,23.130217


In [40]:
def loss_der(_w):
    learning_rate = 0.01
    _p = pred(x, _w)
    e = (y - _p)
    der = np.copy(_w)
    der[0,0] = _w[0,0] + learning_rate/y.shape[0] * e.sum()
    for i in range(1, w.shape[0]):
        der[i,0] = _w[i,0] + learning_rate/y.shape[0] * (e * x[:,i]).sum()
    return der

In [41]:
loss_der(w)

8484210.309220001
26988129.0
28513854.859999992
151640.0
1406680.878
15819471.481999997
172258490.80000004
9366862.697800001
24323056.0
1033350780.0
46577742.4
907582156.2199999
31599046.479999997


array([[2.27687688e-01],
       [2.54781090e+02],
       [8.10454324e+02],
       [8.56271918e+02],
       [4.55375375e+00],
       [4.22426690e+01],
       [4.75059204e+02],
       [5.17292765e+03],
       [2.81287168e+02],
       [7.30422102e+02],
       [3.10315550e+04],
       [1.39873100e+03],
       [2.72547194e+04],
       [9.48920315e+02]])

In [43]:
bfgs = minimize(loss, w, method='BFGS')

In [45]:
bfgs.x

array([ 2.27686457e+01, -6.46045525e-07,  2.85951171e-07,  3.97793067e-06,
       -1.27583499e-04, -1.22596995e-05,  2.45949868e-05,  9.94520460e-08,
        1.35117738e-06,  5.50057968e-06, -4.09548443e-07,  3.43433844e-06,
       -1.03974535e-07,  1.09943018e-06])

In [46]:
out_3 = pd.DataFrame({'y': y[:,0], 'y_pred': y_pred[:,0], 'y_min': pred(x, bfgs.x)})

In [47]:
out_3.head()

Unnamed: 0,y,y_pred,y_min
0,24.0,0.0,22.768728
1,21.6,0.0,22.768782
2,33.4,0.0,22.768786
3,36.2,0.0,22.768792
4,22.9,0.0,22.768761
