In [97]:
import numpy as np
import sklearn
from sklearn import metrics, datasets
from sklearn.preprocessing import StandardScaler

In [98]:
# Load the California housing data as plain arrays: X holds the feature
# matrix, y the regression target.
X, y = datasets.fetch_california_housing(return_X_y=True)
print(X.shape)  # (n_samples, n_features)

(20640, 8)


In [99]:
from sklearn.model_selection import train_test_split

In [108]:
# Hold out 20% of the rows as a test split; the fixed random_state keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [109]:
# Row/column counts of the train and test feature matrices, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(16512, 8)
(4128, 8)


In [110]:
# Build the design matrices by prepending a constant column of ones (the
# intercept term) to the train and test feature arrays.
X_train_temp = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
X_test_temp = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

In [111]:
# Sanity check: the design matrix has one more column than X_train, and its
# first column is the constant 1 used for the intercept.
print(X_train_temp.shape)
print(X_train_temp)

(16512, 9)
[[   1.            4.7269       48.         ...    2.55494505
    34.19       -118.13      ]
 [   1.            2.2621       43.         ...    2.10892857
    39.74       -121.84      ]
 [   1.            4.1685       20.         ...    2.37261146
    38.8        -121.15      ]
 ...
 [   1.            6.1463        6.         ...    2.80651731
    37.3        -121.76      ]
 [   1.            3.2813       36.         ...    2.58293839
    37.35       -121.95      ]
 [   1.            2.6719       23.         ...    2.86021505
    36.85       -121.53      ]]


In [112]:
# Standardise the feature columns with statistics learned from the training
# split only, then apply that same transform to the test split.
# Column 0 (the bias column of ones) is deliberately left unscaled.
scaler = StandardScaler()
X_train_temp[:, 1:] = scaler.fit_transform(X_train_temp[:, 1:])
X_test_temp[:, 1:] = scaler.transform(X_test_temp[:, 1:])

# Gradient Descent

In [113]:
# Starting point for gradient descent: one weight per column of X_train_temp
# (bias included), drawn uniformly from [0, 1).
# NOTE(review): no random seed is set in the visible cells, so these values
# differ between runs, and the result of a fixed number of descent steps may
# differ with them -- consider seeding for reproducibility.
theta = np.random.uniform(0,1,size = X_train_temp.shape[1])

In [114]:
# Display the randomly initialised parameter vector.
theta

array([0.2577798 , 0.28038308, 0.38809915, 0.3037192 , 0.56095458,
       0.97283601, 0.84074906, 0.01269282, 0.93721484])

In [115]:
# Batch gradient descent for linear regression on the least-squares cost.
# Starts from the current value of `theta` and takes `n_iter` steps of size
# `alpha`. Because it continues from whatever `theta` currently holds,
# re-running this cell resumes the descent rather than restarting it.
n_iter = 1000
alpha = 0.01

m = X_train_temp.shape[0]  # number of training rows
n = X_train_temp.shape[1]  # number of parameters (bias + features)

for i in range(n_iter):
    y_pred = np.dot(X_train_temp, theta)
    error = y_pred - y_train
    # One matrix-vector product yields every partial derivative at once:
    # entry j equals sum(error * X_train_temp[:, j]).
    update = np.dot(X_train_temp.T, error)
    theta = theta - (alpha * update) / m

print(theta)

[ 2.06597302  0.90167348  0.21050994 -0.25126661  0.21789775  0.02874362
 -0.05612343 -0.18725483 -0.15935509]


In [116]:
# Score the gradient-descent parameters on the held-out split:
# mean absolute error first, then mean squared error.
predections = X_test_temp.dot(theta)
gd_mae = metrics.mean_absolute_error(y_test, predections)
gd_mse = metrics.mean_squared_error(y_test, predections)
print(gd_mae, gd_mse, sep="\n")

0.5721012421090079
0.6193540654035855


# Normal Equation

In [117]:
from numpy.linalg import inv,pinv,LinAlgError

In [119]:
# Closed-form least squares (normal equation): theta = (X^T X)^-1 X^T y.
XTX = np.dot(X_train_temp.T, X_train_temp)
try:
    XTXi = inv(XTX)
except LinAlgError:
    # inv() raises LinAlgError when X^T X is singular; fall back to the
    # Moore-Penrose pseudo-inverse. Only this error is caught, so unrelated
    # failures (and KeyboardInterrupt) still surface.
    XTXi = pinv(XTX)

theta = np.dot(XTXi, np.dot(X_train_temp.T, y_train))

In [123]:
# Test-set predictions from the current theta (theta . X_test^T).
# NOTE(review): no later visible cell reads `predictions`; the next cell
# recomputes the same product into the differently spelled `predections`.
predictions = np.dot(theta, X_test_temp.T)

In [124]:
# Score the closed-form (normal equation) parameters on the held-out split:
# mean absolute error first, then mean squared error.
predections = X_test_temp.dot(theta)
for error_metric in (metrics.mean_absolute_error, metrics.mean_squared_error):
    print(error_metric(y_test, predections))

0.5379335579317596
0.5446775996844716
