In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
dataset = pd.read_csv('data/SaratogaHouses.csv')

In [3]:
X = dataset[['livingArea','landValue','rooms','centralAir']].values
y = dataset['price'].values

In [4]:
pd.unique(dataset['centralAir'])

array(['No', 'Yes'], dtype=object)

In [5]:
X[:,-1]

array(['No', 'No', 'No', ..., 'No', 'No', 'No'], dtype=object)

In [9]:
X

array([[906, 50000, 5, 0],
       [1953, 22300, 6, 0],
       [1944, 7300, 8, 0],
       ...,
       [1099, 20400, 3, 0],
       [1225, 16800, 7, 0],
       [1959, 26000, 6, 0]], dtype=object)

In [6]:
from sklearn.preprocessing import LabelEncoder

In [7]:
encoder = LabelEncoder()
X[:,-1] = encoder.fit_transform(X[:,-1])

In [8]:
X[:,-1]

array([0, 0, 0, ..., 0, 0, 0], dtype=object)

In [10]:
from sklearn.preprocessing import OneHotEncoder

In [11]:
fuel = dataset['fuel'].values

In [12]:
pd.unique(fuel)

array(['electric', 'gas', 'oil'], dtype=object)

In [13]:
fuel = encoder.fit_transform(fuel)

In [14]:
fuel[:20]

array([0, 1, 1, 1, 1, 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 1, 2, 1, 2, 2])

In [15]:
fuel = fuel.reshape(-1,1)
oneHot = OneHotEncoder()
fuel = oneHot.fit_transform(fuel)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [16]:
fuel

<1728x3 sparse matrix of type '<class 'numpy.float64'>'
	with 1728 stored elements in Compressed Sparse Row format>

In [17]:
fuel.toarray()

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

In [18]:
X

array([[906, 50000, 5, 0],
       [1953, 22300, 6, 0],
       [1944, 7300, 8, 0],
       ...,
       [1099, 20400, 3, 0],
       [1225, 16800, 7, 0],
       [1959, 26000, 6, 0]], dtype=object)

In [19]:
from sklearn.preprocessing import StandardScaler

In [20]:
std = StandardScaler()
X = std.fit_transform(X)



In [21]:
y = y.reshape(-1,1)
y = std.fit_transform(y)



In [22]:
X.shape

(1728, 4)

In [23]:
B = np.zeros(X.shape[1])

In [24]:
B

array([0., 0., 0., 0.])

In [24]:
X0 = np.ones(X.shape[0])

In [30]:
X[:,3]

array([-0.76221375, -0.76221375, -0.76221375, ..., -0.76221375,
       -0.76221375, -0.76221375])

In [34]:
X = np.array([X0, X[:,0],X[:,1],X[:,2],])

In [35]:
X[:10]

array([[ 1.        ,  1.        ,  1.        , ...,  1.        ,
         1.        ,  1.        ],
       [-1.36985441,  0.31951971,  0.30499787, ..., -1.05844161,
        -0.85513584,  0.32920094],
       [ 0.44108417, -0.350095  , -0.77853137, ..., -0.4043636 ,
        -0.50718833, -0.24441402],
       [-0.88163155, -0.44981202,  0.41382706, ..., -1.74527063,
        -0.01799248, -0.44981202]])

In [36]:
X.shape

(4, 1728)

In [37]:
n = len(X)
epochs = 5000
alpha = 0.001

In [38]:
X.shape

(4, 1728)

In [39]:
X = X.T

In [40]:
X.shape

(1728, 4)

In [41]:
X[:5]

array([[ 1.        , -1.36985441,  0.44108417, -0.88163155],
       [ 1.        ,  0.31951971, -0.350095  , -0.44981202],
       [ 1.        ,  0.30499787, -0.77853137,  0.41382706],
       [ 1.        ,  0.30499787, -0.45291972, -0.88163155],
       [ 1.        , -1.47634791, -0.5586007 , -1.74527063]])

In [42]:
def cost_function(X,y,B):
    return (np.sum((np.dot(X,B) - y) ** 2)) / (2 * n)

In [43]:
cost_function(X,y,B)

373248.00000000006

In [47]:
def gradient_descent(X,y,B,epochs,alpha):
    cost_history = [0] * epochs
    for i in range(epochs):
        y_pred = X.dot(B)
        loss = y_pred - y
        gradient = X.T.dot(loss) / n
        B = B - alpha * gradient
        cost_history[i] = cost_function(X,y,B)
#         print(i)
    return cost_history, B

In [48]:
cost,b = gradient_descent(X,y.flatten(),B,epochs=1000,alpha=0.0001)

In [49]:
B

array([0., 0., 0., 0.])

In [179]:
B.shape, X.shape, y.shape

((4,), (1728, 4), (1728, 1))

In [180]:
y_pred = X.dot(B)

In [181]:
y_pred.shape

(1728,)

In [195]:
loss = y_pred - y.flatten()

In [196]:
loss.shape

(1728,)

In [197]:
grad = X.T.dot(loss) / n

In [198]:
grad

array([-4.97379915e-14, -3.07752581e+02, -2.51106709e+02, -2.29465493e+02])