In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def h(X, w):
    return np.dot(X, w)

In [3]:
def loss_function(X, y, w):
    return np.square(h(X, w) - y).sum() / (2 * len(X))

In [4]:
def grad_w(X, y, w):
    m = len(X)
    return np.dot(X.T, (h(X, w) - y)) / m

In [5]:
def grad_step(w, grad_w, learning_rate=0.001):
    w = w - learning_rate*grad_w
    return w

In [6]:
def grad_descent(X, y, w, num_iter=10000, learning_rate=0.001, epsilon=0.0000001):

    loss = loss_function(X, y, w)
    loss_history = [loss]

    for i in range(num_iter):
        w_best = None
        d_w = grad_w(X, y, w)
        w = grad_step(w, d_w, learning_rate=learning_rate)

        loss = loss_function(X, y, w)
        if abs(loss - loss_history[-1]) < epsilon:
            loss_history.append(loss)
            w_best = d_w
            break
        
        loss_history.append(loss)
        
    return w, w_best, loss_history

In [7]:
df = pd.read_csv("Housing.csv")

In [9]:
def normalization(data):
    return (data - data.mean())/data.std()

In [10]:

n_df = pd.DataFrame()

n_df["price"] = normalization(df["price"])
n_df["area"] = normalization(df["area"])
n_df["bathrooms"] = normalization(df["bathrooms"])
n_df["bedrooms"] = normalization(df["bedrooms"])

In [11]:
X = n_df[['area', 'bathrooms', 'bedrooms']].values
X = np.hstack((np.ones((X.shape[0], 1)) , X))
y = n_df["price"].values.reshape(-1, 1)

n = X.shape[1]
w = np.linspace(0, 0, n).reshape((n, 1))

In [12]:
w, w_best, loss_history = grad_descent(X, y, w, 10000, learning_rate=0.001)
loss_best = loss_history[-1]
print(w_best.flatten())
print(loss_best)

[-1.46671666e-17 -6.74033514e-03 -4.72771123e-03  5.66844067e-03]
0.25605342833253486


In [14]:
a_w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), y)
a_loss = loss_function(X, y, a_w)
print(a_w.flatten())
print(a_loss)

The best analitical w:  [9.86731840e-17 4.39452085e-01 3.72344423e-01 1.60528660e-01]
The best analitical loss function:  0.2559879006532141


In [15]:
best_values_of_loss_function = loss_best
best_analitical_loss_function = a_loss
a = best_values_of_loss_function 
b = best_analitical_loss_function

if a > b:
    print({best_values_of_loss_function.round(4)},{best_analitical_loss_function.round(4)})
elif a < b:
    print({best_values_of_loss_function.round(4)},{best_analitical_loss_function.round(4)})
else:
    print({best_values_of_loss_function.round(4)},{best_values_of_loss_function.round(4)})

{0.2561} {0.256}
