In [1]:
import numpy as np
import pandas as pd

In [16]:
# fetching data from files
df = pd.read_csv("housing_prices.csv")

In [17]:
# extracting xi and yi
Y = df["Price"].to_numpy().reshape(-1, 1) / 10000
X = df.drop(columns=["Price"]).to_numpy()
print(Y[0])
print(X[0])

[35.30561938]
[1.66000000e+03 4.00000000e+00 1.00000000e+00 1.37707182e+00
 9.70000000e+01 5.39145006e+00]


In [18]:
# adding bias term to the feature vectors
X_bias = np.c_[np.ones((len(X), 1)), X]
print(X_bias[0])

[1.00000000e+00 1.66000000e+03 4.00000000e+00 1.00000000e+00
 1.37707182e+00 9.70000000e+01 5.39145006e+00]


In [37]:
# defining parameter vector
theta = np.random.rand(len(X_bias[0])).reshape(-1, 1)
print(theta)

[[0.27540546]
 [0.4665801 ]
 [0.52334363]
 [0.93944542]
 [0.94420795]
 [0.85979642]
 [0.67275847]]


In [38]:
# defining hyperparameters
learning_rate = 0.00000000001
epoch = 100

In [39]:
# defining hypothesis
def hypo(theta, X):
    return X @ theta

In [40]:
# BGD
learning_rate = 0.000000001
theta_b = theta

for _ in range(epoch):
    error = (Y - hypo(theta_b, X_bias))
    gradient = (1/len(X_bias)) * (X_bias.T @ error)
    theta_b = theta_b - learning_rate * gradient

theta_ne = np.linalg.inv(X_bias.T @ X_bias) @ (X_bias.T @ Y)

print(np.mean(theta_b - theta_ne))

-0.3732251188840873


In [9]:
# SGD
theta_s = theta

for _ in range(epoch):
    for i in range(len(X_bias)):
        xi = X_bias[i:i+1]
        yi = Y[i]
        error = yi - hypo(theta_s, xi)
        gradient = -xi.T * error 
        theta_s = theta_s - learning_rate * gradient 

print(theta_s)

[[0.85379862]
 [0.01166994]
 [0.06766077]
 [0.34706964]
 [0.28035263]
 [0.09650293]
 [0.73000954]]


In [10]:
# Normal Equation
theta_ne = np.linalg.inv(X_bias.T @ X_bias) @ (X_bias.T @ Y)
print(theta)

[[0.85403996]
 [0.7775954 ]
 [0.06836163]
 [0.34754347]
 [0.28058795]
 [0.1094356 ]
 [0.73437123]]


In [11]:
# fetching test set
df_test = pd.read_csv("housing_prices_test.csv")
Y = df_test["Price"].to_numpy().reshape(-1, 1)
X_test = df_test.drop(columns=["Price"]).to_numpy()
X_test = np.c_[np.ones((len(X_test), 1)), X_test]
print("Normal Equation: ", np.mean((Y/10000) - hypo(theta_ne, X_test)))
print("SGD: ", np.mean((Y/10000) - hypo(theta_s, X_test)))
print("BGD: ", np.mean((Y/10000) - hypo(theta_b, X_test)))

Normal Equation:  -0.15134502064842278
SGD:  -2.0010578880826047
BGD:  -2196.1083701379307
