In [118]:
import numpy as np
import pandas as pd

In [119]:
# fetching data from files
df = pd.read_csv("/content/drive/MyDrive/ML/house_price/housing_prices.csv")

In [120]:
# extracting xi and yi
Y = df["Price"].to_numpy().reshape(-1, 1) / 10000
X = df.drop(columns=["Price"]).to_numpy()
print(Y[0])
print(X[0])

[35.30561938]
[1.66000000e+03 4.00000000e+00 1.00000000e+00 1.37707182e+00
 9.70000000e+01 5.39145006e+00]


In [121]:
# adding bias term to the feature vectors
X_bias = np.c_[np.ones((len(X), 1)), X]
print(X_bias[0])

[1.00000000e+00 1.66000000e+03 4.00000000e+00 1.00000000e+00
 1.37707182e+00 9.70000000e+01 5.39145006e+00]


In [122]:
# defining parameter vector
theta = np.random.rand(len(X_bias[0])).reshape(-1, 1)
print(theta)

[[0.43663346]
 [0.7737017 ]
 [0.87267302]
 [0.66878958]
 [0.00716458]
 [0.74216979]
 [0.89972738]]


In [123]:
# defining hyperparameters
epoch = 100

In [124]:
# defining hypothesis
def hypo(theta, X):
    return X @ theta

In [132]:
# BGD
learning_rate = 1e-10
theta_b = theta

for _ in range(epoch):
    error = (Y - hypo(theta_b, X_bias))
    gradient = (1/len(X_bias)) * (X_bias.T @ error)
    theta_b = theta_b - learning_rate * gradient

print(theta_b)

[[0.43665713]
 [0.8541232 ]
 [0.8727434 ]
 [0.66883678]
 [0.00718896]
 [0.74334268]
 [0.90009033]]


In [133]:
# SGD
learning_rate = 0.00000000001
theta_s = theta

for _ in range(epoch):
    for i in range(len(X_bias)):
        xi = X_bias[i:i+1]
        yi = Y[i]
        error = yi - hypo(theta_s, xi)
        gradient = -xi.T * error
        theta_s = theta_s - learning_rate * gradient

print(theta_s)

[[0.43633773]
 [0.00118926]
 [0.8717962 ]
 [0.66820598]
 [0.00687409]
 [0.72127593]
 [0.89443017]]


In [134]:
# Normal Equation
theta_ne = np.linalg.inv(X_bias.T @ X_bias) @ (X_bias.T @ Y)
print(theta)

[[0.43663346]
 [0.7737017 ]
 [0.87267302]
 [0.66878958]
 [0.00716458]
 [0.74216979]
 [0.89972738]]


In [135]:
# fetching test set
df_test = pd.read_csv("/content/drive/MyDrive/ML/house_price/housing_prices_test.csv")
Y = df_test["Price"].to_numpy().reshape(-1, 1)
X_test = df_test.drop(columns=["Price"]).to_numpy()
X_test = np.c_[np.ones((len(X_test), 1)), X_test]
print("Normal Equation: ", np.mean((Y/10000) - hypo(theta_ne, X_test)))
print("SGD: ", np.mean((Y/10000) - hypo(theta_s, X_test)))
print("BGD: ", np.mean((Y/10000) - hypo(theta_b, X_test)))

Normal Equation:  -0.15134502064863814
SGD:  -7.634428003343904
BGD:  -2427.2930592626735
