In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the headerless CSV; columns 0-1 go to X and column 2 goes to y
data = pd.read_csv('data2.txt', header=None)

X = data.iloc[:, :2]
y = data.iloc[:, [2]]  # list selector keeps y as a one-column DataFrame
m = y.shape[0]         # number of rows in the data set

In [3]:
# Preview the first rows of X (columns 0 and 1 of the loaded data)
X.head()

Unnamed: 0,0,1
0,2104,3
1,1600,3
2,2400,3
3,1416,2
4,3000,4


In [4]:
# Preview the first rows of y (column 2 of the loaded data)
y.head()

Unnamed: 0,2
0,399900
1,329900
2,369000
3,232000
4,539900


In [5]:
# Standardize the features: fit the scaler on X, then apply it to X.
# The fitted scaler is kept so later inputs can be scaled the same way.
scaler = StandardScaler().fit(X)
X_norm = scaler.transform(X)

# Prepend a column of ones for the intercept term
X_norm_b = np.c_[np.ones((m, 1)), X_norm]

In [6]:
# Gradient Descent
def gradientDescent(X, y, theta, alpha, iterations):
    """Run batch gradient descent for linear regression and return theta.

    Parameters
    ----------
    X : design matrix with one row per training example; it must provide
        ``.dot`` and ``.T`` (for example a NumPy array).
    y : target values with one row per training example; ``len(y)`` is
        used as the number of examples.
    theta : initial parameter values. It is not modified in place.
    alpha : learning rate.
    iterations : number of update steps to perform.

    Returns
    -------
    The parameters obtained after ``iterations`` applications of
    ``theta = theta - alpha * (1 / m) * X.T.dot(X.dot(theta) - y)``,
    where ``m`` is the number of examples.
    """
    # Take the example count from the data that was passed in rather than
    # from a notebook-level global, so the function is self-contained and
    # stays correct for data sets of any size.
    n_examples = len(y)

    # Loop-invariant scaling factor, evaluated in the same order as
    # ``alpha * (1/m)`` so results are numerically unchanged.
    step = alpha * (1 / n_examples)

    for _ in range(iterations):
        residual = X.dot(theta) - y
        theta = theta - step * X.T.dot(residual)

    return theta

In [7]:
# Learning rate, number of update steps and an all-zero starting theta
alpha = 0.1
n_iterations = 400
zero_theta = np.zeros((3, 1))

# Fit theta on the normalized design matrix (bias column included)
theta_best = gradientDescent(X_norm_b, y, zero_theta, alpha, n_iterations)

# Report each fitted parameter on its own line
print(
    "Theta computed from Gradient Descent:\n\tTheta_0: {}\n\tTheta_1: {}\n\tTheta_2: {}".format(
        theta_best[0, 0], theta_best[1, 0], theta_best[2, 0]
    )
)

Theta computed from Gradient Descent:
	Theta_0: 340412.65957446786
	Theta_1: 109447.79558638527
	Theta_2: -6578.353970904724


In [8]:
# Query point: a 1650 sq-ft house with 3 bedrooms
pred_val = np.array([[1650, 3]])

# Scale the query with the already-fitted scaler
pred_val_norm = scaler.transform(pred_val)

# Prepend the intercept entry (a 1 in column 0)
pred_val_norm_b = np.insert(pred_val_norm, 0, 1, axis=1)

# Apply the theta found by gradient descent
prediction = pred_val_norm_b.dot(theta_best)

print(
    "Predicted price of a 1650sq-ft with 3 bedrooms house (using gradient descent):\n\t{}".format(
        prediction[0, 0]
    )
)

Predicted price of a 1650sq-ft with 3 bedrooms house (using gradient descent):
	293081.4645291686


In [9]:
# Add bias term
X_b = np.c_[np.ones((m, 1)), X]

# Normal Equation: solve (X_b^T X_b) theta = X_b^T y for theta.
# np.linalg.solve is used instead of forming the explicit inverse with
# np.linalg.inv: solving the linear system directly is cheaper and more
# accurate numerically, which matters here because X_b is built from the
# unscaled features.
neq_theta_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))

print("Theta computed from Normal Equation:\n\tTheta_0: {}\n\tTheta_1: {}\n\tTheta_2: {}"
      .format(neq_theta_best[0][0], neq_theta_best[1][0], neq_theta_best[2][0]))

Theta computed from Normal Equation:
	Theta_0: 89597.90954279543
	Theta_1: 139.21067401762562
	Theta_2: -8738.01911232782


In [10]:
# Query point: a 1650 sq-ft house with 3 bedrooms, left in raw units
# because the normal-equation fit used the unscaled features
neq_pred_val = np.array([[1650, 3]])

# Prepend the intercept entry (a 1 in column 0)
neq_pred_val_b = np.insert(neq_pred_val, 0, 1, axis=1)

# Apply the theta obtained from the normal equation
neq_prediction = neq_pred_val_b.dot(neq_theta_best)

print(
    "Predicted price of a 1650sq-ft with 3 bedrooms house (using normal equation):\n\t{}".format(
        neq_prediction[0, 0]
    )
)

Predicted price of a 1650sq-ft with 3 bedrooms house (using normal equation):
	293081.46433489426
