In [302]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [303]:
#loading the data
data = pd.read_csv('ex1data2.txt', names=['Size', 'Bedrooms', 'Price'])

#splitting the data
x_data = np.array(data.drop(['Price'], axis=1).values)
y_data = np.array(data['Price'].values).reshape(len(x_data), 1)

In [304]:
#feature scalling the data
def feature_normalize(data):
    mean = np.mean(data)
    std = np.std(data)
    new_data = (data - mean) / std
    
    return new_data, mean, std

x_data, mean, std = feature_normalize(x_data)

In [305]:
#formating the x_data to be a (m, 3) shaped matrix
x_data = np.append(np.ones((len(x_data), 1)), x_data, axis=1)

In [306]:
#creating some utility functions
def cost_function(x_data, y_data, theta):
    h = np.dot(x_data, theta)
    err_sq = (h - y_data) ** 2
    err_sum = np.sum(err_sq)
    cost = (err_sum / (2 * len(x_data)))
    
    return cost

def gradient_descent(x_data, y_data, theta, learning_rate):
    h = np.dot(x_data, theta)
    err = h - y_data
    dtheta = (1 / len(x_data)) * np.dot(x_data.T, err)
    theta = theta - learning_rate * dtheta
    
    return theta

In [307]:
#creating the training function
def train_model(x_data, y_data, theta, learning_rate, nbr_iter):
    cost = []
    for i in range(nbr_iter):
        cost.append(cost_function(x_data, y_data, theta))
        theta = gradient_descent(x_data, y_data, theta, learning_rate)
        
    return theta, cost

In [308]:
#running the training and finding the optimal solution
initial_theta = np.zeros([3, 1])
learning_rate = 0.3
nbr_iter = 1500

theta, cost = train_model(x_data, y_data, initial_theta, learning_rate, nbr_iter)
[theta[0][0], theta[1][0], theta[2][0]], cost[-1]

([116179.3860250721, 153821.9409830553, -102809.6263270349],
 2057828552.0743992)

In [309]:
#creating the prediction function
def predict_price(size, bedrooms):
    data = np.array([size, bedrooms])
    data = (data - mean) / std
    data = np.append(1, data)
    
    return np.dot(data, theta)


In [313]:
#running some predictions to make sure the model is well trained (we can use some testing data here)
predict_price(1650, 3)[0], predict_price(2100, 3)[0], predict_price(4500, 5)[0]

(293237.89956883236, 353794.9203984489, 676585.8121100022)