# Linear Regression on Housing Prices

The goal is to predict housing prices based on the square footage (denoted as $x_1$) and the number of bedrooms (denoted as $x_2$).

In [None]:
# Linear regression
# Estimating house prices based on square footage and number of bedrooms.

import numpy as np
# See: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot
import matplotlib.pyplot as plt
from numpy.linalg import inv

# Training set: 16 labelled examples, one per row.
# Column order: square footage, number of bedrooms, price (the label).
train_data = np.array([
    (1500, 2, 60000),
    (1800, 2, 70000),
    (1900, 2, 80000),
    (2000, 3, 90000),
    (2200, 3, 95000),
    (2300, 2, 100000),
    (2350, 3, 110000),
    (2500, 3, 112000),
    (2800, 4, 120000),
    (2900, 3, 135000),
    (3000, 4, 140000),
    (3100, 4, 145000),
    (3300, 5, 162000),
    (3400, 4, 157000),
    (3600, 5, 167000),
    (3900, 5, 178000),
])

# Test set: 14 unlabelled examples, one per row.
# Column order: square footage, number of bedrooms (no price column).
test_data = np.array([
    (1785, 2),
    (1950, 2),
    (2065, 3),
    (2345, 2),
    (2385, 3),
    (2415, 3),
    (2485, 4),
    (2665, 3),
    (2875, 4),
    (3050, 4),
    (3250, 5),
    (3350, 4),
    (3425, 3),
    (3715, 5),
])

# Split the training matrix column-wise: every column except the last is a
# feature, and the last column holds the expected outcome (the price).
trained_featrs = train_data[:, :-1]
trained_labels = train_data[:, -1]

# Rows of the feature matrix are the training examples.
num_examples = trained_featrs.shape[0]

# Feature count is the number of feature columns plus one, to account for
# the additional intercept term (x0 = 1).
num_features = trained_featrs.shape[1] + 1

# Plot the training set: square footage against house price.
x_sq_footage = trained_featrs[:, 0]  # first feature column is square footage

ax1 = plt.subplot()
# Blue point markers with no connecting line (what the 'b.' format means).
ax1.plot(x_sq_footage, trained_labels, color='b', marker='.', linestyle='None')
ax1.set_xlabel('Square Footage')
# Colour the y-axis label and its ticks blue so they match the markers.
ax1.set_ylabel('House Price', color='b')
ax1.tick_params(axis='y', colors='b')
ax1.set_title("Data Set")
plt.show()