## Multiple Linear Regression
<p>Based on Andrew Ng's lectures from the Stanford Machine Learning course.</p>
<p>A housing dataset is used to predict the house price from two predictors (independent variables): area and number of bedrooms.</p>

In [1]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# read the housing data from disk and preview its first rows
DATA_PATH = 'data/ex1data2.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,area,bedrooms,price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


In [10]:
# extracting training data from dataset
# Columns are selected by name so the split does not depend on column order
# or on an extra leading index column being present in the CSV.
# .astype(float) returns fresh float arrays, so later in-place edits to X
# cannot write back into df.
X = df[['area', 'bedrooms']].values.astype(float)   # features
y = df[['price']].values.astype(float)              # target, kept 2-D as one column

In [28]:
# normalize the training data
def featureNormalize(X):
    """Standardize every feature (column) of X to zero mean and unit variance.

    Parameters
    ----------
    X : 2-D array-like, one row per example and one column per feature.

    Returns
    -------
    X_norm : ndarray, same shape as X, holding (X - mu) / sigma per column.
    mu     : ndarray of shape (1, num_features), the per-feature means.
    sigma  : ndarray of shape (1, num_features), the per-feature population
             standard deviations (np.std default), exactly as computed.

    Notes
    -----
    * When X is a floating-point ndarray it is normalized IN PLACE and the
      returned X_norm is the very same object; callers that still need the
      raw values must pass a copy.
    * Any other input (integer array, nested list, ...) is converted to a new
      float array first, so normalized values are never truncated to integers
      and the caller's data is left untouched.
    * A constant feature has sigma == 0; it is divided by 1 instead, so its
      normalized column is all zeros rather than NaN/inf. The returned sigma
      still reports the true 0 for that feature.
    """
    # Reuse the caller's buffer only when it can actually store float results.
    if isinstance(X, np.ndarray) and np.issubdtype(X.dtype, np.floating):
        X_norm = X
    else:
        X_norm = np.array(X, dtype=float)

    # Per-column statistics, kept 2-D so they broadcast against the rows.
    mu = np.mean(X_norm, axis=0, dtype=float, keepdims=True)
    sigma = np.std(X_norm, axis=0, dtype=float, keepdims=True)

    # Guard the division for zero-variance features.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)

    # Write through the existing buffer (slice assignment, not rebinding).
    X_norm[...] = (X_norm - mu) / safe_sigma

    return X_norm, mu, sigma

In [29]:
# standardize the features, keeping each feature's mean and standard deviation
X_norm, mu, sigma = featureNormalize(X)

# preview the first five normalized rows in compact, non-scientific notation
np.set_printoptions(precision=3, suppress=True)
print(X_norm[0:5])

[[ 0.131 -0.226]
 [-0.51  -0.226]
 [ 0.508 -0.226]
 [-0.744 -1.554]
 [ 1.271  1.102]]


In [23]:
# number of training examples = number of rows in X
m = len(X)

# cost function
def computeCost(X, y, theta):
    """Squared-error cost J(theta) = 1/(2m) * sum((X.dot(theta) - y)**2).

    Parameters
    ----------
    X     : ndarray of shape (m, n), one row per example.
    y     : ndarray of targets with the same shape as X.dot(theta).
    theta : ndarray of parameters with n rows.

    Returns
    -------
    The cost summed over the examples (axis 0). For column-vector y/theta of
    shape (m, 1)/(n, 1) this is an ndarray of shape (1,).

    The number of examples m is taken from X itself, so the result is correct
    for whatever data is passed in and does not depend on any global variable.
    """
    num_examples = X.shape[0]
    residuals = np.dot(X, theta) - y
    # np.sum over axis 0 gives the same shape as summing the rows one by one.
    return (1 / (2 * num_examples)) * np.sum(residuals ** 2, axis=0)

# implementing gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent for linear regression.

    Parameters
    ----------
    X         : ndarray of shape (m, n), design matrix (one row per example).
    y         : ndarray of targets with the same shape as X.dot(theta).
    theta     : ndarray of initial parameters with n rows; it is not modified,
                each step builds a new array.
    alpha     : learning rate.
    num_iters : number of update steps to perform.

    Returns
    -------
    (theta, J_history) : the parameters after the last step and an ndarray of
    shape (num_iters, 1) holding the cost computed after each update.

    The number of examples m is read from X, so the update is scaled
    correctly for the data actually passed in rather than by a global.
    """
    num_examples = X.shape[0]
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        yhat = X.dot(theta)
        # gradient of the squared-error cost, before the 1/m scaling
        gradients = (X.T).dot(yhat - y)
        theta = theta - alpha * (1 / num_examples) * gradients
        # cost is recorded after the step, i.e. for the updated theta
        J_history[i] = computeCost(X, y, theta)

    # return theta and J_history
    return (theta, J_history)

In [27]:
# setting values for gradient descent implementation
# adding a column of 1's for theta_0 in front of the normalized features.
# The design matrix is built from X_norm by name, so this cell reads the
# normalized features explicitly instead of relying on X having been
# modified in place.
X_train = np.concatenate((np.ones((X_norm.shape[0], 1)), X_norm), axis=1)

# initializing theta with one entry per column of X_train:
# theta_0 (bias), theta_1 and theta_2 for the 2 predictors
theta_prelim = np.zeros((X_train.shape[1], 1))

# learning rate and no. of iterations
alpha = 0.01
num_iters = 4000

# calculate minimum theta and J's
(theta, J_hist) = gradientDescent(X_train, y, theta_prelim, alpha, num_iters)

# print minimum theta and the cost recorded after the final iteration
print('Minimum theta determined by gradient descent:\n',theta)
print('Minimum Cost function, J:',J_hist[num_iters-1])

Minimum theta determined by gradient descent:
 [[340412.66 ]
 [109447.795]
 [ -6578.354]]
Minimum Cost function, J: [2.043e+09]


### Using scikit-learn

In [None]:
# Using scikit-learn
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
# Fit on X_train, the design matrix built for gradient descent.
# NOTE(review): X_train already carries a leading column of 1's and
# LinearRegression fits its own intercept by default, so the weight reported
# for that constant column is expected to be 0 -- confirm this is intended.
reg.fit(X_train, y)

In [None]:
# show the fitted intercept (bias) followed by the per-feature weights
bias, weights = reg.intercept_, reg.coef_
print(bias, weights)

### Using Normal Equation

In [None]:
# Using Normal Equation with numpy linear algebra function:
# theta solves (X^T X) theta = X^T y. The system is solved directly with
# np.linalg.solve rather than forming the explicit inverse of X^T X.
theta_best = np.linalg.solve(X_train.T.dot(X_train), X_train.T.dot(y))
print(theta_best)

In [None]:
# Using pseudoinverse function (Moore-Penrose inverse) of the design matrix
theta_pinv = np.linalg.pinv(X_train).dot(y)
print(theta_pinv)

In [None]:
# 3-D scatter of the actual prices and the gradient-descent predictions
# over the two normalized features.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the '3d' projection on older matplotlib)
fig = plt.figure()
# add_subplot attaches the 3-D axes to the figure; constructing Axes3D(fig)
# directly no longer does so on recent matplotlib releases.
ax = fig.add_subplot(111, projection='3d')

# y and yhat are column vectors; flatten them so x, y and z are all 1-D
ax.scatter(X_norm[:, 0], X_norm[:, 1], zs=y.ravel(), zdir='z', label='actual')
yhat = X_train.dot(theta)
ax.scatter(X_norm[:, 0], X_norm[:, 1], zs=yhat.ravel(), zdir='z', c='red', label='predicted')

ax.set_xlabel('area (normalized)')
ax.set_ylabel('bedrooms (normalized)')
ax.set_zlabel('price')
ax.legend();
