In [2]:
import numpy as np
from numpy import array, dot, transpose
from numpy.linalg import inv 

import pandas as pd
df = pd.read_csv("/Users/palash/sem2/ML1/Assignment_1/Data_Q1/housing.csv")
df = df.drop("ocean_proximity", axis=1)
df = df.drop("total_bedrooms", axis=1) 

In [3]:
from sklearn.model_selection import train_test_split
train_set,test_set=train_test_split(df,test_size=0.2,random_state=42)
train_y = train_set["median_house_value"].copy()
train_X = train_set.drop("median_house_value", axis=1) 
test_X = test_set.drop("median_house_value", axis=1) 
test_y = test_set["median_house_value"].copy()
train_X = (train_X-train_X.mean())/train_X.std()
train_y.describe()

count     16512.000000
mean     207194.693738
std      115622.626448
min       14999.000000
25%      119800.000000
50%      179850.000000
75%      265125.000000
max      500001.000000
Name: median_house_value, dtype: float64

# 1.1 Closed Form

Closed form solution for theta is given by:
\begin{equation*}
\theta = (X^T.X)^{-1}.X^T.\vec y
\end{equation*}



In [17]:
def ClosedForm(X_train, y_train):
    X= np.array(X_train)
    ones = np.ones(len(X))
    X = np.column_stack((ones,X))
    y = np.array(y_train)
    
    Xt = transpose(X)
    product = dot(Xt, X)
    theInverse = inv(product)
    theta = dot(dot(theInverse, Xt), y)
       
    return theta

## Data Normalization and Standardization

In [18]:
from sklearn import preprocessing
normalized_X = preprocessing.normalize(train_X)
standardized_X = preprocessing.scale(train_X)

In [19]:
#predict labels
test_X_n=np.c_[np.ones((4128,1)),test_X]
theta_best = ClosedForm(standardized_X, train_y)
predicted_labels = np.dot(test_X_n,theta_best)

### mean squared error for Closed Form

In [7]:
from sklearn.metrics import mean_squared_error
mean_squared_error(test_y,predicted_labels)

3710437907436182.0

# 1.2 Gradient Descent
\begin{equation*}
J(\theta) = (\theta^T.X -\vec y)^T.(\theta^T.X -\vec y)
\end{equation*}

\begin{equation*}
\theta ^{new} = \theta ^{old} - \alpha(\theta^T.X -\vec y).X
\end{equation*}

In [8]:
def GradientDescent(x_train, y_train):
    X = np.array(x_train)
    ones = np.ones(len(X))
    X = np.column_stack((ones,X))
    y = np.array(y_train)
    
    m = len(y)
    iter = 10000
    eta=0.001
    theta= np.zeros(8)
    
    for i in range (iter):
        hypothesis = X.dot(theta)
        error = hypothesis - y
        gradient =  (X.T.dot(error)*2)/m
        theta = theta - eta*gradient
    
    return theta

In [9]:
# predict labels
test_X_n=np.c_[np.ones((4128,1)),test_X]
theta_best = GradientDescent(standardized_X, train_y)
predicted_labels = np.dot(test_X_n,theta_best)

### mean squared error for Gradient Descent

In [10]:
from sklearn.metrics import mean_squared_error
mean_squared_error(test_y,predicted_labels)

2058148342911836.8

# 1.3 Newton's Method

\begin{equation*}
\theta ^{new} = \theta ^{old} - \alpha \dfrac{J^{'}(\theta)} {J^{''}(\theta)}
\end{equation*}

\begin{equation*}
J^{'}(\theta) = 2X^T(\theta^T.X -\vec y)
\end{equation*}

\begin{equation*}
J^{''}(\theta) = 2X^T.X
\end{equation*}

In [11]:
def NewtonsMethod(x_train, y_train):
    X = np.array(x_train)
    ones = np.ones(len(X))
    X = np.column_stack((ones,X))
    y = np.array(y_train)
    
    m = len(y)
    theta= np.zeros(8)
    second_deriv = (X.T.dot(X)*2)/m
    second_deriv = np.linalg.inv(second_deriv)
    alpha =0.001
    for i in range (1500):
        hypothesis = X.dot(theta)
        error = hypothesis - y
        first_deriv = (X.T.dot(error)*2)/m
        
        theta = theta - alpha*(second_deriv.dot(first_deriv))
    
    return theta

In [12]:
# predict labels
test_X_n=np.c_[np.ones((4128,1)),test_X]
theta_best = NewtonsMethod(standardized_X, train_y)
predicted_labels = np.dot(test_X_n,theta_best)

### mean squared error for Gradient Descent

In [13]:
from sklearn.metrics import mean_squared_error
mean_squared_error(test_y,predicted_labels)

2243366375420120.2

# 1.4 Ridge Regression

\begin{equation*}
\theta = (X^T.X + \lambda I_p)^{âˆ’1} X^T.\vec y
\end{equation*}

In [14]:

def ridge_regression(x_train, y_train, lam):
    
    X = np.array(x_train)
    ones = np.ones(len(X))
    X = np.column_stack((ones,X))
    y = np.array(y_train)
    
    Xt = np.transpose(X)
    lambda_identity = lam*np.identity(len(Xt))
    theInverse = np.linalg.inv(np.dot(Xt, X)+lambda_identity)
    w = np.dot(np.dot(theInverse, Xt), y)
    return w

In [15]:
# predict labels
test_X_n=np.c_[np.ones((4128,1)),test_X]
theta_best = ridge_regression(standardized_X, train_y,0.001)
predicted_labels = np.dot(test_X_n,theta_best)

### mean squared error for Gradient Descent

In [16]:
from sklearn.metrics import mean_squared_error
mean_squared_error(test_y,predicted_labels)

3710430984962626.5

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression.fit()