In [19]:
import pandas as pd
import numpy 
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
import torch
device = torch.device('cpu')
from autograd import elementwise_grad
import autograd.numpy as np
import time

## 4 (a), 4 (b), 4 (c), 4 (d)

In [20]:
def cost_function(theta):
    error = ((y - np.dot(X,theta))**2)/len(X)
    return error

def normalEquationRegression(X,y):
    X_t = np.transpose(X)
    first = np.linalg.pinv(X_t.dot(X))
    second = X_t.dot(y)
    return first.dot(second)

def gradientDescentRegression(X,y,alpha=0.1,iteration=50):
    
    theta_old = np.random.rand(len(X[0]))
    for i in range(iteration):
        predicted = X.dot(theta_old)
        error = y - predicted
        temp = theta_old + 2/len(X)*alpha*X.T.dot(error)
        theta_old = temp
    return theta_old

def gradientDescentAutogradRegression(X,y,alpha=0.1,iteration=50):
    
    theta_old = np.random.rand(len(X[0]))
    agrad = elementwise_grad(cost_function)
    for i in range(iteration):
        val = agrad(theta_old)
        predicted = X.dot(theta_old)
        error = y - predicted
        temp = theta_old - alpha*val
        theta_old = temp
    return theta_old

## PyTorch Implementation is still left
def gradientDescentPyTorchRegression(X,y,alpha=0.1,iteration=50):
    X = np.c_[np.ones(len(X)),X]
    theta_old = torch.rand(len(X[0]),requires_grad=True)
    print(theta_old)
    for i in range(iteration):
        print(theta_old)
        predicted = X.dot(theta_old)
        error = y - predicted
        temp = theta_old + 2*alpha*X.T.dot(error)
        theta_old = temp
        with torch.no_grad():
            theta_old -= theta_old * theta_old.grad
        theta_old.grad.zero_()
    return theta_old

## 4 (e)

In [21]:
df = pd.read_excel('realestate.xlsx')
df.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.916667,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.916667,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583333,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833333,5.0,390.5684,5,24.97937,121.54245,43.1


In [22]:
linear = LinearRegression()
col = list(df.columns)
col.remove('Y house price of unit area')
col.remove('No')
X = df[col]
y = df['Y house price of unit area']

In [23]:
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)
x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
linear = LinearRegression()
x_train = preprocessing.scale(x_train)
x_test = preprocessing.scale(x_test)
start_time = time.time()
linear.fit(x_train,y_train)
predicted = linear.predict(x_test)
end_time = time.time()
print('The RMSE for the Scikit Learn model is',np.sqrt(mean_squared_error(predicted,y_test)))
print('The time taken for the inbuilt implementation is',end_time - start_time)

The RMSE for the Scikit Learn model is 8.712564884272549
The time taken for the inbuilt implementation is 0.0007538795471191406


### Normal Equation 

In [38]:
X = np.c_[np.ones(len(x_train)),x_train]
y = y_train
start_time = time.time()
val = normalEquationRegression(X,y)
end_time = time.time()
intercept = val[0]
predicted = x_test.dot(normalEquationRegression(X,y)[1:])+intercept
print('The RMSE for my Normal Equation Regression is, ',np.sqrt(mean_squared_error(predicted,y_test)))
print('The time taken for normal equation regression is implementation is',end_time - start_time)

The RMSE for my Normal Equation Regression is,  8.712564884272545
The time taken for normal equation regression is implementation is 0.0012040138244628906


### Vanilla Gradient Descent

In [39]:
X = np.c_[np.ones(len(x_train)),x_train]
y = y_train
start_time = time.time()
val = gradientDescentRegression(X,y,alpha = 0.01,iteration=5000)
end_time = time.time()
intercept = val[0]
predicted = x_test.dot(gradientDescentRegression(X,y,alpha = 0.01,iteration=5000)[1:])+intercept
print('The RMSE for my Gradient Descent is, ',np.sqrt(mean_squared_error(predicted,y_test)))
print('The time taken for my implementation is',end_time - start_time)

The RMSE for my Gradient Descent is,  8.712564744776516
The time taken for my implementation is 0.03239750862121582


### Autograd Gradient Descent

In [40]:
X = np.c_[np.ones(len(x_train)),x_train]
y = y_train
start_time = time.time()
val = gradientDescentAutogradRegression(X,y,alpha = 0.01,iteration=5000)
end_time = time.time()
intercept = val[0]
predicted = x_test.dot(gradientDescentAutogradRegression(X,y,alpha = 0.01,iteration=5000)[1:])+intercept
print('The RMSE for my Gradient Descent is, ',np.sqrt(mean_squared_error(predicted,y_test)))
print('The time taken for my implementation is',end_time - start_time)

The RMSE for my Gradient Descent is,  8.712564764849184
The time taken for my implementation is 0.7496404647827148


### PyTorch Gradient Descent

In [41]:
X = np.c_[np.ones(len(x_train)),x_train]
y = y_train
start_time = time.time()
val = gradientDescentPyTorchRegression(X,y,alpha = 0.01,iteration=5000)
end_time = time.time()
intercept = val[0]
predicted = x_test.dot(gradientDescentPyTorchRegression(X,y,alpha = 0.01,iteration=5000)[1:])+intercept
print('The RMSE for my Gradient Descent is, ',np.sqrt(mean_squared_error(predicted,y_test)))
print('The time taken for my implementation is',end_time - start_time)

tensor([ 0.5588,  0.8796,  0.1395,  0.4163,  0.3972,  0.5320,  0.2983,
         0.2156])
tensor([ 0.5588,  0.8796,  0.1395,  0.4163,  0.3972,  0.5320,  0.2983,
         0.2156])


TypeError: Cannot find a common data type.