In [1]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import exp
import copy
m = 546


The next two cells implement normal equation for LWR as well as normalize for the dataset.

In [2]:
#calculates Eta matrix for the normal equation
def calcTau(X , X1 , tau):
    return np.exp(np.transpose(X - X1) @ (X - X1) / (-2 * tau * tau))


def normal_eqn_lwr(X , X1 , Y , tau):    
    eta = X @ calcTau(X , X1 , tau)
    eta = np.transpose(eta)
    return np.linalg.pinv(eta @ X) @ (eta @ Y)

In [3]:
def normalize(df):
    for column in df:
        avg1 = (df[column].sum())/df[column].count()
        range1 = (df[column].max() - df[column].min())
        if(range1 == 0):
            df[column] = 1
            continue
        df[column] = (df[column] - avg1)/range1


In [4]:
#dropping the rows of the dataset that have "yes" or "no" attributes and normalizing the dataset,
df = pd.read_csv('House.csv')
df.drop(['driveway' , 'recroom' , 'fullbase' , 'gashw' , 'airco'  , 'prefarea'] , 1 , inplace=True)
df['Unnamed: 0'] = (df['Unnamed: 0'] > 0)*1
normalize(df)
df.head()

Unnamed: 0.1,Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,garagepl
0,1,-0.158313,0.048092,0.00696,-0.095238,0.064103,0.102564
1,1,-0.179525,-0.079056,-0.19304,-0.095238,-0.269231,-0.230769
2,1,-0.112858,-0.143661,0.00696,-0.095238,-0.269231,-0.230769
3,1,-0.046191,0.103075,0.00696,-0.095238,0.064103,-0.230769
4,1,-0.043161,0.083143,-0.19304,-0.095238,-0.269231,-0.230769


In [5]:
# convert the pandas dataframe to a numpy matrix and reshaping it according to our requirements
Y = np.asmatrix(df.price)
Y = Y.reshape(m , 1)
df.drop('price' , axis = 1 , inplace=True)
X = np.asmatrix(df)

print(X.shape , Y.shape)

(546, 6) (546, 1)


In [6]:
#applying the normal equation for LWR to obtain the weights for a particular x -> vector of independent attributes
W = normal_eqn_lwr(X , X[2] , Y , 10)

In [7]:
for i in range(10):
    W = normal_eqn_lwr(X , X[i] , Y , 0.2)
    print(X[i].dot(W) - Y[i])

[[0.16516784]]
[[0.02629187]]
[[-0.02913232]]
[[0.01482158]]
[[-0.05486968]]
[[-0.10170889]]
[[0.04788238]]
[[0.02416377]]
[[-0.19364634]]
[[0.01084089]]
