# Housing Price Prediction using Locally Weighted Regression (LWR)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the housing data; the first CSV column is used as the row index.
df = pd.read_csv('Housing Price data set.csv', index_col=0)
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
1,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
2,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
3,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
4,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
5,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [3]:
# Summarise column dtypes and non-null counts (the yes/no columns load as `object`).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 546 entries, 1 to 546
Data columns (total 12 columns):
price       546 non-null float64
lotsize     546 non-null int64
bedrooms    546 non-null int64
bathrms     546 non-null int64
stories     546 non-null int64
driveway    546 non-null object
recroom     546 non-null object
fullbase    546 non-null object
gashw       546 non-null object
airco       546 non-null object
garagepl    546 non-null int64
prefarea    546 non-null object
dtypes: float64(1), int64(5), object(6)
memory usage: 55.5+ KB


## Preprocessing the data

In [4]:
# Encode the six yes/no columns as 0/1 integers.
#
# The result is assigned back to `df[col]` explicitly. Calling
# `replace(..., inplace=True)` on the `df.<col>` attribute is chained in-place
# mutation, which pandas does not guarantee to propagate to the parent frame
# when copy-on-write is active.
binary_cols = ['driveway', 'recroom', 'fullbase', 'gashw', 'airco', 'prefarea']
yes_no_codes = {'no': 0, 'yes': 1}

for col in binary_cols:
    # Columns that are already numeric were encoded by a previous run of this
    # cell; skipping them keeps the cell safe to re-execute.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    # Any value other than 'no'/'yes' maps to NaN, so the integer cast raises
    # instead of silently keeping an unencoded value.
    df[col] = df[col].map(yes_no_codes).astype('int64')

In [5]:
# Separate the regression target (price) from the remaining feature columns.
Y = df.loc[:, 'price']
X = df.drop(columns=['price'])

In [6]:
def normalise(inp):
    """Standardise each column of ``inp`` to zero mean and unit standard deviation.

    The mean and standard deviation are taken along axis 0, so every column is
    scaled independently for pandas objects and NumPy arrays alike. (Without
    an explicit axis, ``ndarray.mean()``/``ndarray.std()`` reduce a 2-D array
    to a single scalar shared by all columns.)

    Parameters
    ----------
    inp : pandas.DataFrame, pandas.Series or numpy.ndarray
        Data with samples along the first axis.

    Returns
    -------
    numpy.ndarray
        Standardised values, same shape as ``inp``.

    Notes
    -----
    The standard deviation uses the input container's own default estimator:
    pandas defaults to ``ddof=1``, NumPy to ``ddof=0``. A column with zero
    variance produces non-finite values (division by zero).
    """
    centre = inp.mean(axis=0)
    spread = inp.std(axis=0)
    return np.array((inp - centre) / spread)

# Build the model matrices from `df` on every run so that re-executing this
# cell is idempotent. Re-normalising an X that already carries the all-ones
# bias column would divide 0 by 0 for that column and prepend a second one.
price_column = np.array(df['price'])
price_column = price_column.reshape(price_column.shape[0], 1)  # column vector (n, 1)
feature_frame = df.drop(['price'], axis=1)

Y = normalise(price_column)

# Standardise the features, then prepend the bias (intercept) column of ones.
X = normalise(feature_frame)
X = np.c_[np.ones(X.shape[0]), np.array(X)]

In [8]:
# First five rows of the design matrix (bias column first), then its shape.
print(X[:5], X.shape, sep='\n')

[[ 1.          0.32273211  0.04719162 -0.56897304  0.22150097  0.40481898
  -0.4643705   1.36206998 -0.21885329 -0.68040981  0.35723902 -0.55286459]
 [ 1.         -0.53052646 -1.30894664 -0.56897304 -0.93030406  0.40481898
  -0.4643705  -0.73283202 -0.21885329 -0.68040981 -0.80378779 -0.55286459]
 [ 1.         -0.96407405  0.04719162 -0.56897304 -0.93030406  0.40481898
  -0.4643705  -0.73283202 -0.21885329 -0.68040981 -0.80378779 -0.55286459]
 [ 1.          0.69170878  0.04719162 -0.56897304  0.22150097  0.40481898
   2.14950883 -0.73283202 -0.21885329 -0.68040981 -0.80378779 -0.55286459]
 [ 1.          0.55795474 -1.30894664 -0.56897304 -0.93030406  0.40481898
  -0.4643705  -0.73283202 -0.21885329 -0.68040981 -0.80378779 -0.55286459]]
(546, 12)


In [9]:
# First five standardised target values (Y is a column vector).
Y[:5]

array([[-0.97913617],
       [-1.11032939],
       [-0.69800783],
       [-0.28568626],
       [-0.26694437]])

## Defining functions

In [10]:
def kernel(X, xi, tau):
    """Gaussian weights of every row of ``X`` relative to the query point ``xi``.

    The weight of row ``x`` is ``exp(-||xi - x||^2 / (2 * tau^2))``, so rows
    close to ``xi`` get a weight near 1 and distant rows a weight near 0.
    ``tau`` is the bandwidth: larger values flatten the weighting.

    Returns a 1-D array with one weight per row of ``X``.
    """
    squared_distance = np.sum((xi - X) ** 2, axis=1)
    bandwidth_term = 2 * tau ** 2
    return np.exp(-squared_distance / bandwidth_term)

def LWR(X, xi, Y, tau):
    """Locally weighted linear regression prediction at the query point ``xi``.

    Solves the weighted normal equations
    ``theta = pinv(X^T W X) X^T W Y``, where ``W`` is the diagonal matrix of
    ``kernel(X, xi, tau)`` weights, and returns ``theta^T xi``.

    X   : design matrix, one sample per row
    xi  : query point, laid out like a row of ``X``
    Y   : targets, one row per sample
    tau : kernel bandwidth passed through to ``kernel``
    """
    sample_weights = kernel(X, xi, tau)
    # Broadcasting scales column j of X.T (i.e. sample j) by its weight,
    # which equals X^T W without materialising the diagonal matrix W.
    weighted_XT = X.T * sample_weights
    gram_inverse = np.linalg.pinv(np.matmul(weighted_XT, X))
    theta = np.matmul(np.matmul(gram_inverse, weighted_XT), Y)

    # Evaluate the local linear model at the query point.
    return np.dot(theta.T, xi)

In [11]:
# Sanity check: LWR prediction for the first sample, then its actual target.
print(LWR(X, X[0], Y, tau=0.5), Y[0], sep='\n')

[-0.90703628]
[-0.97913617]


In [12]:
def predict(X, Y, tau = 0.5):
    """Run ``LWR`` once per row of ``X``, using ``X``/``Y`` as the fitting data.

    Every row of ``X`` serves in turn as the query point, so the result holds
    predictions for the same samples the local models are fitted on.

    Returns a list with one ``LWR`` result per row, in row order.
    """
    return [LWR(X, X[row], Y, tau) for row in range(X.shape[0])]

In [13]:
def calculate_error(X, Y, Y_pred):
    """Mean squared error between the targets ``Y`` and predictions ``Y_pred``.

    ``X`` is used only for its row count, ``X.shape[0]``; ``Y`` and ``Y_pred``
    are indexed by sample position over that many entries.
    """
    n_samples = X.shape[0]
    squared_residuals = ((Y[k] - Y_pred[k]) ** 2 for k in range(n_samples))
    return sum(squared_residuals) / n_samples

## LWR prediction for different tau

In [14]:
# tau = 0.5
# Predictions are made for the same rows the local models are fitted on, so the
# printed value is a training-set mean squared error on the standardised price.
pred = predict(X,Y,tau=0.5)
print(calculate_error(X,Y,pred))

[0.03186231]


In [15]:
# tau = 0.05
# Narrower kernel: each local fit is dominated by samples very close to the
# query point. The error is again measured on the fitting samples themselves.
# NOTE(review): a lower training error here does not by itself show better
# generalisation - consider evaluating on held-out rows.
pred = predict(X,Y,tau=0.05)
print(calculate_error(X,Y,pred))

[0.00876991]


In [16]:
# tau = 5
# Wider kernel: distant samples keep substantial weight in every local fit.
# The error is again measured on the fitting samples themselves.
pred = predict(X,Y,tau=5)
print(calculate_error(X,Y,pred))

[0.31270592]
