In [178]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [179]:
# Load the housing data; the first CSV column is used as the row index.
df = pd.read_csv('Housing Price data set.csv', index_col=0)

In [180]:
# Preview the first few rows of the loaded frame.
df.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
1,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
2,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
3,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
4,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
5,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [181]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(546, 12)

In [182]:
# Encode the yes/no indicator columns as 0/1.
# The result is assigned back through df[col] instead of calling
# replace(..., inplace=True) on an attribute-accessed column: that chained
# in-place form is deprecated in pandas and, with Copy-on-Write enabled, the
# change never reaches df.
YES_NO_CODES = {'no': 0, 'yes': 1}
for col in ['driveway', 'recroom', 'fullbase', 'gashw', 'airco', 'prefarea']:
    # Values outside the mapping (including already-encoded 0/1 when the cell
    # is re-run) pass through unchanged, as they did with replace().
    df[col] = df[col].map(lambda value: YES_NO_CODES.get(value, value))

In [183]:
# Separate the regression target (price) from the remaining feature columns.
Y = df.loc[:, 'price']
X = df.drop(columns=['price'])

In [184]:
#Standardising inputs: subtract the mean, divide by the standard deviation (2D input)
def normalise(inp):
    """Return ``inp`` centred on its mean and scaled by its standard deviation.

    The statistics come from ``inp.mean()`` and ``inp.std()`` called without
    arguments, so what they reduce over is decided by the type of ``inp``:
    a NumPy array reduces over every element (ddof=0), whereas a pandas
    DataFrame reduces column by column (ddof=1).

    Args:
        inp: Array-like object exposing ``mean()`` and ``std()`` and supporting
            arithmetic with their results (e.g. ndarray or DataFrame).

    Returns:
        A NumPy array holding the standardised values.
    """
    centred = inp - inp.mean()
    scaled = centred / inp.std()
    return np.array(scaled)

In [185]:
# Turn the target into an (n, 1) column array and standardise it.
Y = normalise(np.array(Y).reshape(len(Y), 1))

In [186]:
# Dimensions of the feature frame (everything except the price column).
X.shape

(546, 11)

In [187]:
# Standardise the features; X is rebound to the array returned by normalise.
X = normalise(X)

In [188]:
# Inspect the first five rows of the standardised features.
X[:5]

array([[ 0.32273211,  0.04719162, -0.56897304,  0.22150097,  0.40481898,
        -0.4643705 ,  1.36206998, -0.21885329, -0.68040981,  0.35723902,
        -0.55286459],
       [-0.53052646, -1.30894664, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [-0.96407405,  0.04719162, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [ 0.69170878,  0.04719162, -0.56897304,  0.22150097,  0.40481898,
         2.14950883, -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [ 0.55795474, -1.30894664, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459]])

In [189]:
# Inspect the first five standardised target values.
Y[:5]

array([[-0.97913617],
       [-1.11032939],
       [-0.69800783],
       [-0.28568626],
       [-0.26694437]])

In [190]:
# Prepend a column of ones (intercept term) to the feature matrix.
# Note: X is rebound to the widened array, so re-running this cell adds
# another column of ones each time.
X = np.column_stack((np.ones(X.shape[0]), np.array(X)))

In [191]:
# Inspect a single row (index 5) of the matrix with the column of ones prepended.
X[5]

array([ 1.        , -0.45673112,  0.04719162, -0.56897304, -0.93030406,
        0.40481898,  2.14950883,  1.36206998, -0.21885329,  1.46701074,
       -0.80378779, -0.55286459])

In [192]:
# Dimensions of the design matrix, now including the column of ones.
X.shape

(546, 12)

In [193]:
#Gaussian kernel that weights each sample by its closeness to the query point
def kernel(X, xi, tau):
    """Compute one Gaussian weight per row of ``X`` relative to ``xi``.

    Each weight is ``exp(-||xi - x||^2 / (2 * tau^2))`` where ``x`` is a row
    of ``X`` and the squared distance is summed along axis 1.

    Args:
        X: 2-D array of samples, one per row.
        xi: Query point, broadcastable against the rows of ``X``.
        tau: Bandwidth; smaller values make the weights fall off faster.

    Returns:
        1-D array with one weight per row of ``X``.
    """
    offsets = xi - X
    squared_distance = np.sum(offsets ** 2, axis=1)
    return np.exp(-squared_distance / (2 * tau ** 2))

In [194]:
#Locally weighted regression: solve the weighted normal equation for one query point
def LWR(X, xi, Y, tau):
    """Predict the target at ``xi`` with a locally weighted linear fit.

    Every row of ``X`` is weighted by ``kernel(X, xi, tau)`` and the
    parameters are obtained from the weighted normal equation
    ``theta = pinv(X^T W X) X^T W Y``.

    Args:
        X: Design matrix, one sample per row.
        xi: Query point (laid out like a row of ``X``).
        Y: Targets aligned with the rows of ``X``.
        tau: Kernel bandwidth passed through to ``kernel``.

    Returns:
        The prediction ``theta.T @ xi``.
    """
    sample_weights = kernel(X, xi, tau)
    # Broadcasting scales column j of X.T (i.e. sample j) by its weight,
    # which yields X^T W without building the diagonal matrix W.
    XW = X.T * sample_weights
    gram_pinv = np.linalg.pinv(XW @ X)
    theta = (gram_pinv @ XW) @ Y

    # predict value
    return theta.T @ xi

In [195]:
# Predict the standardised target for the first training row with tau=0.5.
LWR(X,X[0],Y,tau=0.5)

array([-0.90703628])

In [196]:
# Actual standardised target of the first row, for comparison with the prediction.
Y[0]

array([-0.97913617])

In [202]:
#Predict Y at every training row, using each row in turn as the query point
pred = [LWR(X, X[row], Y, tau=0.05) for row in range(X.shape[0])]

In [204]:
# Print the first 10 targets followed by the first 10 predictions for comparison
print(Y[:10])
print(pred[:10])

[[-0.97913617]
 [-1.11032939]
 [-0.69800783]
 [-0.28568626]
 [-0.26694437]
 [-0.07952548]
 [-0.07952548]
 [ 0.03292586]
 [ 0.58768579]
 [ 0.76385955]]
[array([-0.97913617]), array([-0.76811151]), array([-0.69800783]), array([-0.28568626]), array([-0.27052604]), array([-0.07952548]), array([-0.07952548]), array([0.03292558]), array([0.58768579]), array([0.76385955])]


In [231]:
#Locally weighted regression fitted on only the k highest-weighted samples (default 20)
def LWR(X, xi, Y, tau, k=20):
    """Predict the target at ``xi`` from a weighted fit on its nearest samples.

    Rows of ``X`` are weighted with ``kernel(X, xi, tau)``; only the ``k`` rows
    with the largest weights are kept, and the weighted normal equation
    ``theta = pinv(X^T W X) X^T W Y`` is solved on that subset.

    Note: this definition reuses the name ``LWR`` and therefore replaces the
    full-data version defined earlier in the notebook once this cell has run.

    Args:
        X: Design matrix, one sample per row.
        xi: Query point (laid out like a row of ``X``).
        Y: Targets aligned with the rows of ``X``.
        tau: Kernel bandwidth passed through to ``kernel``.
        k: Number of highest-weighted samples used for the fit. If ``X`` has
            fewer than ``k`` rows, all of them are used.

    Returns:
        The prediction ``theta.T @ xi``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    W = kernel(X, xi, tau)
    # argsort is ascending, so reverse it and keep the k largest weights.
    # Indexing with just these k positions avoids reordering and copying the
    # whole of X and Y only to discard everything past the first k rows.
    nearest = np.argsort(W)[::-1][:k]
    X_near, Y_near = X[nearest], Y[nearest]

    # Broadcasting scales each kept sample by its weight, giving X^T W.
    XW = X_near.T * W[nearest]
    theta = np.matmul(np.matmul(np.linalg.pinv(np.matmul(XW, X_near)), XW), Y_near)

    # predict value
    return theta.T @ xi

In [234]:
# Predict the standardised target for row 500 with the nearest-samples LWR (tau=0.5).
LWR(X,X[500],Y,tau=0.5)

array([-0.00831615])

In [235]:
# Actual standardised target of row 500, for comparison with the prediction.
Y[500]

array([-0.0083063])