In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df = pd.read_csv('Housing Price data set.csv', index_col=0)

In [3]:
df.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
1,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
2,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
3,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
4,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
5,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [4]:
df.driveway.replace(to_replace=['no', 'yes'], value=[0, 1], inplace=True)
df.recroom.replace(to_replace=['no','yes'], value=[0,1], inplace=True)
df.fullbase.replace(to_replace=['no','yes'], value=[0,1], inplace=True)
df.gashw.replace(to_replace=['no','yes'], value=[0,1], inplace=True)
df.airco.replace(to_replace=['no','yes'], value=[0,1], inplace=True)
df.prefarea.replace(to_replace=['no','yes'], value=[0,1], inplace=True)

In [5]:
Y = df['price']
X = df.drop(['price'], axis=1)

In [6]:
X.head()

Unnamed: 0,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
1,5850,3,1,2,1,0,1,0,0,1,0
2,4000,2,1,1,1,0,0,0,0,0,0
3,3060,3,1,1,1,0,0,0,0,0,0
4,6650,3,1,2,1,1,0,0,0,0,0
5,6360,2,1,1,1,0,0,0,0,0,0


In [7]:
Y.head()

1    42000.0
2    38500.0
3    49500.0
4    60500.0
5    61000.0
Name: price, dtype: float64

In [8]:
#Normalising Inputs(2D input)
def normalise(inp):
    return np.array((inp-inp.mean())/inp.std())

In [9]:
Y = np.array(Y)
Y = Y.reshape(Y.shape[0],1)
Y = normalise(Y)

In [10]:
X.shape

(546, 11)

In [11]:
X = normalise(X)

In [12]:
X[:5]

array([[ 0.32273211,  0.04719162, -0.56897304,  0.22150097,  0.40481898,
        -0.4643705 ,  1.36206998, -0.21885329, -0.68040981,  0.35723902,
        -0.55286459],
       [-0.53052646, -1.30894664, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [-0.96407405,  0.04719162, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [ 0.69170878,  0.04719162, -0.56897304,  0.22150097,  0.40481898,
         2.14950883, -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459],
       [ 0.55795474, -1.30894664, -0.56897304, -0.93030406,  0.40481898,
        -0.4643705 , -0.73283202, -0.21885329, -0.68040981, -0.80378779,
        -0.55286459]])

In [13]:
Y[:5]

array([[-0.97913617],
       [-1.11032939],
       [-0.69800783],
       [-0.28568626],
       [-0.26694437]])

In [14]:
X = np.c_[np.ones(X.shape[0]),np.array(X)]

In [15]:
X[5]

array([ 1.        , -0.45673112,  0.04719162, -0.56897304, -0.93030406,
        0.40481898,  2.14950883,  1.36206998, -0.21885329,  1.46701074,
       -0.80378779, -0.55286459])

In [16]:
X.shape

(546, 12)

In [17]:
#Calculating W parameters using normal equations
W = np.matmul(np.matmul(np.linalg.inv(np.matmul(X.T,X)), X.T), Y)

In [18]:
print("Parameters: ", W)
print("Shape of W:", W.shape)

Parameters:  [[1.04083409e-17]
 [2.88210816e-01]
 [5.06367339e-02]
 [2.69834979e-01]
 [2.13386032e-01]
 [8.73298037e-02]
 [6.46931031e-02]
 [9.75587465e-02]
 [1.00626395e-01]
 [2.20510356e-01]
 [1.37044408e-01]
 [1.48922030e-01]]
Shape of W: (12, 1)


In [19]:
#Calculating Cost(Without Averaging)
cost = lambda X,Y,theta : 1/(2*X.shape[0])*np.sum(np.square(np.matmul(X,theta)-Y))

In [20]:
cost(X,Y,W)

0.1634381896792747

# Using Regularization

In [21]:
lambd=500

In [22]:
#Calculating W parameters using normal equations
W = np.matmul(np.matmul(np.linalg.pinv(np.matmul(X.T,X)+lambd), X.T), Y)

In [23]:
print("Parameters: ", W)
print("Shape of W:", W.shape)

Parameters:  [[-0.21947285]
 [ 0.23922913]
 [-0.01733403]
 [ 0.22209671]
 [ 0.08169397]
 [-0.03386699]
 [-0.02770585]
 [-0.05466806]
 [-0.12807807]
 [ 0.10103568]
 [ 0.03888904]
 [ 0.03784567]]
Shape of W: (12, 1)


In [24]:
#Calculating Cost(Without Averaging)
cost = lambda X,Y,theta,lambd : 1/(2*X.shape[0])*(np.sum(np.square(np.matmul(X,theta)-Y))+lambd*np.sum(theta**2))

In [25]:
cost(X,Y,W,lambd)

0.4111756460232463

In [26]:
#Trying different values of lambda
lambdas = [500,1000,1500]

In [27]:
for lambd in lambdas:
    #Calculating W parameters using normal equations
    W = np.matmul(np.matmul(np.linalg.pinv(np.matmul(X.T,X)+lambd), X.T), Y)
    print("Costs for lambd:",lambd)
    print(cost(X,Y,W,lambd))

Costs for lambd: 500
0.4111756460232463
Costs for lambd: 1000
0.5330343556160958
Costs for lambd: 1500
0.6406573247629825
