In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_excel("5. ChoaChuKang Resale transactions 4_room Jan June_2023.xlsx", usecols="A:H", index_col=0)
display(df)

Unnamed: 0_level_0,Block,Street Name,Storey,Floor Area (sqm) /,Remaining Lease (years),Resale Price,Resale Registration Date
S/No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,818A,Choa Chu Kang Ave 1,01 to 03,92,93,525000,2023-06-01
2,249,Choa Chu Kang Ave 2,04 to 06,104,69,485000,2023-06-01
3,289,Choa Chu Kang Ave 3,04 to 06,104,69,445000,2023-06-01
4,429,Choa Chu Kang Ave 4,10 to 12,104,69,450000,2023-06-01
5,442,Choa Chu Kang Ave 4,13 to 15,91,72,448888,2023-06-01
...,...,...,...,...,...,...,...
290,8,Teck Whye Ave,04 to 06,92,54,410000,2023-01-01
291,8,Teck Whye Ave,10 to 12,92,54,420000,2023-01-01
292,15,Teck Whye Lane,07 to 09,92,55,410000,2023-01-01
293,103,Teck Whye Lane,04 to 06,106,65,446888,2023-01-01


## Question 1

Given that,

$
\begin{equation}
E(b) = \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx{_i})^2
\nonumber
\end{equation}
$

The derivative of the error function would be:

$
\begin{align}
E'(b) &= \frac{d}{db} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx{_i})^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx{_i})\, (-x_{i}) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx{_i})\,x_{i} \nonumber
\end{align}
$

In [3]:
x = df['Remaining Lease (years)'].values # features for linear regression
y = df['Resale Price'].values # target variable for linear regression
n = 294 # number of rows

b = 100 # Starting value of b
rate = 0.00_001 # Set learning rate
epsilon = 0.000_001 # Stop algorithm when absolute difference between 2 consecutive x-values is less than epsilon
diff = 1 # difference between 2 consecutive iterates
max_iter = 1000 # set maximum number of iterations
iter = 1 # iterations counter
e = lambda b: 1/n * np.sum((y - np.dot(b,x))**2) # error function
deriv = lambda b: -2*(1/n) * np.matmul((y - np.dot(b,x)),x)

# Now Gradient Descent

while diff > epsilon and iter < max_iter:
    b_new = b - rate * deriv(b)
    print(f"Iteration: {iter}, b-value is: {b_new:.2f}, E(b) is: {e(b_new):.2f}, derivative is: {deriv(b_new):.2f}")
    diff = abs(b_new - b)
    iter = iter + 1
    b = b_new

print('\n')
print(f"Number of iterations is {iter}\nThe local minimum occurs when b is {b:.2f}\nMinimum error is {e(b):,.2f}")

Iteration: 1, b-value is: 856.73, E(b) is: 186710346177.23, derivative is: -66539098.87
Iteration: 2, b-value is: 1522.12, E(b) is: 145107835637.94, derivative is: -58507711.95
Iteration: 3, b-value is: 2107.20, E(b) is: 112942214786.42, derivative is: -51445727.63
Iteration: 4, b-value is: 2621.65, E(b) is: 88072870332.17, derivative is: -45236137.31
Iteration: 5, b-value is: 3074.02, E(b) is: 68844754187.93, derivative is: -39776055.53
Iteration: 6, b-value is: 3471.78, E(b) is: 53978240471.16, derivative is: -34975015.28
Iteration: 7, b-value is: 3821.53, E(b) is: 42483966750.68, derivative is: -30753469.07
Iteration: 8, b-value is: 4129.06, E(b) is: 33596992246.89, derivative is: -27041470.96
Iteration: 9, b-value is: 4399.48, E(b) is: 26725891231.58, derivative is: -23777517.59
Iteration: 10, b-value is: 4637.25, E(b) is: 21413393807.12, derivative is: -20907529.16
Iteration: 11, b-value is: 4846.33, E(b) is: 17305954719.56, derivative is: -18383953.41
Iteration: 12, b-value is: 5

## Question 2

Since,

$
\begin{align}
E(a, b) &= \frac{1}{294} \sum_{i=1}^{294} (y_{i} - (bx_i + a))^2 \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - a)^2 \nonumber
\end{align}
$

We would need to use partial differentiation to obtain the two derivatives of the error function (one w.r.t a and another w.r.t. b)


$
\begin{align}
E_a(a, b) &= \frac{\partial}{\partial a} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - a)^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx_i - a)\, (-1) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx_i - a) \nonumber
\end{align}
$

$
\begin{align}
E_b(a, b) &= \frac{\partial}{\partial b} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - a)^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx_i - a)\, (-x_{i}) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx_i - a)\, x_{i} \nonumber
\end{align}
$

In [4]:
x = df['Remaining Lease (years)'].values # features for linear regression
y = df['Resale Price'].values # target variable for linear regression
n = 294 # number of rows

a = 1000 # Starting value of a
b = 1000 # Starting value of b

rate_a = 0.1 # Set learning rate of a
rate_b = 0.000_01 # Set learning rate of b


epsilon = 0.000_001 # Stop algorithm when absolute difference between 2 consecutive x-values is less than epsilon
change_func = 1 # difference between 2 consecutive iterates
max_iter = 5_000_000 # set maximum number of iterations
iter = 1 # iterations counter
e = lambda a, b: 1/n * np.sum((y - (np.dot(b,x) + a))**2) # error function
partiale_a = lambda a, b: -2*(1/n) * np.sum(y - (np.dot(b,x) + a)) # derivative of error function with respect to a
partiale_b = lambda a, b: -2*(1/n) * np.matmul((y - (np.dot(b,x) + a)),x) # derivative of error function with respect to b

cur_e = e(a,b) # current error

# Now Gradient Descent

while change_func > epsilon and iter < max_iter:
    # Update a and b
    a_new = a - rate * partiale_a(a,b)
    b_new = b - rate * partiale_b(a,b)

    # Update error based on new values of a and b
    new_e = e(a_new,b_new)

    change_func = abs(new_e-cur_e) # stopping criterion: values of function converge

    iter += 1

    cur_e = new_e
    a = a_new
    b = b_new

print('\n')
print(f"Number of iterations is {iter}\nThe local minimum occurs when a is {a:.2f} and b is {b:.2f}\nMinimum error is {e(a, b):,.2f}")



Number of iterations is 5000000
The local minimum occurs when a is 279705.85 and b is 2807.67
Minimum error is 1,186,853,018.02


## Question 3

#### Question 3a: Data Collection

Initially with just one predictor, this is how the dataset looks like:

In [5]:
display(
    df[['Remaining Lease (years)', 'Resale Price']]
    .set_axis(['Remaining Lease (x)', 'Resale Price (y)'], axis=1, inplace=False)
    .head(10)
)

Unnamed: 0_level_0,Remaining Lease (x),Resale Price (y)
S/No,Unnamed: 1_level_1,Unnamed: 2_level_1
1,93,525000
2,69,485000
3,69,445000
4,69,450000
5,72,448888
6,75,470000
7,89,510000
8,74,460000
9,75,455000
10,92,520000


Looking at the initial data source:

In [6]:
display(df.head(10))

Unnamed: 0_level_0,Block,Street Name,Storey,Floor Area (sqm) /,Remaining Lease (years),Resale Price,Resale Registration Date
S/No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,818A,Choa Chu Kang Ave 1,01 to 03,92,93,525000,2023-06-01
2,249,Choa Chu Kang Ave 2,04 to 06,104,69,485000,2023-06-01
3,289,Choa Chu Kang Ave 3,04 to 06,104,69,445000,2023-06-01
4,429,Choa Chu Kang Ave 4,10 to 12,104,69,450000,2023-06-01
5,442,Choa Chu Kang Ave 4,13 to 15,91,72,448888,2023-06-01
6,465,Choa Chu Kang Ave 4,04 to 06,104,75,470000,2023-06-01
7,476C,Choa Chu Kang Ave 5,13 to 15,92,89,510000,2023-06-01
8,484C,Choa Chu Kang Ave 5,10 to 12,101,74,460000,2023-06-01
9,485A,Choa Chu Kang Ave 5,01 to 03,101,75,455000,2023-06-01
10,489C,Choa Chu Kang Ave 5,07 to 09,93,92,520000,2023-06-01


We can see some columns that are potentially useful for our Linear Regression model such as Floor Area. By including Floor Area as another predictor for our model as w, we will obtain the following for our predictor yhat:

$\begin{align}
\hat{y} &= bx + cw + a
\\&= b\,Remaining Lease + c\,Floor Area + a
\nonumber
\end{align}$

This is how the inserted data now looks like:

In [7]:
display(
    df[['Remaining Lease (years)', 'Floor Area (sqm) /', 'Resale Price']]
    .set_axis(['Remaining Lease (x)', 'Floor Area (w)', 'Resale Price (y)'], axis=1, inplace=False)
    .head(10)
)

Unnamed: 0_level_0,Remaining Lease (x),Floor Area (w),Resale Price (y)
S/No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,93,92,525000
2,69,104,485000
3,69,104,445000
4,69,104,450000
5,72,91,448888
6,75,104,470000
7,89,92,510000
8,74,101,460000
9,75,101,455000
10,92,93,520000


#### Question 3b: Implementation of Error Function

With the addition of the two new predictors, each residual would be equal to $y - (bx_i + cw_i + a)$. Hence, the error function would now be:
$
\begin{align}
E(a, b, c) &= \frac{1}{294} \sum_{i=1}^{294} (y_{i} - (bx_i + cw_i + a))^2 \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)^2 \nonumber
\end{align}
$

Since our error function has three input variables (a,b and c), we would need to use partial differentiation to obtain the partial derivatives with respect to each of the variables. This would give us the following three equations

$
\begin{align}
E_a(a, b, c) &= \frac{\partial}{\partial a} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx_i - cw_i - a)\, (-1) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a) \tag a
\end{align}
$

$
\begin{align}
E_b(a, b, c) &= \frac{\partial}{\partial b} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx_i - cw_i - a)\, (-x_{i}) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)\, x_{i} \tag b
\end{align}
$

$
\begin{align}
E_c(a, b, c) &= \frac{\partial}{\partial c} \left( \frac{1}{294} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)^2 \right) \nonumber
\\&= \frac{1}{294} \sum_{i=1}^{294} 2 \cdot (y_{i} - bx_i - cw_i - a)\, (-w_i) \nonumber
\\&= - \frac{1}{147} \sum_{i=1}^{294} (y_{i} - bx_i - cw_i - a)\, w_i \tag c
\end{align}
$

These three derivatives will be needed to update the values of a, b and c during gradient descent in order to converge at the minimum of the error function. Each variable will be updated in the following manner:

$\begin{align}
a := a - \alpha_a \, E_a(a, b, c, d) \tag a
\\
b := b - \alpha_b \, E_a(a, b, c, d) \tag b
\\
c := c - \alpha_c \, E_a(a, b, c, d) \tag c
\end{align}$

with each of the variables having their own unique learning rate

#### Question 3c: Implementation of Code

In [8]:
x = df['Remaining Lease (years)'].values # features for linear regression
w = df['Floor Area (sqm) /'].values # predictor for linear regression
y = df['Resale Price'].values # target variable for linear regression
n = 294 # number of rows

a = 1000 # Starting value of a
b = 1000 # Starting value of b
c = 1000 # Starting value of c

rate_a = 0.01 # Set learning rate for a
rate_b = 0.000_01 # Set learning rate for b
rate_c = 0.000_01 # Set learning rate for c

epsilon = 0.000_000_000_001 # Stop algorithm when absolute difference between 2 consecutive x-values is less than epsilon
change_func = 1 # difference between 2 consecutive iterates
max_iter = 1_000_000 # set maximum number of iterations
iter = 1 # iterations counter

e = lambda a, b, c: 1/n * np.sum((y - (np.dot(b,x) + np.dot(c,w) + a))**2) # error function
partiale_a = lambda a, b, c: -2*(1/n) * np.sum(y - (np.dot(b,x) + np.dot(c,w) + a)) # derivative of error function with respect to a
partiale_b = lambda a, b, c: -2*(1/n) * np.matmul((y - (np.dot(b,x) + np.dot(c,w) + a)),x) # derivative of error function with respect to b
partiale_c = lambda a, b, c: -2*(1/n) * np.matmul((y - (np.dot(b,x) + np.dot(c,w) + a)),w) # derivative of error function with respect to c

cur_e = e(a,b,c) # current error

# Now Gradient Descent

while change_func > epsilon and iter < max_iter:
    # Update a, b and c values
    a_new = a - rate_a * partiale_a(a,b,c)
    b_new = b - rate_b * partiale_b(a,b,c)
    c_new = c - rate_c * partiale_c(a,b,c)

    # Update error based on new values of a and b
    new_e = e(a_new,b_new,c_new)

    change_func = abs(new_e-cur_e) # stopping criterion: values of function converge

    if (iter%100==0):
        print(f"Iter: {iter}, a: {a}, b: {b}, c: {c}")

    iter += 1

    cur_e = new_e
    a = a_new
    b = b_new
    c = c_new

print('\n')
print(f"Number of iterations is {iter}\nThe local minimum occurs when a is {a:.2f}, b is {b:.2f} and c is {c:.2f}\nMinimum error is {e(a,b,c):,.2f}")

Iter: 100, a: 19480.727846987265, b: 2646.9483707640275, c: 2750.457272404878
Iter: 200, a: 19068.79354179704, b: 2785.285367088806, c: 2647.3781311049274
Iter: 300, a: 18716.317377938813, b: 2888.3032754973297, c: 2571.0755824416974
Iter: 400, a: 18408.190298218644, b: 2965.0631729897364, c: 2514.6786626929047
Iter: 500, a: 18133.179233083003, b: 3022.301387779175, c: 2473.079256655248
Iter: 600, a: 17882.932661314648, b: 3065.0259472504104, c: 2442.479813320177
Iter: 700, a: 17651.241308526976, b: 3096.959998557989, c: 2420.057101642237
Iter: 800, a: 17433.488524160308, b: 3120.8714632491806, c: 2403.7122346339306
Iter: 900, a: 17226.24167221397, b: 3138.8181142199483, c: 2391.8848283987877
Iter: 1000, a: 17026.948356598088, b: 3152.329774629152, c: 2383.414841355515
Iter: 1100, a: 16833.71058426573, b: 3162.5437710430374, c: 2377.439860609837
Iter: 1200, a: 16645.116870058995, b: 3170.305634114161, c: 2373.318740994962
Iter: 1300, a: 16460.11741748369, b: 3176.243963042972, c: 2370.

In [9]:
# Use a numpy array to store all the predictions from our model
predictions = np.zeros(n)

x = 'Remaining Lease (years)'
w = 'Floor Area (sqm) /'
y = 'Resale Price'

coefficients = np.array([b,c])
predictors = np.array([df[x], df[w]])

yhat = coefficients @ predictors + a
y = df[y].values

from sklearn.metrics import r2_score, mean_squared_error

print(f"RMSE: {mean_squared_error(y, yhat, squared=False):,.2f}, R2: {r2_score(y, yhat):.2f}")

RMSE: 30,425.17, R2: 0.50
