In [1]:
# Run this cell only if using Google Colab
# It mounts Google Drive and makes "My Drive" the working directory, so the
# relative CSV path read by the training cell is looked up in that folder.

from google.colab import drive 
# Attach Google Drive under /content/drive.
drive.mount('/content/drive')
# IPython line magic (not plain Python): change the notebook's working directory.
%cd /content/drive/My\ Drive/

Mounted at /content/drive
/content/drive/My Drive


In [2]:
# Run this cell to initialise the training data and train the model
# Note that 201701to202303_processed.csv has to be in the same folder as this notebook

import pandas as pd
import numpy as np
import re
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Load the pre-processed resale transactions. Every column other than
# resale_price becomes a model feature.
df = pd.read_csv("201701to202303_processed.csv")

# Features are passed to XGBoost as a bare array, so only the column ORDER
# (not the column names) links them to the vector built in the prediction cell.
X = df.drop(columns=["resale_price"]).to_numpy()
y = df["resale_price"]

# Fit on the full dataset; this cell makes no train/test split.
best_xgbr = XGBRegressor(n_estimators=500, max_depth=6, learning_rate=0.2)
best_xgbr.fit(X, y)

In [14]:
# Run this cell to enter flat information and make predictions

# Feature vector for the single flat being priced. Slot layout, as written by
# the helpers and prompts in this cell:
#   0      month offset              (get_month_index)
#   1      flat type code            (lookup_flat_type)
#   2      storey band code          (lookup_storey_range)
#   3-28   town one-hot, key + 3     (lookup_town)
#   29-49  flat model one-hot, +29   (lookup_flat_model)
#   50     floor area in sqm
#   51     lease commencement year
input_data = np.zeros(shape=(52,), dtype=np.float64)

# Encode a "YYYY-MM" string as a month offset counted from 2017-01
def get_month_index(month_str):
    """Validate ``month_str`` and store its month offset in ``input_data[0]``.

    The offset is the number of whole months since 2017-01, so "2017-01"
    maps to 0 and "2018-01" to 12.
    NOTE(review): the previous comment described 2017-01 as index 1, but the
    arithmetic has always produced 0 -- confirm which one the training CSV uses.
    Years 2000-2016 pass validation and yield a negative offset.

    Args:
        month_str: Year and month as "20YY-M" or "20YY-MM", e.g. "2023-05".
            Surrounding whitespace is ignored.

    Returns:
        The integer month offset that was written to ``input_data[0]``.

    Raises:
        ValueError: If ``month_str`` is not a 20xx year followed by a month
            in 1..12.
    """
    cleaned = str(month_str).strip()
    if not re.fullmatch(r'20\d{2}-(0?[1-9]|1[0-2])', cleaned):
        # Raise instead of calling exit(): inside a Jupyter/Colab kernel exit()
        # requests a kernel shutdown (discarding the trained model) rather than
        # simply aborting this cell.
        raise ValueError(
            f"Invalid input: {month_str!r} is not a year-month such as 2023-05"
        )
    year, month = cleaned.split('-')
    final = (int(year) - 2017) * 12 + (int(month) - 1)
    input_data[0] = final
    return final

# Lookup flat_type
def lookup_flat_type(input_value):
    """Resolve a flat type and store its code in ``input_data[1]``.

    Accepted spellings (case-insensitive, surrounding whitespace ignored):
    the full label from ``flat_type_data`` ("3 ROOM", "executive",
    "multi-generation") or just the leading room count ("3").

    The earlier implementation used a case-sensitive substring test, which
    rejected the lower-case 'executive' suggested by the prompt and let
    inputs such as "" or "ROOM" silently match several labels.

    Args:
        input_value: Room count or flat type name (str or int).

    Returns:
        The integer key from ``flat_type_data`` written to ``input_data[1]``.

    Raises:
        ValueError: If the value does not name a flat type.
    """
    wanted = str(input_value).strip().upper()
    for key, label in flat_type_data.items():
        label = label.upper()
        # "3 ROOM".split()[0] == "3"; single-word labels compare to themselves.
        if wanted and wanted in (label, label.split()[0]):
            input_data[1] = int(key)
            return int(key)
    # Raise instead of calling exit(): in a Jupyter/Colab kernel exit() requests
    # a kernel shutdown rather than simply aborting this cell.
    raise ValueError(f"Invalid input: {input_value!r} is not a known flat type")

# Lookup storey_range_data
def lookup_storey_range(input_storey):
    """Find the storey band containing ``input_storey`` and store its code.

    Each value of ``storey_range_data`` has the form "LL TO HH"; both ends
    are inclusive. The matching band's key is written to ``input_data[2]``.

    Args:
        input_storey: Storey number as an int.

    Returns:
        The integer key of the matching band.

    Raises:
        ValueError: If no band contains ``input_storey``.
    """
    for index, storey_range in storey_range_data.items():
        lowest, highest = map(int, storey_range.split(" TO "))
        if lowest <= input_storey <= highest:
            input_data[2] = int(index)
            return int(index)
    # Raise instead of calling exit(): in a Jupyter/Colab kernel exit() requests
    # a kernel shutdown rather than simply aborting this cell.
    raise ValueError(
        f"Invalid input: storey {input_storey!r} is outside every known storey range"
    )

# Lookup town
def lookup_town(input_town):
    """Resolve a town name and set its one-hot slot in ``input_data``.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    name match wins; otherwise a partial name is accepted only when it
    identifies exactly one town (e.g. "Kallang" -> "KALLANG/WHAMPOA").

    The earlier implementation set a slot for EVERY town containing the text,
    so "bukit" switched on four one-hot columns and "" switched on all of them.

    Args:
        input_town: Full or partial town name.

    Returns:
        The integer key from ``town_data``; ``input_data[key + 3]`` is set to 1.

    Raises:
        ValueError: If the text matches no town or more than one town.
    """
    query = str(input_town).strip().lower()
    exact = [key for key, town in town_data.items() if town.lower() == query]
    partial = [
        key for key, town in town_data.items() if query and query in town.lower()
    ]
    candidates = exact or partial
    if len(candidates) != 1:
        options = ", ".join(town_data[key] for key in candidates) or "no town"
        # Raise instead of calling exit(): in a Jupyter/Colab kernel exit()
        # requests a kernel shutdown rather than simply aborting this cell.
        raise ValueError(
            f"Invalid input: {input_town!r} must identify exactly one town "
            f"(matched: {options})"
        )
    key = candidates[0]
    # Town one-hot slots start at index 3, after month, flat type and storey.
    input_data[key + 3] = 1
    return key

# Lookup flat_model
def lookup_flat_model(input_model):
    """Resolve a flat model name and set its one-hot slot in ``input_data``.

    The name must equal one of the ``flat_model_data`` labels, compared
    case-insensitively with surrounding whitespace ignored.

    Args:
        input_model: Flat model name, e.g. "Standard" or "Model A".

    Returns:
        The integer key from ``flat_model_data``; ``input_data[key + 29]`` is
        set to 1.

    Raises:
        ValueError: If the name is not a known flat model.
    """
    wanted = str(input_model).strip().lower()
    for key, model in flat_model_data.items():
        if wanted == model.lower():
            # Flat-model one-hot slots start at index 29, after the town slots.
            input_data[key + 29] = 1
            return key
    # Raise instead of calling exit(): in a Jupyter/Colab kernel exit() requests
    # a kernel shutdown rather than simply aborting this cell.
    raise ValueError(f"Invalid input: {input_model!r} is not a known flat model")

# Lookup tables mapping an integer code to its category label.
# NOTE(review): the codes presumably mirror the encoding used when the
# training CSV was produced -- confirm before editing any entry or its order.

# Storey bands are three floors wide: "01 TO 03", "04 TO 06", ..., "49 TO 51".
storey_range_data = {
    band: f"{3 * band + 1:02d} TO {3 * band + 3:02d}" for band in range(17)
}

flat_type_data = dict(enumerate([
    '1 ROOM',
    '2 ROOM',
    '3 ROOM',
    '4 ROOM',
    '5 ROOM',
    'EXECUTIVE',
    'MULTI-GENERATION',
]))

town_data = dict(enumerate([
    'ANG MO KIO',
    'BEDOK',
    'BISHAN',
    'BUKIT BATOK',
    'BUKIT MERAH',
    'BUKIT PANJANG',
    'BUKIT TIMAH',
    'CENTRAL AREA',
    'CHOA CHU KANG',
    'CLEMENTI',
    'GEYLANG',
    'HOUGANG',
    'JURONG EAST',
    'JURONG WEST',
    'KALLANG/WHAMPOA',
    'MARINE PARADE',
    'PASIR RIS',
    'PUNGGOL',
    'QUEENSTOWN',
    'SEMBAWANG',
    'SENGKANG',
    'SERANGOON',
    'TAMPINES',
    'TOA PAYOH',
    'WOODLANDS',
    'YISHUN',
]))

flat_model_data = dict(enumerate([
    '2-room',
    '3Gen',
    'Adjoined flat',
    'Apartment',
    'DBSS',
    'Improved',
    'Improved-Maisonette',
    'Maisonette',
    'Model A',
    'Model A-Maisonette',
    'Model A2',
    'Multi Generation',
    'New Generation',
    'Premium Apartment',
    'Premium Apartment Loft',
    'Premium Maisonette',
    'Simplified',
    'Standard',
    'Terrace',
    'Type S1',
    'Type S2',
]))

# ---- Collect the flat description from the user, then predict ----
# Each helper validates its argument and records the encoded value in the
# shared `input_data` vector itself.

# Transaction month (input() already returns a str)
input_date = input("Enter year-month, e.g 2023-05: ")
month_index = get_month_index(input_date)

# Flat type
input_flat_type = input("Enter number of rooms, e.g 3 or 'executive', or 'multi-generation': ")
flat_type_index = lookup_flat_type(input_flat_type)

# Storey
input_storey = int(input("Enter the storey number, e.g 33: "))
storey_index = lookup_storey_range(input_storey)

# Town
input_town = input("Enter the name of the town, e.g Hougang: ")
town_index = lookup_town(input_town)

# Flat model
input_flat_model = input("Enter flat model, e.g Standard: ")
flat_model_index = lookup_flat_model(input_flat_model)

# Floor area and lease commencement year go straight into their slots.
input_data[50] = int(input("Enter floor area in sqm: "))
input_data[51] = int(input("Enter your lease commencement year, e.g 2003: "))

# Echo the inputs followed by the model's prediction for this single row.
print("\n")
print(
    f"The estimated resale price as of {input_date} for a {input_flat_type} room flat "
    f"on the {input_storey} floor in {input_town}, with flat model \n as {input_flat_model}, "
    f"floor area in sqm as {input_data[50]} "
    f"with its lease commencement year in {int(input_data[51])} is",
    best_xgbr.predict([input_data]),
)

Enter year-month, e.g 2023-05: 2026-10
Enter number of rooms, e.g 3 or 'executive', or 'multi-generation': 5
Enter the storey number, e.g 33: 25
Enter the name of the town, e.g Hougang: Punggol
Enter flat model, e.g Standard: Standard
Enter floor area in sqm: 80
Enter your lease commencement year, e.g 2003: 2001


The estimated resale price as of 2026-10 for a 5 room flat on the 25 floor in Punggol, with flat model 
 as Standard, floor area in sqm as 80.0 with its lease commencement year in 2001 is [534446.4]
