# HDB Resale Price Prediction

## Case Studies

### Import Libraries

In [2]:
import random
random.seed(42)
import numpy as np
import pandas as pd
import pickle
import folium
from datetime import datetime
from math import radians
from sklearn.metrics.pairwise import haversine_distances
from sklearn.neighbors import NearestNeighbors

# Warnings
import warnings
warnings.filterwarnings('ignore')

### Import Trained Random Forest Regressor

In [3]:
with open('price_model.pkl', 'rb') as f:
        price_model = pickle.load(f)

### Import HDB Test Dataset

In [4]:
# Importing Data I
data_hdb_test_x = pd.read_csv('../dataset/hdb_test_x.csv')
data_hdb_test_x.index += 1
data_hdb_test_x

Unnamed: 0,floor_area_sqm,date_sold,lease_commence_date,remaining_lease,nearest_distance_to_mrt,healthcare_within_1km_count,healthcare_within_1km_average_rating,healthcare_within_2km_count,healthcare_within_2km_average_rating,recreational_within_1km_count,...,storey_range_26 TO 30,storey_range_31 TO 35,storey_range_36 TO 40,storey_range_41 TO 50,region_Central,region_City,region_East,region_North,region_South,region_West
1,104.0,2023-01-01,1983,59.416667,0.777,4.0,2.925000,10.0,2.530000,0.0,...,False,False,False,False,False,False,False,False,False,True
2,122.0,2022-01-01,1996,73.666667,0.365,7.0,2.471429,74.0,2.728378,2.0,...,True,False,False,False,True,False,False,False,False,False
3,121.0,2011-08-01,1996,84.000000,0.421,1.0,4.800000,6.0,3.683333,4.0,...,False,False,False,False,False,False,False,False,False,True
4,104.0,2018-05-01,1998,79.166667,1.307,4.0,2.050000,22.0,2.054545,2.0,...,False,False,False,False,True,False,False,False,False,False
5,91.0,2014-08-01,1980,65.000000,0.773,9.0,2.444444,18.0,2.550000,1.0,...,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63368,112.0,2021-09-01,2016,93.833333,0.267,4.0,3.300000,12.0,2.791667,4.0,...,False,False,False,False,False,False,False,True,False,False
63369,74.0,2022-06-01,1986,62.833333,0.756,5.0,3.460000,9.0,2.555556,1.0,...,False,False,False,False,False,False,False,False,False,True
63370,113.0,2022-12-01,2016,92.416667,1.260,4.0,2.975000,16.0,2.306250,2.0,...,False,False,False,False,False,False,False,True,False,False
63371,61.0,2019-05-01,1974,54.000000,1.013,1.0,3.300000,10.0,2.350000,2.0,...,False,False,False,False,False,False,False,False,False,True


In [5]:
# Understanding Data I
data_hdb_test_x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63372 entries, 1 to 63372
Data columns (total 40 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   floor_area_sqm                          63372 non-null  float64
 1   date_sold                               63372 non-null  object 
 2   lease_commence_date                     63372 non-null  int64  
 3   remaining_lease                         63372 non-null  float64
 4   nearest_distance_to_mrt                 63372 non-null  float64
 5   healthcare_within_1km_count             63372 non-null  float64
 6   healthcare_within_1km_average_rating    63372 non-null  float64
 7   healthcare_within_2km_count             63372 non-null  float64
 8   healthcare_within_2km_average_rating    63372 non-null  float64
 9   recreational_within_1km_count           63372 non-null  float64
 10  recreational_within_1km_average_rating  63372 non-null  fl

In [6]:
# Importing Data II
data_hdb_test_y = pd.read_csv('../dataset/hdb_test_y.csv')
data_hdb_test_y.index += 1
data_hdb_test_y

Unnamed: 0,resale_price
1,528000.0
2,932000.0
3,506000.0
4,580000.0
5,460000.0
...,...
63368,669000.0
63369,345000.0
63370,670000.0
63371,190000.0


In [7]:
# Understanding Data II
data_hdb_test_y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63372 entries, 1 to 63372
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   resale_price  63372 non-null  float64
dtypes: float64(1)
memory usage: 495.2 KB


### Import HDB Merged Dataset

In [8]:
# Import Cleaned Dataset 
hdb_last15_cleaned = pd.read_csv('../dataset/hdb_last15_cleaned.csv')
hdb_last15_cleaned

Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,resale_price,month,lease_commence_date,storey_range,block,remaining_lease,...,recreational_within_1km_average_rating,recreational_within_2km_count,recreational_within_2km_average_rating,education_within_1km_count,education_within_1km_average_rating,education_within_2km_count,education_within_2km_average_rating,postal_code,region,price_per_sqm
0,SEMBAWANG,5 ROOM,Premium Apartment,111.0,362000.0,2009-07-01,2001,01 TO 05,357A,91.000000,...,3.733333,6.0,3.916667,4.0,4.275,8.0,4.250,751357.0,North,3261.261261
1,SEMBAWANG,5 ROOM,Premium Apartment,110.0,370000.0,2009-08-01,2001,06 TO 10,357A,91.000000,...,3.733333,6.0,3.916667,4.0,4.275,8.0,4.250,751357.0,North,3363.636364
2,SEMBAWANG,5 ROOM,Premium Apartment,110.0,403000.0,2010-01-01,2001,16 TO 20,357A,90.000000,...,3.733333,6.0,3.916667,4.0,4.275,8.0,4.250,751357.0,North,3663.636364
3,SEMBAWANG,4 ROOM,Premium Apartment,95.0,350000.0,2010-07-01,2001,01 TO 05,357A,90.000000,...,3.733333,6.0,3.916667,4.0,4.275,8.0,4.250,751357.0,North,3684.210526
4,SEMBAWANG,4 ROOM,Premium Apartment,95.0,399000.0,2010-07-01,2001,10 TO 15,357A,90.000000,...,3.733333,6.0,3.916667,4.0,4.275,8.0,4.250,751357.0,North,4200.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316853,BUKIT MERAH,5 ROOM,Improved,114.0,921000.0,2022-10-01,1974,10 TO 15,87,50.750000,...,4.057143,78.0,4.052564,3.0,2.700,25.0,3.348,160087.0,South,8078.947368
316854,BUKIT MERAH,5 ROOM,Improved,117.0,930000.0,2022-10-01,1974,01 TO 05,87,50.750000,...,4.057143,78.0,4.052564,3.0,2.700,25.0,3.348,160087.0,South,7948.717949
316855,BUKIT MERAH,5 ROOM,Improved,117.0,978000.0,2022-12-01,1974,10 TO 15,87,50.666667,...,4.057143,78.0,4.052564,3.0,2.700,25.0,3.348,160087.0,South,8358.974359
316856,BUKIT MERAH,5 ROOM,Improved,114.0,950000.0,2022-12-01,1974,21 TO 25,87,50.583333,...,4.057143,78.0,4.052564,3.0,2.700,25.0,3.348,160087.0,South,8333.333333


In [9]:
# Understanding Data 
hdb_last15_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 316858 entries, 0 to 316857
Data columns (total 35 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   town                                    316858 non-null  object 
 1   flat_type                               316858 non-null  object 
 2   flat_model                              316858 non-null  object 
 3   floor_area_sqm                          316858 non-null  float64
 4   resale_price                            316858 non-null  float64
 5   month                                   316858 non-null  object 
 6   lease_commence_date                     316858 non-null  int64  
 7   storey_range                            316858 non-null  object 
 8   block                                   316858 non-null  object 
 9   remaining_lease                         316858 non-null  float64
 10  address                                 3168

## Mini Case Study: HDB Flat in Tampines

The objective of this case study is to provide a visualisation of whether and how a flat's features can contribute to its resale price. More specifically, we hope to present a flat in its geographical context such as its location, and the amenities around it to and uncover the impact that its geographical context can have on its resale price.

### Selection of Specific Flat

In alignment with the case study's objective, HDB flats that exhibit great performance in terms of POI-related data will be identified. Thus, we calculate the 75th percentile of each POI-related measure below to serve as a filter for potential target flats.

In [11]:
# Finding the 75th Percentile for POI Data
q3_healthcare_1km_count = np.percentile(data_hdb_test_x['healthcare_within_1km_count'], 75)
q3_healthcare_1km_rating = np.percentile(data_hdb_test_x['healthcare_within_1km_average_rating'], 75)

q3_healthcare_2km_count = np.percentile(data_hdb_test_x['healthcare_within_2km_count'], 75)
q3_healthcare_2km_rating = np.percentile(data_hdb_test_x['healthcare_within_2km_average_rating'], 75)

q3_education_1km_count = np.percentile(data_hdb_test_x['education_within_1km_count'], 75)
q3_education_1km_rating = np.percentile(data_hdb_test_x['education_within_1km_average_rating'], 75)

q3_education_2km_count = np.percentile(data_hdb_test_x['education_within_2km_count'], 75)
q3_education_2km_rating = np.percentile(data_hdb_test_x['education_within_2km_average_rating'], 75)

q3_recreational_1km_count = np.percentile(data_hdb_test_x['recreational_within_1km_count'], 75)
q3_recreational_1km_rating = np.percentile(data_hdb_test_x['recreational_within_1km_average_rating'], 75)

q3_recreational_2km_count = np.percentile(data_hdb_test_x['recreational_within_2km_count'], 75)
q3_recreational_2km_rating = np.percentile(data_hdb_test_x['recreational_within_2km_average_rating'], 75)

q1_nearest_distance_to_mrt = np.percentile(data_hdb_test_x['nearest_distance_to_mrt'], 25)

print("Upper Quartile for Healthcare POI Count (1km): ", q3_healthcare_1km_count)
print("Upper Quartile for Healthcare POI Avg Rating (1km): ", q3_healthcare_1km_rating)

print("Upper Quartile for Healthcare POI Count (2km): ", q3_healthcare_2km_count)
print("Upper Quartile for Healthcare POI Avg Rating (2km): ", q3_healthcare_2km_rating)

print("Upper Quartile for Education POI Count (1km): ", q3_education_1km_count)
print("Upper Quartile for Education POI Avg Rating (1km): ", q3_education_1km_rating)

print("Upper Quartile for Education POI Count (2km): ", q3_education_2km_count)
print("Upper Quartile for Education POI Avg Rating (2km): ", q3_education_2km_rating)

print("Upper Quartile for Recreational POI Count (1km): ", q3_recreational_1km_count)
print("Upper Quartile for Recreational POI Avg Rating (1km): ", q3_recreational_1km_rating)

print("Upper Quartile for Recreational POI Count (2km): ", q3_recreational_2km_count)
print("Upper Quartile for Recreational POI Avg Rating (2km): ", q3_recreational_2km_rating)

print("Lower Quartile for Nearest Distance to Mrt:", q1_nearest_distance_to_mrt)

Upper Quartile for Healthcare POI Count (1km):  8.0
Upper Quartile for Healthcare POI Avg Rating (1km):  3.233333333333333
Upper Quartile for Healthcare POI Count (2km):  22.0
Upper Quartile for Healthcare POI Avg Rating (2km):  2.8
Upper Quartile for Education POI Count (1km):  6.0
Upper Quartile for Education POI Avg Rating (1km):  4.266666666666667
Upper Quartile for Education POI Count (2km):  17.0
Upper Quartile for Education POI Avg Rating (2km):  4.205263157894737
Upper Quartile for Recreational POI Count (1km):  5.0
Upper Quartile for Recreational POI Avg Rating (1km):  4.15
Upper Quartile for Recreational POI Count (2km):  13.0
Upper Quartile for Recreational POI Avg Rating (2km):  4.136363636363637
Lower Quartile for Nearest Distance to Mrt: 0.338


Upon trial and error, it is observed that the most encompassing combination of POI-measures for which there exists HDB flats above the 75th percentile across all measures includes:
1. Average rating of healthcare POIs within 2km
2. Average rating of healthcare POIs within 1km
3. Average rating of education POIs within 2km
4. Average rating of education POIs within 1km
5. Number of recreational POIs within 2km
6. Average rating of recreation POIs within 2km

In additionto

In [12]:
# Finding the HDB Flats with the best POI statistics
# The maximum number/combination of POI-related attributes for which there exists flats with POI values above the 75th percentile has been used
potential_cases1_x = data_hdb_test_x[(data_hdb_test_x.healthcare_within_2km_average_rating > q3_healthcare_2km_rating) &
                                    (data_hdb_test_x.healthcare_within_1km_average_rating > q3_healthcare_1km_rating) &
                                    (data_hdb_test_x.education_within_2km_average_rating > q3_education_2km_rating) &
                                    (data_hdb_test_x.education_within_1km_average_rating > q3_education_1km_rating) &
                                    (data_hdb_test_x.recreational_within_2km_count > q3_recreational_2km_count) &
                                    (data_hdb_test_x.recreational_within_2km_average_rating > q3_recreational_2km_rating) &
                                    (data_hdb_test_x.nearest_distance_to_mrt < q1_nearest_distance_to_mrt)]
                
potential_cases1_x

Unnamed: 0,floor_area_sqm,date_sold,lease_commence_date,remaining_lease,nearest_distance_to_mrt,healthcare_within_1km_count,healthcare_within_1km_average_rating,healthcare_within_2km_count,healthcare_within_2km_average_rating,recreational_within_1km_count,...,storey_range_26 TO 30,storey_range_31 TO 35,storey_range_36 TO 40,storey_range_41 TO 50,region_Central,region_City,region_East,region_North,region_South,region_West
1589,127.0,2020-01-01,1994,73.833333,0.194,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
2377,124.0,2019-08-01,1994,74.333333,0.208,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
8411,126.0,2011-06-01,1994,82.0,0.335,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
14528,121.0,2021-08-01,1994,72.333333,0.161,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
19148,127.0,2017-08-01,1994,76.25,0.194,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
21785,137.0,2013-03-01,1994,80.0,0.331,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
22966,104.0,2015-10-01,1994,78.0,0.194,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
23368,121.0,2015-04-01,1994,78.0,0.335,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
23785,107.0,2015-09-01,1994,78.0,0.161,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False
28680,105.0,2009-03-01,1994,84.0,0.161,4.0,4.075,18.0,2.822222,3.0,...,False,False,False,False,False,False,True,False,False,False


To further narrow down the choices for the target flat, we will now take other non-POI related attributes into consideration and identify flats that perform poorly in terms of attributes that are not related to their geographical context.

In [13]:
# Narrowing choices by looking at non-POI data
q3_floor_area = np.percentile(data_hdb_test_x['floor_area_sqm'], 75)
q2_remaining_lease = np.percentile(data_hdb_test_x['remaining_lease'], 50)

print("Upper Quartile for Floor Area: ", q3_floor_area)
print("Median for Remaining Lease: ", q2_remaining_lease)

Upper Quartile for Floor Area:  113.0
Median for Remaining Lease:  75.0


In considering the various non-POI related numerical attributes, the combination that provides us with the least number of choices (> 0) is as illustrated below.

In [14]:
# In contrast to how we selected the appropriate flats using POI-related attributes, here flats that rank poorly in terms of non POI-related attributes are selected
# This is to better highlight any impact that the amenities near a flat may have on its resale price
potential_cases2_x = potential_cases1_x[(potential_cases1_x.floor_area_sqm < q3_floor_area) & 
                                    (potential_cases1_x.remaining_lease < q2_remaining_lease) ]

pd.set_option('display.max_columns', None)
potential_cases2_x

Unnamed: 0,floor_area_sqm,date_sold,lease_commence_date,remaining_lease,nearest_distance_to_mrt,healthcare_within_1km_count,healthcare_within_1km_average_rating,healthcare_within_2km_count,healthcare_within_2km_average_rating,recreational_within_1km_count,recreational_within_1km_average_rating,recreational_within_2km_count,recreational_within_2km_average_rating,education_within_1km_count,education_within_1km_average_rating,education_within_2km_count,education_within_2km_average_rating,price_per_sqm,flat_type_1 ROOM,flat_type_2 ROOM,flat_type_3 ROOM,flat_type_4 ROOM,flat_type_5 ROOM,flat_type_EXECUTIVE,flat_type_MULTI-GENERATION,storey_range_01 TO 05,storey_range_06 TO 10,storey_range_10 TO 15,storey_range_16 TO 20,storey_range_21 TO 25,storey_range_26 TO 30,storey_range_31 TO 35,storey_range_36 TO 40,storey_range_41 TO 50,region_Central,region_City,region_East,region_North,region_South,region_West
32252,107.0,2019-05-01,1994,73.833333,0.335,4.0,4.075,18.0,2.822222,3.0,4.1,16.0,4.1375,5.0,4.38,16.0,4.28125,3813.084112,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False
41908,108.0,2022-01-01,1994,71.916667,0.208,4.0,4.075,18.0,2.822222,3.0,4.1,16.0,4.1375,5.0,4.38,16.0,4.28125,4861.111111,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False
43737,105.0,2022-10-01,1994,70.5,0.331,4.0,4.075,18.0,2.822222,3.0,4.1,16.0,4.1375,5.0,4.38,16.0,4.28125,5047.619048,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False


Out of the 3 remaining potential cases, the flat at index 43737 is chosen. This is because it while its location and flat type are the same as the other 2, it has the lowest storey range. Given that in Singapore, the general preference leans towards flats in the higher storey range, this particular flat is chosen to further highlight how its geographical context might affect its resale price.

In [15]:
selected_case_x = data_hdb_test_x[data_hdb_test_x.index == 43737]
selected_case_y = data_hdb_test_y[data_hdb_test_y.index == 43737]

In [16]:
# Retrieve from cleaned dataset
actual_price = selected_case_y.iloc[0,0]
actual_fas = selected_case_x.iloc[0,0]
actual_month = selected_case_x.iloc[0,1]

hdb_last15_cleaned_selected = hdb_last15_cleaned[(hdb_last15_cleaned.resale_price == actual_price) &
                                                 (hdb_last15_cleaned.floor_area_sqm == actual_fas) &
                                                 (hdb_last15_cleaned.month == actual_month)]

hdb_last15_cleaned_selected

Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,resale_price,month,lease_commence_date,storey_range,block,remaining_lease,address,full_address,lat,long,nearest_mrt,nearest_distance_to_mrt,flat_category,street_name,avg_long,avg_lat,healthcare_within_1km_count,healthcare_within_1km_average_rating,healthcare_within_2km_count,healthcare_within_2km_average_rating,recreational_within_1km_count,recreational_within_1km_average_rating,recreational_within_2km_count,recreational_within_2km_average_rating,education_within_1km_count,education_within_1km_average_rating,education_within_2km_count,education_within_2km_average_rating,postal_code,region,price_per_sqm
238602,TAMPINES,4 ROOM,Model A,105.0,530000.0,2022-10-01,1994,01 TO 05,390,70.5,390 TAMPINES AVE 7,390 TAMPINES AVENUE 7 SINGAPORE 520390,1.356182,103.957616,tampines east,0.331,4 ROOM Model A,TAMPINES AVE 7,103.956991,1.355567,4.0,4.075,18.0,2.822222,3.0,4.1,16.0,4.1375,5.0,4.38,16.0,4.28125,520390.0,East,5047.619048


In [17]:
latitude = hdb_last15_cleaned_selected.lat
longitude = hdb_last15_cleaned_selected.long

latitude
longitude

238602    103.957616
Name: long, dtype: float64

### Model Prediction

In [19]:
# Predictor Columns
predictor_cols = ['floor_area_sqm', 'remaining_lease', 'nearest_distance_to_mrt', 
                  'healthcare_within_1km_count', 'healthcare_within_1km_average_rating',
                  'healthcare_within_2km_count', 'healthcare_within_2km_average_rating', 
                  'recreational_within_1km_count', 'recreational_within_1km_average_rating', 
                  'recreational_within_2km_count', 'education_within_1km_count',
                  'education_within_1km_average_rating', 'education_within_2km_count',
                  'price_per_sqm', 'flat_type_3 ROOM', 'flat_type_4 ROOM',
                  'flat_type_5 ROOM', 'flat_type_EXECUTIVE', 'storey_range_01 TO 05',
                  'storey_range_06 TO 10', 'storey_range_10 TO 15', 'region_Central',
                  'region_East', 'region_North', 'region_South', 'region_West']

predictors = selected_case_x[predictor_cols]

In [20]:
# Predicted Value
predicted_price = price_model.predict(predictors)[0]
print("Predicted Resale Price:", predicted_price)

# Actual Value
print("Actual Resale Price:", actual_price)

Predicted Resale Price: 530000.0
Actual Resale Price: 530000.0


From the Model's prediction, we can see that the predicted price is the actual resale price. This further reinforces the predictive power of our trained model. 

### POIs Around HDB Flat

In [22]:
# Importing Filtered POI Data
data_poi_filtered = pd.read_csv('../dataset/data_poi_filtered.csv')

# get categorical column
data_poi_filtered['type'] = data_poi_filtered[['Healthcare', 'Recreational', 'Education']].idxmax(axis=1)
data_poi_filtered

Unnamed: 0,name,rating,lat,lng,Healthcare,Recreational,Education,type
0,Quayside Isle,4.3,1.247681,103.842072,False,True,False,Recreational
1,Sime Darby Centre,3.7,1.336644,103.783597,False,True,False,Recreational
2,PoMo,3.8,1.300192,103.849220,False,True,False,Recreational
3,Tampines Hub,4.6,1.353108,103.940361,False,True,False,Recreational
4,City Plaza,3.8,1.314764,103.893408,False,True,False,Recreational
...,...,...,...,...,...,...,...,...
1558,Orchid Garden & Koi Pond,4.7,1.353986,103.989008,False,True,False,Recreational
1559,JCube,4.1,1.333310,103.740199,False,True,False,Recreational
1560,Giant Panda Forest - River Safari,4.4,1.403751,103.792624,False,True,False,Recreational
1561,Tiong Bahru Plaza,4.1,1.286560,103.827543,False,True,False,Recreational


In [23]:
# Function to calculate Haversine distance between two points
def haversine_distance(lng1, lat1, lng2, lat2):
    lng1, lat1, lng2, lat2 = map(radians, [lng1, lat1, lng2, lat2])
    distances = haversine_distances([[lat1, lng1], [lat2, lng2]])
    return distances[1, 0] * 6371.0  # multiply by Earth radius to get km

# Function to return list of POIs within a certain radius of a HDB unit
def get_pois_within_radius(hdb_lng, hdb_lat, data_poi_filtered, radius):

    nearest_pois = []

    for index, poi in data_poi_filtered.iterrows():
        poi_lng = poi['lng']
        poi_lat = poi['lat']
        poi_name = poi['name']
        poi_type = poi['type']
        distance = haversine_distance(hdb_lng, hdb_lat, poi_lng, poi_lat)
        if distance <= radius:
            nearest_pois.append([poi_name, poi_type, poi_lat, poi_lng])
        
    return nearest_pois

In [24]:
# get POIs within a 2km radius of the selected case study
nearest_pois = get_pois_within_radius(longitude, latitude, data_poi_filtered, 2)
nearest_pois

[['Tampines Hub', 'Recreational', 1.3531079, 103.9403612],
 ['N4 Neighbourhood Centre', 'Recreational', 1.3605239, 103.9531662],
 ['Tampines Central Shopping Street', 'Recreational', 1.3556772, 103.9452237],
 ['Tampines N2 Shopping Street',
  'Recreational',
  1.3527441999999998,
  103.953689],
 ['Purity Haven', 'Recreational', 1.3524927, 103.9438027],
 ['Our Tampines Hub (North Plaza Entrance',
  'Recreational',
  1.3541097,
  103.9403594],
 ['Simei Medical Centre', 'Healthcare', 1.3432366000000002, 103.9536521],
 ['Neptune Healthcare Medical and Surgery',
  'Healthcare',
  1.3535108,
  103.9584973],
 ['Fire Post', 'Healthcare', 1.368442, 103.957359],
 ['AcuHealth TCM Medical Centre',
  'Healthcare',
  1.3598358999999998,
  103.9535112],
 ['Ruby Medical Centre Pte. Ltd.', 'Healthcare', 1.3554003, 103.9451369],
 ['SGH', 'Healthcare', 1.358415, 103.969151],
 ['SATA Commhealth Tampines Medical Centre',
  'Healthcare',
  1.3531422,
  103.9421835],
 ['The Integrated Building CGH', 'Healthc

### HDB Flat Map

In [29]:
hdb_map = folium.Map(location = [latitude, longitude], zoom_start = 14)

# HDB Flat
popup_content_hdb = f"Predicted Price: {predicted_price}<br>Actual Price: {actual_price}"
folium.Marker(location = [latitude, longitude], 
              icon = folium.Icon(color = 'red', prefix = "fa", icon = "house"), 
              tooltip = "Selected HDB Unit",
              popup = folium.Popup(popup_content_hdb, max_width=500)).add_to(hdb_map)

color_mapping = {
    'Healthcare': 'blue',
    'Education': 'green',
    'Recreational': 'orange'
}

icon_mapping = {
    'Healthcare': "house-medical",
    'Education': 'school',
    'Recreational': 'tree-city'
}

# POIs Near HDB Flat
for name, type, lat, lng in nearest_pois:
    popup_content_poi = f"{name}"
    marker_color = color_mapping.get(type)
    marker_icon = icon_mapping.get(type)
    folium.Marker(location = [lat, lng],
                  icon = folium.Icon(color = marker_color, prefix = "fa", icon = marker_icon),
                  tooltip = f"{type}",
                  popup = folium.Popup(popup_content_poi, max_width=500)).add_to(hdb_map)

hdb_map

### Save Map

In [30]:
# Save the map to an HTML file
hdb_map.save('hdb_map.html')