# Feature Engineering

In [169]:
import pandas as pd
import numpy as np

In [170]:
# Load the MRT station coordinates; the first CSV column is used as the index.
mrt_coordinates_df = pd.read_csv(
    'mrt_coordinates.csv',
    index_col=0,
)

In [171]:
mrt_coordinates_df

Unnamed: 0,name,latitude,longitude
0,Jurong East MRT Station,1.333295,103.742154
1,Bukit Batok MRT Station,1.349033,103.749566
2,Bukit Gombak MRT Station,1.358612,103.751791
3,Choa Chu Kang MRT Station,1.385363,103.744371
4,Yew Tee MRT Station,1.397476,103.747418
...,...,...,...
140,Upper Changi MRT Station,1.341740,103.961473
141,Expo MRT Station,1.335383,103.962375
142,Woodlands North MRT Station,1.447782,103.785136
143,Woodlands MRT Station,1.436058,103.787939


In [172]:
# Load the HDB address coordinates; the first CSV column is used as the index.
hdb_coordinates_df = pd.read_csv(
    'hdb_coordinates.csv',
    index_col=0,
)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude
0,406 ANG MO KIO AVE 10,1.362005,103.853880
1,108 ANG MO KIO AVE 4,1.370943,103.837975
2,602 ANG MO KIO AVE 5,1.380709,103.835368
3,465 ANG MO KIO AVE 10,1.366201,103.857201
4,601 ANG MO KIO AVE 5,1.381041,103.835132
...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328
9424,187B BEDOK NTH ST 4,1.330499,103.939996
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415


#### Distance from nearest MRT in km

Use the geopy package to calculate the distance between two places from their latitude and longitude values.

In [173]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\raych\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [174]:
from geopy import distance

In [175]:
mrt_coordinates_df

Unnamed: 0,name,latitude,longitude
0,Jurong East MRT Station,1.333295,103.742154
1,Bukit Batok MRT Station,1.349033,103.749566
2,Bukit Gombak MRT Station,1.358612,103.751791
3,Choa Chu Kang MRT Station,1.385363,103.744371
4,Yew Tee MRT Station,1.397476,103.747418
...,...,...,...
140,Upper Changi MRT Station,1.341740,103.961473
141,Expo MRT Station,1.335383,103.962375
142,Woodlands North MRT Station,1.447782,103.785136
143,Woodlands MRT Station,1.436058,103.787939


In [176]:
# Turn each frame into a list of per-row dicts (one dict per HDB address / MRT station).
hdb_coordinates_dict = hdb_coordinates_df.to_dict(orient='records')
mrt_coordinates_dict = mrt_coordinates_df.to_dict(orient='records')

In [177]:
mrt_coordinates_dict[0]

{'name': 'Jurong East MRT Station',
 'latitude': 1.33329506563598,
 'longitude': 103.742153884191}

In [178]:
def get_shortest_distance(hdb, places):
    """Find the place that is closest to one HDB address.

    Parameters
    ----------
    hdb : dict
        Record for a single HDB address; must contain 'latitude' and
        'longitude'.
    places : iterable of dict
        Candidate places (e.g. MRT stations, malls, schools). Each record must
        contain 'name', 'latitude' and 'longitude'.

    Returns
    -------
    tuple
        ``(shortest_distance_km, place_name)`` where the distance is the
        ``.km`` value of geopy's ``distance.distance``. If ``places`` is
        empty, ``(float('inf'), "")`` is returned.
    """
    hdb_lat_long = (hdb['latitude'], hdb['longitude'])
    # Start from infinity instead of an arbitrary 100 km cap, so that places
    # farther than 100 km are still compared correctly rather than silently
    # yielding a made-up distance with an empty name.
    shortest_distance = float('inf')
    current_place = ""

    for place in places:
        place_lat_long = (place['latitude'], place['longitude'])
        current_distance = distance.distance(hdb_lat_long, place_lat_long).km
        # Strict '<' keeps the first place encountered when distances tie.
        if current_distance < shortest_distance:
            shortest_distance = current_distance
            current_place = place['name']

    return (shortest_distance, current_place)

In [179]:
# Attach the nearest MRT station (distance in km and name) to every HDB record.
for hdb in hdb_coordinates_dict:
    nearest_km, nearest_name = get_shortest_distance(hdb, mrt_coordinates_dict)
    hdb['mrt_shortest_dist'] = nearest_km
    hdb['mrt_shortest_dist_name'] = nearest_name
    

In [180]:
# Rebuild the frame from the enriched records so the new MRT columns appear.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name
0,406 ANG MO KIO AVE 10,1.362005,103.853880,0.957270,Ang Mo Kio MRT Station
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station
...,...,...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328,1.236360,Khatib MRT Station
9424,187B BEDOK NTH ST 4,1.330499,103.939996,0.796209,Tanah Merah MRT Station
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396,0.682807,Bukit Batok MRT Station
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415,0.783759,Bukit Batok MRT Station


Sanity Check

Let's check the first row of hdb_coordinates_df to see if mrt_shortest_dist is calculated correctly.

The nearest MRT station to 406 Ang Mo Kio Ave 10 is very clearly Ang Mo Kio MRT Station. Let's see if the calculations match.

In [181]:
# First (latitude, longitude) row whose station name is Ang Mo Kio MRT Station.
ang_mo_kio_coordinates = mrt_coordinates_df.loc[
    mrt_coordinates_df['name'] == 'Ang Mo Kio MRT Station',
    ['latitude', 'longitude'],
].values[0]

In [182]:
# Convert the two-element (latitude, longitude) numpy row into a plain tuple.
ang_mo_kio_coordinates = tuple(ang_mo_kio_coordinates)
ang_mo_kio_coordinates

(1.36942855699191, 103.849455226442)

In [183]:
# Recompute the distance for the first HDB record directly, as an independent check.
first_hdb_lat_long = (
    hdb_coordinates_dict[0]['latitude'],
    hdb_coordinates_dict[0]['longitude'],
)
dist_from_amk = distance.distance(first_hdb_lat_long, ang_mo_kio_coordinates).km
dist_from_amk

0.9572697821512559

In [184]:
# Compare with a tolerance: exact equality on floats breaks as soon as the value
# is round-tripped (e.g. through the CSV checkpoint) and loses its last digit.
np.isclose(hdb_coordinates_df.loc[0, 'mrt_shortest_dist'], dist_from_amk)

True

#### Distance from nearest Mall in km

In [185]:
# Load the shopping mall coordinates; the first CSV column is used as the index.
mall_coordinates_df = pd.read_csv(
    'shopping_mall_coordinates.csv',
    index_col=0,
)
mall_coordinates_df

Unnamed: 0,name,latitude,longitude
0,100 AM,1.274683,103.843488
1,313@Somerset,1.301014,103.838361
2,Aperia,1.309711,103.864326
3,Balestier Hill Shopping Centre,1.325596,103.842572
4,Bugis Cube,1.298141,103.855635
...,...,...,...
156,Holland Road Shopping Centre,1.310277,103.795371
157,Mustafa Centre,1.310112,103.855291
158,Shaw House,1.293694,103.773284
159,KINEX,1.314618,103.894738


In [186]:
# One dict per mall, in the shape expected by the distance helpers.
mall_coordinates_dict = mall_coordinates_df.to_dict(orient='records')
mall_coordinates_dict

[{'name': '100 AM',
  'latitude': 1.27468281482263,
  'longitude': 103.843488359469},
 {'name': '313@Somerset',
  'latitude': 1.30101436404056,
  'longitude': 103.838360664485},
 {'name': 'Aperia',
  'latitude': 1.3097112065077,
  'longitude': 103.864326436447},
 {'name': 'Balestier Hill Shopping Centre',
  'latitude': 1.32559594839311,
  'longitude': 103.842571612968},
 {'name': 'Bugis Cube',
  'latitude': 1.2981408343975,
  'longitude': 103.855635339249},
 {'name': 'Bugis Junction',
  'latitude': 1.2991371723215,
  'longitude': 103.855450325604},
 {'name': 'Bugis+',
  'latitude': 1.30095171530648,
  'longitude': 103.855172625542},
 {'name': 'Capitol Piazza',
  'latitude': 1.29307884763132,
  'longitude': 103.851261982149},
 {'name': 'Cathay Cineleisure Orchard',
  'latitude': 1.30152101873533,
  'longitude': 103.836429655016},
 {'name': 'The Centrepoint',
  'latitude': 1.30145045537088,
  'longitude': 103.840034074858},
 {'name': 'City Square Mall',
  'latitude': 1.31138865009152,
  

In [187]:
# Attach the nearest mall (distance in km and name) to every HDB record.
for hdb in hdb_coordinates_dict:
    nearest_km, nearest_name = get_shortest_distance(hdb, mall_coordinates_dict)
    hdb['mall_shortest_dist'] = nearest_km
    hdb['mall_shortest_dist_name'] = nearest_name

In [188]:
# Rebuild the frame from the enriched records so the new mall columns appear.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name
0,406 ANG MO KIO AVE 10,1.362005,103.853880,0.957270,Ang Mo Kio MRT Station,1.013992,AMK Hub
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza
...,...,...,...,...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328,1.236360,Khatib MRT Station,0.443476,Wisteria Mall
9424,187B BEDOK NTH ST 4,1.330499,103.939996,0.796209,Tanah Merah MRT Station,1.258851,Bedok Mall
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396,0.682807,Bukit Batok MRT Station,0.569993,West Mall
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415,0.783759,Bukit Batok MRT Station,0.676788,West Mall


In [189]:
# Checkpoint: persist the features computed so far (MRT and mall distances).
hdb_coordinates_df.to_csv(path_or_buf='hdb_added_features.csv')

In [190]:
# Resume from the checkpoint file and rebuild the per-address records from it.
hdb_coordinates_df = pd.read_csv(
    'hdb_added_features.csv',
    index_col=0,
)
hdb_coordinates_dict = hdb_coordinates_df.to_dict(orient='records')

#### Distance from nearest primary school in km

In [191]:
# Load the primary school coordinates (with SAP / GEP / Affiliated_Secondary flags).
school_coordinates_df = pd.read_csv(
    'primary_school_coordinates.csv',
    index_col=0,
)
school_coordinates_df.head()

Unnamed: 0,name,SAP,GEP,Affiliated_Secondary,latitude,longitude
0,Admiralty Primary School,0,0,0,1.442635,103.80004
1,Ahmad Ibrahim Primary School,0,0,0,1.433153,103.832942
2,Ai Tong School,1,0,0,1.360583,103.83302
3,Alexandra Primary School,0,0,0,1.291334,103.824425
4,Anchor Green Primary School,0,0,0,1.39037,103.887165


In [192]:
# One dict per primary school, in the shape expected by the distance helpers.
school_coordinates_dict = school_coordinates_df.to_dict(orient='records')
school_coordinates_dict

[{'name': 'Admiralty Primary School',
  'SAP': 0,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.4426347903311,
  'longitude': 103.800040119743},
 {'name': 'Ahmad Ibrahim Primary School',
  'SAP': 0,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.43315271543517,
  'longitude': 103.832942401086},
 {'name': 'Ai Tong School',
  'SAP': 1,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.3605834338904,
  'longitude': 103.833020333986},
 {'name': 'Alexandra Primary School',
  'SAP': 0,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.29133439161334,
  'longitude': 103.824424680531},
 {'name': 'Anchor Green Primary School',
  'SAP': 0,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.39036998654612,
  'longitude': 103.887165375933},
 {'name': 'Anderson Primary School',
  'SAP': 0,
  'GEP': 0,
  'Affiliated_Secondary': 0,
  'latitude': 1.38426429436736,
  'longitude': 103.841392081119},
 {'name': 'Ang Mo Kio Primary School',
  'SAP': 0,
  'GEP': 0,
 

In [193]:
# Attach the nearest primary school (distance in km and name) to every HDB record.
for hdb in hdb_coordinates_dict:
    nearest_km, nearest_name = get_shortest_distance(hdb, school_coordinates_dict)
    hdb['school_shortest_dist'] = nearest_km
    hdb['school_shortest_dist_name'] = nearest_name

In [194]:
hdb_coordinates_dict

[{'address': '406 ANG MO KIO AVE 10',
  'latitude': 1.36200453938712,
  'longitude': 103.853879910407,
  'mrt_shortest_dist': 0.957269782151256,
  'mrt_shortest_dist_name': 'Ang Mo Kio MRT Station',
  'mall_shortest_dist': 1.0139920581933295,
  'mall_shortest_dist_name': 'AMK Hub',
  'school_shortest_dist': 0.2181252401285026,
  'school_shortest_dist_name': 'Townsville Primary School'},
 {'address': '108 ANG MO KIO AVE 4',
  'latitude': 1.37094273993861,
  'longitude': 103.837974822369,
  'mrt_shortest_dist': 1.2885537739028825,
  'mrt_shortest_dist_name': 'Ang Mo Kio MRT Station',
  'mall_shortest_dist': 0.894265975067369,
  'mall_shortest_dist_name': 'Broadway Plaza',
  'school_shortest_dist': 0.25708353288790375,
  'school_shortest_dist_name': 'Ang Mo Kio Primary School'},
 {'address': '602 ANG MO KIO AVE 5',
  'latitude': 1.38070883044887,
  'longitude': 103.835368226602,
  'mrt_shortest_dist': 1.076298559977006,
  'mrt_shortest_dist_name': 'Yio Chu Kang MRT Station',
  'mall_short

In [195]:
# Rebuild the frame with the school-distance columns, then overwrite the checkpoint file.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df.to_csv(path_or_buf='hdb_added_features.csv')

#### How many MRT stations within 1 km

In [196]:
def get_ammenities_counts(hdb, places, threshold_dist=1):
    """Count the places lying strictly closer than ``threshold_dist`` km to an HDB address.

    Parameters
    ----------
    hdb : dict
        Record with 'latitude' and 'longitude' for one HDB address.
    places : iterable of dict
        Candidate places; each needs 'name', 'latitude' and 'longitude'.
    threshold_dist : number, default 1
        Radius in km; a place exactly at the threshold is not counted.

    Returns
    -------
    tuple
        ``(count, names)`` where ``names`` lists the matching place names in
        the order they appear in ``places``. The same pair is also printed on
        every call.
    """
    origin = (hdb['latitude'], hdb['longitude'])

    nearby_names = [
        candidate['name']
        for candidate in places
        if distance.distance(
            origin, (candidate['latitude'], candidate['longitude'])
        ).km < threshold_dist
    ]
    total = len(nearby_names)

    print(total, nearby_names)
    return (total, nearby_names)

In [197]:
# Radius in km; it is also embedded in the generated column names.
threshold_dist = 1
for hdb in hdb_coordinates_dict:
    mrt_count, mrt_names = get_ammenities_counts(
        hdb, mrt_coordinates_dict, threshold_dist=threshold_dist
    )
    hdb[f'mrt_count_within_{threshold_dist}km'] = mrt_count
    hdb[f'mrt_list_within_{threshold_dist}km'] = mrt_names
    

1 ['Ang Mo Kio MRT Station']
0 []
0 []
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Yio Chu Kang MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
0 []
0 []
0 []
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Ang Mo Kio MRT Station']
1 ['Yio Chu Kang MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Yio Chu Kang MRT Station']
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
0 []
0 []
1 ['Yio Chu Kang MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
0 []
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Yio Chu Kang MRT Station']
1 ['Ang Mo Kio MRT Station']
1 ['Ang Mo Kio MRT Station

In [198]:
# Rebuild the frame so the MRT count/list columns appear; preview the first rows.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df.head()

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km
0,406 ANG MO KIO AVE 10,1.362005,103.85388,0.95727,Ang Mo Kio MRT Station,1.013992,AMK Hub,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station]
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza,0.257084,Ang Mo Kio Primary School,0,[]
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza,0.506849,Mayflower Primary School,0,[]
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden,0.698165,Teck Ghee Primary School,1,[Ang Mo Kio MRT Station]
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza,0.546444,Mayflower Primary School,0,[]


#### How many Malls within 1 km

In [199]:
# Count the malls within threshold_dist km of every HDB record.
for hdb in hdb_coordinates_dict:
    mall_count, mall_names = get_ammenities_counts(
        hdb, mall_coordinates_dict, threshold_dist=threshold_dist
    )
    hdb[f'mall_count_within_{threshold_dist}km'] = mall_count
    hdb[f'mall_list_within_{threshold_dist}km'] = mall_names

0 []
1 ['Broadway Plaza']
0 []
1 ['myVillage At Serangoon Garden']
0 []
2 ['Broadway Plaza', 'Jubilee Square']
1 ['AMK Hub']
2 ['AMK Hub', 'Broadway Plaza']
2 ['AMK Hub', 'Jubilee Square']
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
0 []
0 []
2 ['AMK Hub', 'Broadway Plaza']
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
1 ['Broadway Plaza']
2 ['AMK Hub', 'Jubilee Square']
0 []
2 ['AMK Hub', 'Jubilee Square']
0 []
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
0 []
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
0 []
1 ['AMK Hub']
0 []
1 ['AMK Hub']
0 []
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
2 ['AMK Hub', 'Jubilee Square']
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
1 ['myVillage At Serangoon Garden']
1 ['myVillage At Serangoon Garden']
0 []
0 []
3 ['AMK Hub', 'Broadway Plaza', 'Jubilee Square']
0 []
1 ['myVillage At Serangoon Garden']
2 ['AMK Hub', 'myVill

In [200]:
# Rebuild the frame so the mall count/list columns appear; preview the first rows.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df.head()

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km
0,406 ANG MO KIO AVE 10,1.362005,103.85388,0.95727,Ang Mo Kio MRT Station,1.013992,AMK Hub,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[]
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza,0.257084,Ang Mo Kio Primary School,0,[],1,[Broadway Plaza]
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza,0.506849,Mayflower Primary School,0,[],0,[]
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden,0.698165,Teck Ghee Primary School,1,[Ang Mo Kio MRT Station],1,[myVillage At Serangoon Garden]
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza,0.546444,Mayflower Primary School,0,[],0,[]


#### How many "good" schools within 1 km

A school is defined as "good" if it is a SAP school, a GEP school, or is affiliated with a secondary school.

In [201]:
def get_good_sch_counts(hdb, places, threshold_dist=1):
    """Count the "good" schools lying strictly closer than ``threshold_dist`` km to an HDB address.

    A school counts as good when at least one of its 'SAP', 'GEP' or
    'Affiliated_Secondary' flags equals 1.

    Parameters
    ----------
    hdb : dict
        Record with 'latitude' and 'longitude' for one HDB address.
    places : iterable of dict
        School records with 'name', 'latitude', 'longitude' and the three flags.
    threshold_dist : number, default 1
        Radius in km; a school exactly at the threshold is not counted.

    Returns
    -------
    tuple
        ``(count, names)`` of the good schools within range, in input order.
        The same pair is also printed on every call.
    """
    origin = (hdb['latitude'], hdb['longitude'])
    nearby_good = []

    for school in places:
        is_good = (
            school['SAP'] == 1
            or school['GEP'] == 1
            or school['Affiliated_Secondary'] == 1
        )
        # Distance is only computed for schools that pass the "good" filter.
        if not is_good:
            continue

        school_lat_long = (school['latitude'], school['longitude'])
        if distance.distance(origin, school_lat_long).km < threshold_dist:
            nearby_good.append(school['name'])

    total = len(nearby_good)
    print(total, nearby_good)
    return (total, nearby_good)

In [202]:
# Count the "good" schools within threshold_dist km of every HDB record.
for hdb in hdb_coordinates_dict:
    # Pass the threshold explicitly: the column names below embed threshold_dist,
    # so the radius actually searched must be the same value (previously the
    # function default of 1 km was always used, whatever threshold_dist was).
    good_sch_count, good_sch_names = get_good_sch_counts(
        hdb, school_coordinates_dict, threshold_dist=threshold_dist
    )
    hdb[f'good_sch_count_within_{threshold_dist}km'] = good_sch_count
    hdb[f'good_sch_list_within_{threshold_dist}km'] = good_sch_names

0 []
1 ["CHIJ St. Nicholas Girls' School"]
1 ["CHIJ St. Nicholas Girls' School"]
0 []
1 ["CHIJ St. Nicholas Girls' School"]
1 ["CHIJ St. Nicholas Girls' School"]
0 []
0 []
0 []
0 []
2 ['Ai Tong School', "CHIJ St. Nicholas Girls' School"]
2 ['Ai Tong School', "CHIJ St. Nicholas Girls' School"]
0 []
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
0 []
0 []
0 []
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
0 []
0 []
0 []
0 []
1 ['CHIJ Our Lady of Good Counsel']
0 []
0 []
0 []
1 ["CHIJ St. Nicholas Girls' School"]
1 ["CHIJ St. Nicholas Girls' School"]
0 []
0 []
0 []
0 []
0 []
0 []
0 []
0 []
1 ["CHIJ St. Nicholas Girls' School"]
0 []
0 []
0 []
0 []
0 []
1 ["CHIJ St. Nicholas Girls' School"]
3 ['Red Swastika School', "St. Anthony's Canossian Primary School", "St. Anthony's Primary School"]
1 ['Red Swastika School']
2 ["St. Anthony's Canossian Primary School", "S

In [203]:
# Rebuild the frame so the good-school count/list columns appear.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km
0,406 ANG MO KIO AVE 10,1.362005,103.853880,0.957270,Ang Mo Kio MRT Station,1.013992,AMK Hub,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[]
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza,0.257084,Ang Mo Kio Primary School,0,[],1,[Broadway Plaza],1,[CHIJ St. Nicholas Girls' School]
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza,0.506849,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School]
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden,0.698165,Teck Ghee Primary School,1,[Ang Mo Kio MRT Station],1,[myVillage At Serangoon Garden],0,[]
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza,0.546444,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328,1.236360,Khatib MRT Station,0.443476,Wisteria Mall,0.272306,Northland Primary School,0,[],1,[Wisteria Mall],0,[]
9424,187B BEDOK NTH ST 4,1.330499,103.939996,0.796209,Tanah Merah MRT Station,1.258851,Bedok Mall,0.487276,St. Anthony's Canossian Primary School,1,[Tanah Merah MRT Station],0,[],3,"[Red Swastika School, St. Anthony's Canossian ..."
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396,0.682807,Bukit Batok MRT Station,0.569993,West Mall,0.538538,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[]
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415,0.783759,Bukit Batok MRT Station,0.676788,West Mall,0.471763,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[]


#### Calculate primary school scoring metric

In [204]:
# A school's score is the number of "good" flags it has (0 to 3).
school_coordinates_df['school_score'] = (
    school_coordinates_df['SAP']
    + school_coordinates_df['GEP']
    + school_coordinates_df['Affiliated_Secondary']
)

In [205]:
school_coordinates_df

Unnamed: 0,name,SAP,GEP,Affiliated_Secondary,latitude,longitude,school_score
0,Admiralty Primary School,0,0,0,1.442635,103.800040,0
1,Ahmad Ibrahim Primary School,0,0,0,1.433153,103.832942,0
2,Ai Tong School,1,0,0,1.360583,103.833020,1
3,Alexandra Primary School,0,0,0,1.291334,103.824425,0
4,Anchor Green Primary School,0,0,0,1.390370,103.887165,0
...,...,...,...,...,...,...,...
180,Yuhua Primary School,0,0,0,1.342802,103.741106,0
181,Yumin Primary School,0,0,0,1.351292,103.950462,0
182,Zhangde Primary School,0,0,0,1.284212,103.825952,0
183,Zhenghua Primary School,0,0,0,1.379549,103.769314,0


In [206]:
school_coordinates_df.name.values

array(['Admiralty Primary School', 'Ahmad Ibrahim Primary School',
       'Ai Tong School', 'Alexandra Primary School',
       'Anchor Green Primary School', 'Anderson Primary School',
       'Ang Mo Kio Primary School', 'Anglo-Chinese School (Junior)',
       'Anglo-Chinese School (Primary)', 'Angsana Primary School',
       'Beacon Primary School', 'Bedok Green Primary School',
       'Bendemeer Primary School', 'Blangah Rise Primary School',
       'Boon Lay Garden Primary School', 'Bukit Panjang Primary School',
       'Bukit Timah Primary School', 'Bukit View Primary School',
       'CHIJ (Katong) Primary', 'CHIJ (Kellock)',
       'CHIJ Our Lady Queen of Peace', 'CHIJ Our Lady of Good Counsel',
       'CHIJ Our Lady of the Nativity', 'CHIJ Primary (Toa Payoh)',
       "CHIJ St. Nicholas Girls' School", 'Canberra Primary School',
       'Canossa Catholic Primary School', 'Cantonment Primary School',
       'Casuarina Primary School', 'Catholic High School',
       'Cedar Primary S

In [207]:
def get_sch_score(df, sch):
    """Return the ``school_score`` of the first row of ``df`` whose 'name' equals ``sch``.

    Raises IndexError when no row matches.
    """
    matching_rows = df[df['name'] == sch]
    return matching_rows.school_score.values[0]

In [208]:
# Build the key from threshold_dist, exactly as it was built when the list
# column was created, instead of hard-coding the "_1km" suffix.
good_sch_list_key = f'good_sch_list_within_{threshold_dist}km'

for hdb in hdb_coordinates_dict:
    # Sum the scores of all good schools in range. An empty list sums to 0,
    # so addresses without a good school nearby need no separate branch.
    hdb['good_sch_score'] = sum(
        get_sch_score(school_coordinates_df, sch)
        for sch in hdb[good_sch_list_key]
    )
    print(hdb['address'], hdb['good_sch_score'])

406 ANG MO KIO AVE 10 0
108 ANG MO KIO AVE 4 2
602 ANG MO KIO AVE 5 2
465 ANG MO KIO AVE 10 0
601 ANG MO KIO AVE 5 2
150 ANG MO KIO AVE 5 2
447 ANG MO KIO AVE 10 0
218 ANG MO KIO AVE 1 0
571 ANG MO KIO AVE 3 0
534 ANG MO KIO AVE 10 0
233 ANG MO KIO AVE 3 3
235 ANG MO KIO AVE 3 3
219 ANG MO KIO AVE 1 0
536 ANG MO KIO AVE 10 0
230 ANG MO KIO AVE 3 2
570 ANG MO KIO AVE 3 0
624 ANG MO KIO AVE 4 0
441 ANG MO KIO AVE 10 0
625 ANG MO KIO AVE 9 0
119 ANG MO KIO AVE 3 0
255 ANG MO KIO AVE 4 2
432 ANG MO KIO AVE 10 0
211 ANG MO KIO AVE 3 2
584 ANG MO KIO AVE 3 0
118 ANG MO KIO AVE 4 2
333 ANG MO KIO AVE 1 0
256 ANG MO KIO AVE 4 2
330 ANG MO KIO AVE 1 0
557 ANG MO KIO AVE 10 0
302 ANG MO KIO AVE 3 0
575 ANG MO KIO AVE 10 0
509 ANG MO KIO AVE 8 0
472 ANG MO KIO AVE 10 1
475 ANG MO KIO AVE 10 0
629 ANG MO KIO AVE 4 0
546 ANG MO KIO AVE 10 0
131 ANG MO KIO AVE 3 2
254 ANG MO KIO AVE 4 2
470 ANG MO KIO AVE 10 0
463 ANG MO KIO AVE 10 0
207 ANG MO KIO AVE 1 0
304 ANG MO KIO AVE 1 0
646 ANG MO KIO AVE 6

In [209]:
# Rebuild the frame so the good_sch_score column appears.
hdb_coordinates_df = pd.DataFrame(data=hdb_coordinates_dict)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km,good_sch_score
0,406 ANG MO KIO AVE 10,1.362005,103.853880,0.957270,Ang Mo Kio MRT Station,1.013992,AMK Hub,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza,0.257084,Ang Mo Kio Primary School,0,[],1,[Broadway Plaza],1,[CHIJ St. Nicholas Girls' School],2
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza,0.506849,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School],2
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden,0.698165,Teck Ghee Primary School,1,[Ang Mo Kio MRT Station],1,[myVillage At Serangoon Garden],0,[],0
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza,0.546444,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School],2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328,1.236360,Khatib MRT Station,0.443476,Wisteria Mall,0.272306,Northland Primary School,0,[],1,[Wisteria Mall],0,[],0
9424,187B BEDOK NTH ST 4,1.330499,103.939996,0.796209,Tanah Merah MRT Station,1.258851,Bedok Mall,0.487276,St. Anthony's Canossian Primary School,1,[Tanah Merah MRT Station],0,[],3,"[Red Swastika School, St. Anthony's Canossian ...",3
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396,0.682807,Bukit Batok MRT Station,0.569993,West Mall,0.538538,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415,0.783759,Bukit Batok MRT Station,0.676788,West Mall,0.471763,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0


In [213]:
# Disabled spot check: would list the rows whose nearest MRT station is 'Bishan MRT Station'.
#hdb_coordinates_df[hdb_coordinates_df['mrt_shortest_dist_name'] == 'Bishan MRT Station']

#### Calculate distance to CBD

In [211]:
# (latitude, longitude) of the point this notebook treats as the CBD.
cbd_lat_long = (1.287953, 103.851784)

# Annotate every HDB record in place with its geopy distance to that point, in kilometres.
for hdb in hdb_coordinates_dict:
    hdb['distance_to_cbd_km'] = distance.distance(
        (hdb['latitude'], hdb['longitude']),
        cbd_lat_long,
    ).km


In [212]:
# Rebuild the DataFrame from the updated records so the new
# 'distance_to_cbd_km' key shows up as a column, then display it.
hdb_coordinates_df = pd.DataFrame(hdb_coordinates_dict)
hdb_coordinates_df

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km,good_sch_score,distance_to_cbd_km
0,406 ANG MO KIO AVE 10,1.362005,103.853880,0.957270,Ang Mo Kio MRT Station,1.013992,AMK Hub,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
1,108 ANG MO KIO AVE 4,1.370943,103.837975,1.288554,Ang Mo Kio MRT Station,0.894266,Broadway Plaza,0.257084,Ang Mo Kio Primary School,0,[],1,[Broadway Plaza],1,[CHIJ St. Nicholas Girls' School],2,9.304377
2,602 ANG MO KIO AVE 5,1.380709,103.835368,1.076299,Yio Chu Kang MRT Station,1.525573,Broadway Plaza,0.506849,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School],2,10.417900
3,465 ANG MO KIO AVE 10,1.366201,103.857201,0.932964,Ang Mo Kio MRT Station,0.893796,myVillage At Serangoon Garden,0.698165,Teck Ghee Primary School,1,[Ang Mo Kio MRT Station],1,[myVillage At Serangoon Garden],0,[],0,8.673244
4,601 ANG MO KIO AVE 5,1.381041,103.835132,1.099505,Yio Chu Kang MRT Station,1.569306,Broadway Plaza,0.546444,Mayflower Primary School,0,[],0,[],1,[CHIJ St. Nicholas Girls' School],2,10.458732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9423,676A YISHUN RING RD,1.421452,103.843328,1.236360,Khatib MRT Station,0.443476,Wisteria Mall,0.272306,Northland Primary School,0,[],1,[Wisteria Mall],0,[],0,14.791651
9424,187B BEDOK NTH ST 4,1.330499,103.939996,0.796209,Tanah Merah MRT Station,1.258851,Bedok Mall,0.487276,St. Anthony's Canossian Primary School,1,[Tanah Merah MRT Station],0,[],3,"[Red Swastika School, St. Anthony's Canossian ...",3,10.886156
9425,450B BT BATOK WEST AVE 6,1.352358,103.744396,0.682807,Bukit Batok MRT Station,0.569993,West Mall,0.538538,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,13.912156
9426,451B BT BATOK WEST AVE 6,1.352484,103.743415,0.783759,Bukit Batok MRT Station,0.676788,West Mall,0.471763,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,14.013178


In [220]:
# Show only the blocks whose good_sch_score is strictly greater than 3.
hdb_coordinates_df.loc[hdb_coordinates_df['good_sch_score'].gt(3)]

Unnamed: 0,address,latitude,longitude,mrt_shortest_dist,mrt_shortest_dist_name,mall_shortest_dist,mall_shortest_dist_name,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km,good_sch_score,distance_to_cbd_km
114,22 SIN MING RD,1.357284,103.839397,0.948425,Marymount MRT Station,0.621564,Thomson V,0.672216,Catholic High School,1,[Marymount MRT Station],1,[Thomson V],2,"[Ai Tong School, Catholic High School]",4,7.789280
118,111 BISHAN ST 12,1.347068,103.848064,0.484791,Bishan MRT Station,0.341302,Junction 8,0.217054,Guangyang Primary School,4,"[Bishan MRT Station, Braddell MRT Station, Bis...",2,"[Junction 8, Junction 8]",2,"[Catholic High School, Kuo Chuan Presbyterian ...",4,6.549680
121,109 BISHAN ST 12,1.346751,103.847990,0.520771,Bishan MRT Station,0.377155,Junction 8,0.200654,Guangyang Primary School,4,"[Bishan MRT Station, Braddell MRT Station, Bis...",2,"[Junction 8, Junction 8]",2,"[Catholic High School, Kuo Chuan Presbyterian ...",4,6.515231
122,115 BISHAN ST 12,1.347918,103.848750,0.378169,Bishan MRT Station,0.245698,Junction 8,0.258518,Guangyang Primary School,3,"[Bishan MRT Station, Braddell MRT Station, Bis...",2,"[Junction 8, Junction 8]",2,"[Catholic High School, Kuo Chuan Presbyterian ...",4,6.639256
366,120 GEYLANG EAST CTRL,1.318212,103.884752,0.284450,Aljunied MRT Station,0.873780,Paya Lebar Square,0.121999,Geylang Methodist School (Primary),4,"[Paya Lebar MRT Station, Aljunied MRT Station,...",2,"[Paya Lebar Square, Paya Lebar Quarter]",4,"[Canossa Catholic Primary School, Geylang Meth...",4,4.965552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9035,364B UPP SERANGOON RD,1.370514,103.897133,0.535806,Hougang MRT Station,0.431192,Hougang Mall,0.312269,CHIJ Our Lady of the Nativity,1,[Hougang MRT Station],2,"[Hougang Mall, Hougang Rivercourt]",3,"[CHIJ Our Lady of the Nativity, Holy Innocents...",4,10.431351
9037,364A UPP SERANGOON RD,1.370608,103.896776,0.494977,Hougang MRT Station,0.391837,Hougang Mall,0.310959,CHIJ Our Lady of the Nativity,1,[Hougang MRT Station],2,"[Hougang Mall, Hougang Rivercourt]",3,"[CHIJ Our Lady of the Nativity, Holy Innocents...",4,10.421326
9043,365D UPP SERANGOON RD,1.369617,103.896613,0.506110,Hougang MRT Station,0.446359,Hougang Mall,0.406229,Holy Innocents' Primary School,1,[Hougang MRT Station],2,"[Hougang Mall, Hougang Rivercourt]",3,"[CHIJ Our Lady of the Nativity, Holy Innocents...",4,10.316543
9203,525 HOUGANG AVE 6,1.374823,103.891367,0.406409,Hougang MRT Station,0.373583,Hougang Mall,0.254385,Montfort Junior School,2,"[Hougang MRT Station, Buangkok MRT Station]",2,"[Hougang Mall, Hougang Green Shopping Mall]",3,"[CHIJ Our Lady of the Nativity, Holy Innocents...",4,10.567612


In [214]:
# Save progress first: checkpoint the per-address feature table to CSV.
# to_csv is called with its defaults, so the DataFrame index is written as the first column.
# NOTE(review): the *_list_within_1km columns appear to hold Python lists; a CSV stores
# them as plain text — confirm that is acceptable if this file is reloaded later.
hdb_coordinates_df.to_csv('hdb_added_features.csv')

# Merge HDB coordinates data with HDB block price data

In [215]:
# Load the cleaned HDB price data; the first CSV column is used as the index.
hdb_df = pd.read_csv('hdb-price-cleaned.csv', index_col=0)

In [216]:
# Attach the per-address features to every price row. Rows are matched on the address
# plus both coordinates, and the inner join keeps only rows present in both frames.
# NOTE(review): latitude/longitude are float join keys; any rounding difference between
# the two files would silently drop rows — confirm the row count after merging.
hdb_df = pd.merge(
    hdb_df,
    hdb_coordinates_df,
    how='inner',
    on=['address', 'longitude', 'latitude'],
)

In [217]:
# Display the merged frame to check the result of the join.
hdb_df

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,school_shortest_dist,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km,good_sch_score,distance_to_cbd_km
0,2017-01-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61.333333,...,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
1,2017-05-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61.083333,...,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
2,2018-03-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,01 TO 03,44.0,Improved,1979,60.166667,...,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
3,2018-03-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,01 TO 03,44.0,Improved,1979,60.166667,...,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
4,2018-05-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,07 TO 09,44.0,Improved,1979,60.083333,...,0.218125,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148101,2023-02-01,YISHUN,4 ROOM,676A,YISHUN RING RD,07 TO 09,93.0,Model A,2018,94.666667,...,0.272306,Northland Primary School,0,[],1,[Wisteria Mall],0,[],0,14.791651
148102,2023-03-01,BEDOK,4 ROOM,187B,BEDOK NTH ST 4,07 TO 09,93.0,Model A,2018,94.833333,...,0.487276,St. Anthony's Canossian Primary School,1,[Tanah Merah MRT Station],0,[],3,"[Red Swastika School, St. Anthony's Canossian ...",3,10.886156
148103,2023-03-01,BUKIT BATOK,4 ROOM,450B,BT BATOK WEST AVE 6,04 TO 06,93.0,Model A,2019,95.083333,...,0.538538,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,13.912156
148104,2023-03-01,BUKIT BATOK,4 ROOM,451B,BT BATOK WEST AVE 6,04 TO 06,93.0,Model A,2019,95.083333,...,0.471763,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,14.013178


In [218]:
# List the column names of the merged frame.
hdb_df.columns

Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date',
       'remaining_lease', 'resale_price', 'address', 'latitude', 'longitude',
       'mrt_shortest_dist', 'mrt_shortest_dist_name', 'mall_shortest_dist',
       'mall_shortest_dist_name', 'school_shortest_dist',
       'school_shortest_dist_name', 'mrt_count_within_1km',
       'mrt_list_within_1km', 'mall_count_within_1km', 'mall_list_within_1km',
       'good_sch_count_within_1km', 'good_sch_list_within_1km',
       'good_sch_score', 'distance_to_cbd_km'],
      dtype='object')

## Add column "matured", which is 1 if the town is a mature estate and 0 otherwise

In [221]:
# Source for the list of mature estates:
# https://www.propertyguru.com.sg/property-guides/non-mature-vs-mature-bto-55760
#
# The names are upper-cased below because the `town` column stores towns in upper case.
# 'central area' is listed next to the guide's shorter 'central': the public HDB resale
# data labels that town 'CENTRAL AREA', so with 'central' alone those rows would never
# match and would be flagged 0 (verify with hdb_df['town'].unique()).
list_of_matured_estate = [
    'ang mo kio', 'bedok', 'bishan', 'bukit merah', 'bukit timah',
    'central', 'central area', 'clementi', 'geylang', 'kallang/whampoa',
    'marine parade', 'pasir ris', 'queenstown', 'serangoon', 'tampines',
    'toa payoh',
]
list_of_matured_estate = [x.upper() for x in list_of_matured_estate]

# 1 when the town is in the mature list, 0 otherwise (a missing town also yields 0).
# The vectorised isin replaces a per-row lambda; int64 matches the dtype apply() produced.
hdb_df['matured'] = hdb_df['town'].isin(list_of_matured_estate).astype('int64')


In [222]:
# Display the frame again to confirm the new 'matured' column was added.
hdb_df

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,school_shortest_dist_name,mrt_count_within_1km,mrt_list_within_1km,mall_count_within_1km,mall_list_within_1km,good_sch_count_within_1km,good_sch_list_within_1km,good_sch_score,distance_to_cbd_km,matured
0,2017-01-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61.333333,...,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561,1
1,2017-05-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61.083333,...,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561,1
2,2018-03-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,01 TO 03,44.0,Improved,1979,60.166667,...,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561,1
3,2018-03-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,01 TO 03,44.0,Improved,1979,60.166667,...,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561,1
4,2018-05-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,07 TO 09,44.0,Improved,1979,60.083333,...,Townsville Primary School,1,[Ang Mo Kio MRT Station],0,[],0,[],0,8.191561,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148101,2023-02-01,YISHUN,4 ROOM,676A,YISHUN RING RD,07 TO 09,93.0,Model A,2018,94.666667,...,Northland Primary School,0,[],1,[Wisteria Mall],0,[],0,14.791651,0
148102,2023-03-01,BEDOK,4 ROOM,187B,BEDOK NTH ST 4,07 TO 09,93.0,Model A,2018,94.833333,...,St. Anthony's Canossian Primary School,1,[Tanah Merah MRT Station],0,[],3,"[Red Swastika School, St. Anthony's Canossian ...",3,10.886156,1
148103,2023-03-01,BUKIT BATOK,4 ROOM,450B,BT BATOK WEST AVE 6,04 TO 06,93.0,Model A,2019,95.083333,...,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,13.912156,0
148104,2023-03-01,BUKIT BATOK,4 ROOM,451B,BT BATOK WEST AVE 6,04 TO 06,93.0,Model A,2019,95.083333,...,Princess Elizabeth Primary School,1,[Bukit Batok MRT Station],1,[West Mall],0,[],0,14.013178,0


In [223]:
# Final dataset with features: the price rows plus the merged location features
# and the 'matured' flag. to_csv is called with its defaults, so the DataFrame
# index is written as the first CSV column.
hdb_df.to_csv('hdb-price-data-with-features.csv')