# Quantitative strategy case interview for Dr. Li Zhihuan
## Section 1

Please run the Jupyter notebook from section 2 first.

In [1]:
import pandas as pd 
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import seaborn as sns
import re
from datetime import datetime
from bs4 import BeautifulSoup
from geopy.distance import great_circle

In [7]:
# The list of schools can be called through the data.gov.sg API, however, for the purpose of demonstrating skillsets, 
# I have chosen to perform webscraping

call = [
    'https://en.wikipedia.org/wiki/List_of_primary_schools_in_Singapore',
    'https://en.wikipedia.org/wiki/List_of_secondary_schools_in_Singapore'
]

data = []

for url in call:
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='wikitable sortable')


    for row in table.find_all('tr'):
        row_data = []
        for cell in row.find_all('td'):
            row_data.append(cell.text.strip())
        data.append(row_data)

school_df = pd.DataFrame(data)

In [8]:
school_df.tail(332)

Unnamed: 0,0,1,2,3,4,5,6
0,,,,,,,
1,Admiralty Primary School,Government,Mixed,Woodlands,,[1],1744
2,Ahmad Ibrahim Primary School,Government,Mixed,Yishun,,[2],1738
3,Ai Tong School,"Government-aided, SAP",Mixed,Bishan,Affiliated to Singapore Hokkien Huay Kuan[4],[3],5625
4,Alexandra Primary School,Government,Mixed,Bukit Merah,,[4],1266
...,...,...,...,...,...,...,...
327,Yuhua Secondary School,Government,3019,Jurong West,,[141],
328,Yusof Ishak Secondary School,Government,3307,Punggol,,[142],
329,Yuying Secondary School,Government-aided,7027,Hougang,,[143],
330,Zhenghua Secondary School,Government,3617,Bukit Panjang,,[144],


In [9]:
# Dropping the serial number from the school_df
school_df=school_df.drop(0)

# Copying the school names to a list
school_list=school_df[0].tolist()

print(school_list)

# We can see that some of the school names are wrong, we shall correct them before calling the coordinates from the OneMap API
wrong = ["CHIJ St. Nicholas Girls' School (Primary Section)",'Jing Shan Primary School [zh]']
correct = ["CHIJ St. Nicholas Girls' School", 'Jing Shan Primary School']

# Correcting the school names
for i in range(len(school_list)):
    if school_list[i] in wrong:
        school_list[i] = correct[wrong.index(school_list[i])]

['Admiralty Primary School', 'Ahmad Ibrahim Primary School', 'Ai Tong School', 'Alexandra Primary School', 'Anchor Green Primary School', 'Anderson Primary School', 'Anglo-Chinese School (Junior)', 'Anglo-Chinese School (Primary)', 'Angsana Primary School', 'Ang Mo Kio Primary School', 'Beacon Primary School', 'Bedok Green Primary School', 'Bendemeer Primary School', 'Blangah Rise Primary School', 'Boon Lay Garden Primary School', 'Bukit Panjang Primary School', 'Bukit Timah Primary School', 'Bukit View Primary School', 'Canberra Primary School', 'Canossa Catholic Primary School', 'Cantonment Primary School', 'Casuarina Primary School', 'Catholic High School (Primary)', 'Cedar Primary School', 'Changkat Primary School', 'CHIJ (Katong) Primary', 'CHIJ (Kellock)', 'CHIJ Our Lady of Good Counsel', 'CHIJ Our Lady of the Nativity', 'CHIJ Our Lady Queen of Peace', 'CHIJ Primary (Toa Payoh)', "CHIJ St. Nicholas Girls' School (Primary Section)", 'Chongfu School', 'Chongzheng Primary School', '

In [10]:
# Scraping the MRT station names from wikipedia

call = ['https://en.wikipedia.org/wiki/List_of_Singapore_MRT_stations']
data = []

for url in call:
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='wikitable sortable')


    for row in table.find_all('tr'):
        row_data = []
        for cell in row.find_all('td'):
            row_data.append(cell.text.strip())
        data.append(row_data)

mrt_df = pd.DataFrame(data)

In [11]:
# As there are multiple tables, we will remove the blank rows and empty cells from the dataframe
mrt_df = mrt_df[~mrt_df[0].isin([None, '—'])]
mrt_df = mrt_df[~mrt_df[1].isin([None])]

In [12]:
mrt_df.head(200)

Unnamed: 0,0,1,2,3,4,5,6,7,8
3,NS1 EW24,Jurong East,裕廊东,ஜூரோங் கிழக்கு,10 March 1990,Jurong East,JUR,Jurong East,East–West Line — Jurong East
4,NS2,Bukit Batok,武吉巴督,புக்கிட் பாத்தோக்,Bukit Batok South,BBT,Bukit Batok,Bukit Batok,
5,NS3,Bukit Gombak,武吉甘柏,புக்கிட் கோம்பாக்,Bukit Batok North,BGB,—,,
7,NS4 BP1,Choa Chu Kang,蔡厝港,சுவா சூ காங்,10 March 1990,Choa Chu Kang,CCK,Bukit Panjang LRT — Choa Chu Kang,
8,NS5,Yew Tee,油池,இயூ டீ,10 February 1996,Choa Chu Kang North,YWT,—,
...,...,...,...,...,...,...,...,...,...
250,TE7,Bright Hill,光明山,பிரைட் ஹில்,Bright Hill,BRH,Bishan,Thomson–East Coast Line,
253,DT6,King Albert Park,阿尔柏王园,கிங் ஆல்பர்ட் பார்க்,King Albert Park,KAP,Downtown Line,,
255,EW23,Clementi,金文泰,கிளிமெண்டி,Clementi,CLE,Clementi,East–West Line — Clementi,
260,PE4,Riviera,里维拉,ரிவியாரா,Riviera,RIV,Punggol,PGLRT East Loop,


In [13]:
# Writing it into a list and adding 'MRT Station' to it so that we can call the coordinates from the OneMap API
mrt_list=mrt_df[1].tolist()
mrt_list = [word + ' MRT Station' for word in mrt_list]
print(mrt_list)

['Jurong East MRT Station', 'Bukit Batok MRT Station', 'Bukit Gombak MRT Station', 'Choa Chu Kang MRT Station', 'Yew Tee MRT Station', 'Kranji MRT Station', 'Marsiling MRT Station', 'Woodlands MRT Station', 'Admiralty MRT Station', 'Sembawang MRT Station', 'Canberra MRT Station', 'Yishun MRT Station', 'Khatib MRT Station', 'Yio Chu Kang MRT Station', 'Ang Mo Kio MRT Station', 'Bishan MRT Station', 'Braddell MRT Station', 'Toa Payoh MRT Station', 'Novena MRT Station', 'Newton MRT Station', 'Orchard MRT Station', 'Somerset MRT Station', 'Dhoby Ghaut MRT Station', 'City Hall MRT Station', 'Raffles Place MRT Station', 'Marina Bay MRT Station', 'Marina South Pier MRT Station', 'Pasir Ris MRT Station', 'Tampines MRT Station', 'Simei MRT Station', 'Tanah Merah MRT Station', 'Bedok MRT Station', 'Kembangan MRT Station', 'Eunos MRT Station', 'Paya Lebar MRT Station', 'Aljunied MRT Station', 'Kallang MRT Station', 'Lavender MRT Station', 'Bugis MRT Station', 'City Hall MRT Station', 'Raffles Pla

In [14]:
# Some duplicates are present in the list as some MRT stations are interchanges that appears in multiple lines
# We shall filter them out by turning it into a set, then turning it into a list again
mrt_list = list(set(mrt_list))
print(mrt_list)

# We can see that some of the MRT station names are wrong, we shall correct them before calling the coordinates from the OneMap API
wrong = ['Botanic Gardens • Kebun Bunga MRT Station','Gardens by the Bay • Taman di Pesisiran MRT Station']
correct = ['Botanic Gardens MRT Station', 'Gardens by the Bay MRT Station']

for i in range(len(mrt_list)):
    if mrt_list[i] in wrong:
        mrt_list[i] = correct[wrong.index(mrt_list[i])]


['Potong Pasir MRT Station', 'Stevens MRT Station', 'Tiong Bahru MRT Station', 'Buona Vista MRT Station', 'Changi Airport MRT Station', 'Hougang MRT Station', 'Somerset MRT Station', 'Tuas Crescent MRT Station', 'Lavender MRT Station', 'Mountbatten MRT Station', 'Kallang MRT Station', 'Toa Payoh MRT Station', 'Expo MRT Station', 'Buangkok MRT Station', 'Kranji MRT Station', 'Bukit Batok MRT Station', 'King Albert Park MRT Station', 'Ang Mo Kio MRT Station', 'Esplanade MRT Station', 'Orchard MRT Station', 'Little India MRT Station', 'Telok Blangah MRT Station', 'Pasir Panjang MRT Station', 'Jurong East MRT Station', 'Bedok MRT Station', 'Sengkang MRT Station', 'Simei MRT Station', 'Maxwell MRT Station', 'Woodlands MRT Station', 'Dhoby Ghaut MRT Station', 'Springleaf MRT Station', 'Labrador Park MRT Station', 'Lakeside MRT Station', 'Yio Chu Kang MRT Station', 'Commonwealth MRT Station', 'Yew Tee MRT Station', 'Stadium MRT Station', 'Bugis MRT Station', 'Tampines East MRT Station', 'Gul 

In [15]:
# We will use the OneMap API to obtain the coordinates of every MRT station.
mrt_name= []
mrt_lat = []
mrt_long = []


for i in range(0, len(mrt_list)):
    query_address = mrt_list[i]
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y'
    resp = requests.get(query_string)

    data_mrt=json.loads(resp.content)
    
    if data_mrt['found'] != 0:
        mrt_name.append(data_mrt["results"][0]["BUILDING"])
        mrt_lat.append(data_mrt["results"][0]["LATITUDE"])
        mrt_long.append(data_mrt["results"][0]["LONGITUDE"])

        print (str(query_address)+",Lat: "+data_mrt['results'][0]['LATITUDE'] +" Long: "+data_mrt['results'][0]['LONGITUDE'])

    else:
        mrt_name.append('NotFound')
        mrt_lat.append('NotFound')
        mrt_long.append('NotFound')
        print ("No Results")

# Store this information in a dataframe
mrt_location = pd.DataFrame({
    'MRT': mrt_list,
    'Building': mrt_name,
    'Latitude': mrt_lat,
    'Longitude': mrt_long
})

Potong Pasir MRT Station,Lat: 1.3318797140041 Long: 103.869057877586
Stevens MRT Station,Lat: 1.32006555750626 Long: 103.826024401924
Tiong Bahru MRT Station,Lat: 1.28612486477108 Long: 103.827075901697
Buona Vista MRT Station,Lat: 1.3072237082044 Long: 103.790253514502
Changi Airport MRT Station,Lat: 1.35747897447692 Long: 103.98788356959
Hougang MRT Station,Lat: 1.37129226620797 Long: 103.892380513029
Somerset MRT Station,Lat: 1.30026416739006 Long: 103.839085753124
Tuas Crescent MRT Station,Lat: 1.32102695598684 Long: 103.649078235627
Lavender MRT Station,Lat: 1.30737781747063 Long: 103.862767633888
Mountbatten MRT Station,Lat: 1.30620190525331 Long: 103.882528081052
Kallang MRT Station,Lat: 1.31148890998818 Long: 103.871386541754
Toa Payoh MRT Station,Lat: 1.33259672794159 Long: 103.847577286075
Expo MRT Station,Lat: 1.33538252614956 Long: 103.962374747451
Buangkok MRT Station,Lat: 1.38287001971672 Long: 103.893122569706
Kranji MRT Station,Lat: 1.42508698073648 Long: 103.7621374594

In [16]:
# We will use the OneMap API to obtain the coordinates of every primary and secondary school.
school_name= []
school_lat = []
school_long = []


for i in range(0, len(school_list)):
    query_address = school_list[i]
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y'
    resp = requests.get(query_string)

    data_school=json.loads(resp.content)
    
    if data_school['found'] != 0:
        school_name.append(data_school["results"][0]["BUILDING"])
        school_lat.append(data_school["results"][0]["LATITUDE"])
        school_long.append(data_school["results"][0]["LONGITUDE"])

        print (str(query_address)+",Lat: "+data_school['results'][0]['LATITUDE'] +" Long: "+data_school['results'][0]['LONGITUDE'])

    else:
        school_name.append('NotFound')
        school_lat.append('NotFound')
        school_long.append('NotFound')
        print ("No Results")

# Store this information in a dataframe
school_location = pd.DataFrame({
    'School': school_list,
    'Building': school_name,
    'Latitude': school_lat,
    'Longitude': school_long
})

Admiralty Primary School,Lat: 1.44254963931583 Long: 103.800213682734
Ahmad Ibrahim Primary School,Lat: 1.43315271543517 Long: 103.832942401086
Ai Tong School,Lat: 1.3605834338904 Long: 103.833020333986
Alexandra Primary School,Lat: 1.29133439161334 Long: 103.824424680531
Anchor Green Primary School,Lat: 1.39036998654612 Long: 103.887165375933
Anderson Primary School,Lat: 1.38426429436736 Long: 103.841392081119
Anglo-Chinese School (Junior),Lat: 1.30932252730174 Long: 103.841552127373
Anglo-Chinese School (Primary),Lat: 1.31837054523521 Long: 103.835609732354
Angsana Primary School,Lat: 1.34846568919477 Long: 103.951577056231
Ang Mo Kio Primary School,Lat: 1.36932176584608 Long: 103.839630858752
Beacon Primary School,Lat: 1.38394936211823 Long: 103.773632022975
Bedok Green Primary School,Lat: 1.32344593287992 Long: 103.937878976352
Bendemeer Primary School,Lat: 1.32220848530291 Long: 103.865351336085
Blangah Rise Primary School,Lat: 1.27614522920998 Long: 103.80861316604
Boon Lay Garde

In [21]:
# import HDB data exported from section 2 as dataframe
hdb_df = pd.read_csv('HDB.csv')
hdb_df.sample(5)

Unnamed: 0,_id,month,town,flat_type,block,street_name,floor_area_sqm,flat_model,lease_commence_date,resale_price,min_storey,max_storey,average_storey,year,flat_age
259804,3761,2,PASIR RIS,4 ROOM,723,PASIR RIS ST 72,112.0,Model A,1996.0,285000.0,10,12,11,2000.0,4.0
413886,157843,9,BUKIT MERAH,4 ROOM,124B,BT MERAH VIEW,107.0,Model A,1996.0,388000.0,13,15,14,2004.0,8.0
268872,12829,5,YISHUN,4 ROOM,137,YISHUN RING RD,99.0,New Generation,1985.0,225000.0,1,3,2,2000.0,15.0
643194,17500,6,BUKIT BATOK,3 ROOM,244,BT BATOK EAST AVE 5,69.0,NEW GENERATION,1985.0,47000.0,4,6,5,1991.0,6.0
906683,280989,10,YISHUN,5 ROOM,322,YISHUN CTRL,122.0,IMPROVED,1988.0,370000.0,10,12,11,1999.0,11.0


In [30]:
hdb_df.shape

(912891, 15)

In [37]:
# Getting HDB addresses for geocoding
tempdf = pd.DataFrame()

tempdf['address'] = hdb_df['block'] + ' ' + hdb_df['street_name']

hdb_list = tempdf['address'].tolist()

# Removing duplicates from list so we can streamline the API call
hdb_list = list(set(hdb_list))

In [38]:
print(len(hdb_list))

9788


In [39]:
# We will use the OneMap API to obtain the coordinates of all the HDB blocks.
hdb_name= []
hdb_lat = []
hdb_long = []


for i in range(0, len(hdb_list)):
    query_address = hdb_list[i]
    query_string = 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y'
    resp = requests.get(query_string)

    data_hdb=json.loads(resp.content)
    
    if data_hdb['found'] != 0:
        hdb_name.append(data_hdb["results"][0]["BUILDING"])
        hdb_lat.append(data_hdb["results"][0]["LATITUDE"])
        hdb_long.append(data_hdb["results"][0]["LONGITUDE"])
        print(i)
        print (str(query_address)+",Lat: "+data_hdb['results'][0]['LATITUDE'] +" Long: "+data_hdb['results'][0]['LONGITUDE'])

    else:
        hdb_name.append('NotFound')
        hdb_lat.append('NotFound')
        hdb_long.append('NotFound')
        print ("No Results")

# Store this information in a dataframe
hdb_location = pd.DataFrame({
    'HDB': hdb_list,
    'Building': hdb_name,
    'Latitude': hdb_lat,
    'Longitude': hdb_long
})

0
732 WOODLANDS CIRCLE,Lat: 1.44334742731535 Long: 103.798291752455
1
817 JURONG WEST ST 81,Lat: 1.34470598575848 Long: 103.695720951625
2
119A RIVERVALE DR,Lat: 1.38417530549894 Long: 103.903383602884
3
389 BT BATOK WEST AVE 5,Lat: 1.36534587041751 Long: 103.752142144334
4
777 WOODLANDS CRES,Lat: 1.44786059309623 Long: 103.799327953961
5
348 WOODLANDS AVE 3,Lat: 1.43293946017439 Long: 103.781881394457
6
456 YISHUN ST 41,Lat: 1.42036919149233 Long: 103.844403667721
7
414 SERANGOON CTRL,Lat: 1.34880894396479 Long: 103.874405968198
8
278 BT BATOK EAST AVE 3,Lat: 1.34785140092909 Long: 103.757774174346
9
113 PENDING RD,Lat: 1.37838181779224 Long: 103.769359493878
10
209 TOA PAYOH NTH,Lat: 1.34185348292638 Long: 103.84481468336
11
1 GHIM MOH RD,Lat: 1.31287975684307 Long: 103.786855017635
12
10 JLN RUMAH TINGGI,Lat: 1.28820018880462 Long: 103.808369387433
13
415 TAMPINES ST 41,Lat: 1.3594587451825 Long: 103.946965691919
14
125 BT MERAH VIEW,Lat: 1.28506119216141 Long: 103.823812011597
15
4

In [40]:
# Filter out invalid entries
mrt_location = mrt_location[mrt_location['Building'] != 'NotFound']
school_location = school_location[school_location['Building'] != 'NotFound']

In [None]:
# Exporting all coordinates to CSV files so that we do not have to call the APIs everytime we run the code
# hdb_location.to_csv('hdb_coordinates.csv', index=False)
# mrt_location.to_csv('mrt_coordinates.csv', index=False)
# school_location.to_csv('school_coordinates.csv', index=False)


In [53]:
address =[]
mrt_distance = []
nearest_mrt = []
school_distance = []
nearest_school = []


# looping through the entries in the HDB dataframe to find the closest MRT and school
for add,lat,lon in zip(hdb_location.HDB,hdb_location.Latitude,hdb_location.Longitude):
    distance_to_mrt = 99
    distance_to_school = 99
    temp = 0
    
     
    for mrt,mrt_lat,mrt_lon in zip(mrt_location.MRT, mrt_location.Latitude, mrt_location.Longitude):
        temp = great_circle((lat, lon), (mrt_lat, mrt_lon)).km
        if distance_to_mrt > temp:
            distance_to_mrt = temp
            closest_mrt = mrt
    for school,school_lat,school_lon in zip(school_location.School, school_location.Latitude, school_location.Longitude):
        temp = great_circle((lat, lon), (school_lat, school_lon)).km
        if distance_to_school > temp:
            distance_to_school = temp  
            closest_school = school
    
    address.append(add)
    mrt_distance.append(distance_to_mrt)
    nearest_mrt.append(closest_mrt)
    school_distance.append(distance_to_school)
    nearest_school.append(closest_school)
    
hdb_location['distance_to_mrt'] = mrt_distance
hdb_location['nearest_mrt'] = nearest_mrt
hdb_location['distance_to_school'] = school_distance
hdb_location['nearest_school'] = nearest_school

In [54]:
# Check to make sure the df turns out as expected
hdb_location.head()

Unnamed: 0,HDB,Building,Latitude,Longitude,distance_to_mrt,nearest_mrt,distance_to_school,nearest_school
0,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun)
1,229 SIMEI ST 4,NIL,1.343155,103.956105,0.303283,Simei MRT Station,0.515448,Changkat Changi Secondary School
2,706 HOUGANG AVE 2,NIL,1.36596,103.888646,0.723744,Hougang MRT Station,0.617538,Holy Innocents' Primary School
3,28 MARINE CRES,MARINE CRESCENT VILLE,1.303437,103.913349,1.957616,Kembangan MRT Station,0.420768,CHIJ (Katong) Primary
4,399 YISHUN AVE 6,NIL,1.427526,103.847084,1.359539,Yishun MRT Station,0.123006,North View Primary School


In [57]:
# Creating an address column so that we can merge df with hdb_location
df['address'] = df['block'].astype(str).str.cat(df['street_name'].astype(str), sep=' ')
df.head()

Unnamed: 0,_id,month,town,flat_type,block,street_name,floor_area_sqm,flat_model,lease_commence_date,resale_price,min_storey,max_storey,average_storey,year,flat_age,address
0,1,1,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,44.0,Improved,1979.0,232000.0,10,12,11,2017.0,38.0,406 ANG MO KIO AVE 10
1,2,1,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,67.0,New Generation,1978.0,250000.0,1,3,2,2017.0,39.0,108 ANG MO KIO AVE 4
2,3,1,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,67.0,New Generation,1980.0,262000.0,1,3,2,2017.0,37.0,602 ANG MO KIO AVE 5
3,4,1,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,68.0,New Generation,1980.0,265000.0,4,6,5,2017.0,37.0,465 ANG MO KIO AVE 10
4,5,1,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,67.0,New Generation,1980.0,265000.0,1,3,2,2017.0,37.0,601 ANG MO KIO AVE 5


In [58]:
# Merge hdb_location and df based on the 'address' column
merged_df = pd.merge(hdb_location, df, left_on='HDB', right_on='address')
merged_df.head()

Unnamed: 0,HDB,Building,Latitude,Longitude,distance_to_mrt,nearest_mrt,distance_to_school,nearest_school,_id,month,...,floor_area_sqm,flat_model,lease_commence_date,resale_price,min_storey,max_storey,average_storey,year,flat_age,address
0,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9678,6,...,84.0,Simplified,1987.0,300000.0,4,6,5,2017.0,30.0,639 YISHUN ST 61
1,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9682,6,...,84.0,Simplified,1987.0,320000.0,7,9,8,2017.0,30.0,639 YISHUN ST 61
2,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),11491,7,...,104.0,Model A,1987.0,360000.0,4,6,5,2017.0,30.0,639 YISHUN ST 61
3,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),40592,11,...,104.0,Model A,1987.0,325000.0,7,9,8,2018.0,31.0,639 YISHUN ST 61
4,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),54428,7,...,84.0,Simplified,1987.0,268000.0,1,3,2,2019.0,32.0,639 YISHUN ST 61


In [59]:
# Import inflation data from Section 2
dfinflation = pd.read_csv('inflation.csv')
# Merge the inflation data into the previous df
merged_df = pd.merge(merged_df, dfinflation, on='year')

merged_df.head()

Unnamed: 0,HDB,Building,Latitude,Longitude,distance_to_mrt,nearest_mrt,distance_to_school,nearest_school,_id,month,...,flat_model,lease_commence_date,resale_price,min_storey,max_storey,average_storey,year,flat_age,address,CPI
0,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9678,6,...,Simplified,1987.0,300000.0,4,6,5,2017.0,30.0,639 YISHUN ST 61,102.372
1,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9682,6,...,Simplified,1987.0,320000.0,7,9,8,2017.0,30.0,639 YISHUN ST 61,102.372
2,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),11491,7,...,Model A,1987.0,360000.0,4,6,5,2017.0,30.0,639 YISHUN ST 61,102.372
3,229 SIMEI ST 4,NIL,1.343155,103.956105,0.303283,Simei MRT Station,0.515448,Changkat Changi Secondary School,2072,2,...,Maisonette,1988.0,710000.0,1,3,2,2017.0,29.0,229 SIMEI ST 4,102.372
4,229 SIMEI ST 4,NIL,1.343155,103.956105,0.303283,Simei MRT Station,0.515448,Changkat Changi Secondary School,14866,9,...,Maisonette,1988.0,740000.0,1,3,2,2017.0,29.0,229 SIMEI ST 4,102.372


In [60]:
# Standardize all the resale price to 2014 dollars by using the housing CPI for 2014
merged_df['resale_price'] = merged_df['resale_price'] / merged_df['CPI'] * 113.517

merged_df.head()

Unnamed: 0,HDB,Building,Latitude,Longitude,distance_to_mrt,nearest_mrt,distance_to_school,nearest_school,_id,month,...,flat_model,lease_commence_date,resale_price,min_storey,max_storey,average_storey,year,flat_age,address,CPI
0,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9678,6,...,Simplified,1987.0,332660.297738,4,6,5,2017.0,30.0,639 YISHUN ST 61,102.372
1,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),9682,6,...,Simplified,1987.0,354837.65092,7,9,8,2017.0,30.0,639 YISHUN ST 61,102.372
2,639 YISHUN ST 61,NIL,1.42056,103.838449,0.703152,Khatib MRT Station,0.18935,Chung Cheng High School (Yishun),11491,7,...,Model A,1987.0,399192.357285,4,6,5,2017.0,30.0,639 YISHUN ST 61,102.372
3,229 SIMEI ST 4,NIL,1.343155,103.956105,0.303283,Simei MRT Station,0.515448,Changkat Changi Secondary School,2072,2,...,Maisonette,1988.0,787296.037979,1,3,2,2017.0,29.0,229 SIMEI ST 4,102.372
4,229 SIMEI ST 4,NIL,1.343155,103.956105,0.303283,Simei MRT Station,0.515448,Changkat Changi Secondary School,14866,9,...,Maisonette,1988.0,820562.067753,1,3,2,2017.0,29.0,229 SIMEI ST 4,102.372


In [71]:
# Export to excel for use in Tableau
merged_df.to_excel('tableau.xlsx', index=False)