In [1]:
import pandas as pd
from haversine import haversine

# Read the four coordinate tables (HDB flats, MRT stations, bus interchanges,
# primary schools) in one pass and bind each to its own module-level name.
hdb, mrt, bus, primary = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/coords/coords_HDB.csv",
        "data/coords/coords_mrt.csv",
        "data/coords/coords_bus.csv",
        "data/coords/coords_primary.csv",
    )
)

In [2]:
# Prefix the flat's own coordinate columns so they stay distinguishable from
# the 'lat'/'long' columns carried by the amenity tables.
hdb = hdb.rename(columns={'lat': 'hdb_lat', 'long': 'hdb_long'})

In [3]:
def dist(lat1, long1, lat2, long2):
    """Return the haversine distance between two lat/long points, in metres.

    The value returned by ``haversine`` is truncated to a whole number with
    ``int`` before being handed back.
    """
    origin = (lat1, long1)
    target = (lat2, long2)
    metres = haversine(origin, target, unit='m')
    return int(metres)

In [4]:
def _find_nearest(places, name_column, lat, long):
    """Return ``places[name_column]`` for the row closest to (lat, long).

    ``places`` must expose 'lat' and 'long' columns. Each row's distance is
    computed with ``dist`` and the row with the smallest distance is selected
    through ``idxmin`` (which reports the first row when several tie).
    """
    distances = places.apply(
        lambda row: dist(lat, long, row['lat'], row['long']), axis=1
    )
    return places.loc[distances.idxmin(), name_column]


def find_nearest_bus(lat, long):
    """Return the 'Interchange' value of the ``bus`` row nearest to (lat, long)."""
    return _find_nearest(bus, 'Interchange', lat, long)


def find_nearest_mrt(lat, long):
    """Return the 'Station' value of the ``mrt`` row nearest to (lat, long)."""
    return _find_nearest(mrt, 'Station', lat, long)


def find_nearest_school(lat, long):
    """Return the 'School' value of the ``primary`` row nearest to (lat, long)."""
    return _find_nearest(primary, 'School', lat, long)

In [5]:
# Label every flat with the name of its closest bus interchange.
nearest_interchange = hdb.apply(
    lambda flat: find_nearest_bus(flat['hdb_lat'], flat['hdb_long']),
    axis=1,
)
hdb['Interchange'] = nearest_interchange

In [6]:
# Label every flat with the name of its closest MRT/LRT station.
nearest_station = hdb.apply(
    lambda flat: find_nearest_mrt(flat['hdb_lat'], flat['hdb_long']),
    axis=1,
)
hdb['Station'] = nearest_station

In [7]:
# Label every flat with the name of its closest primary school.
nearest_school = hdb.apply(
    lambda flat: find_nearest_school(flat['hdb_lat'], flat['hdb_long']),
    axis=1,
)
hdb['School'] = nearest_school

In [8]:
# Left-join the bus table onto the flats by interchange name, then relabel the
# joined 'lat'/'long' columns as the interchange's coordinates.
merged = hdb.merge(bus, how='left', on='Interchange').rename(
    columns={'lat': 'bus_lat', 'long': 'bus_long'}
)

In [9]:
# Left-join the MRT table by station name, then relabel the joined
# 'lat'/'long' columns as the station's coordinates.
merged1 = merged.merge(mrt, how='left', on='Station').rename(
    columns={'lat': 'mrt_lat', 'long': 'mrt_long'}
)

In [10]:
# Left-join the primary-school table by school name, then relabel the joined
# 'lat'/'long' columns as the school's coordinates.
merged2 = merged1.merge(primary, how='left', on='School').rename(
    columns={'lat': 'sch_lat', 'long': 'sch_long'}
)
# Note: some locations have multiple schools, such as a childcare center.
# NOTE(review): if `primary` repeats a 'School' name, this left merge emits
# one output row per match -- confirm the row count still equals len(merged1).

In [11]:
# Give the amenity name columns shorter labels, then preview the first row.
merged2 = merged2.rename(
    columns={'Interchange': 'bus', 'Station': 'mrt', 'School': 'pri_sch'}
)
merged2.head(1)

Unnamed: 0,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,resale_price,year_sold,mth_sold,...,hdb_long,bus,mrt,pri_sch,bus_lat,bus_long,mrt_lat,mrt_long,sch_lat,sch_long
0,ANG MO KIO,4 ROOM,211,ANG MO KIO AVE 3,04 TO 06,81.0,NEW GENERATION,80000.0,1990,1,...,103.841667,Ang Mo Kio Bus Interchange,Mayflower MRT,ANG MO KIO PRIMARY SCHOOL,1.369519,103.848462,1.371463,103.836568,1.369322,103.839631


In [12]:
# Calculate the distance from each flat to its nearest bus interchange, MRT
# station and primary school; `dist` yields whole metres.
for dist_col, lat_col, long_col in (
    ('bus_dist', 'bus_lat', 'bus_long'),
    ('mrt_dist', 'mrt_lat', 'mrt_long'),
    ('pri_dist', 'sch_lat', 'sch_long'),
):
    # The lambda is consumed by apply() inside this iteration, so it always
    # sees the current lat_col/long_col.
    merged2[dist_col] = merged2.apply(
        lambda row: dist(row['hdb_lat'], row['hdb_long'],
                         row[lat_col], row[long_col]),
        axis=1,
    )

In [13]:
# Persist the combined table; index=False keeps the row index out of the CSV.
merged2.to_csv(path_or_buf="data/coords/coords_combine.csv", index=False)