In [123]:
import os
import pickle
from math import radians

import googlemaps
import numpy as np
import pandas as pd
from landtransportsg import PublicTransport
from sklearn.neighbors import BallTree

In [2]:
# API credentials are read from the environment so that real keys never have
# to be committed with the notebook. The literal 'XXXX' placeholder is kept as
# the fallback, so behaviour is unchanged when the variables are not set.
datamall = PublicTransport(os.environ.get('LTA_DATAMALL_API_KEY', 'XXXX'))
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', 'XXXX'))

In [3]:
def get_lat_lng_from_address(gmaps_client, address, region='SG'):
    """Geocode ``address`` and return its coordinates as ``(lat, lng)``.

    Parameters
    ----------
    gmaps_client
        Object exposing ``geocode(address=..., region=...)``; the notebook
        passes a ``googlemaps.Client``.
    address : str
        Free-text address or place name to look up.
    region : str, default 'SG'
        Region hint forwarded to the geocoder.

    Returns
    -------
    tuple
        ``(lat, lng)`` of the first geocoding result, or the sentinel
        ``(0, 0)`` when the geocoder returns no results.
    """
    results = gmaps_client.geocode(address=address, region=region)
    if not results:
        # Sentinel that callers use to recognise a failed lookup.
        return 0, 0
    # Read the keys explicitly instead of relying on the ordering of the
    # response dict's values, so latitude can never be swapped with longitude.
    location = results[0]['geometry']['location']
    return location['lat'], location['lng']

In [4]:
def get_commute_time_between(gmaps_client, origin, destination, mode='transit'):
    """Return the travel time from ``origin`` to ``destination`` in whole minutes.

    Parameters
    ----------
    gmaps_client
        Object exposing ``distance_matrix(origins, destinations, mode=...)``;
        the notebook passes a ``googlemaps.Client``.
    origin, destination
        Single locations; each is wrapped in a one-element list for the call.
    mode : str, default 'transit'
        Travel mode forwarded to the distance-matrix request.

    Returns
    -------
    int
        The reported duration value divided by 60 and rounded to the nearest
        integer.

    Raises
    ------
    ValueError
        If the single result element carries a status other than ``'OK'``
        (for example when no route exists), in which case it has no duration.
    """
    response = gmaps_client.distance_matrix([origin], [destination], mode=mode)
    element = response['rows'][0]['elements'][0]

    # Without this check a missing route surfaces as an opaque
    # KeyError('duration') further down.
    status = element.get('status', 'OK')
    if status != 'OK':
        raise ValueError(
            f"No {mode} route from {origin} to {destination}: status {status}"
        )

    return round(element['duration']['value'] / 60)

In [5]:
def get_train_station_coords():
    """Load train station coordinates from ``mrt_lrt_data.csv``.

    Returns
    -------
    numpy.ndarray
        One row per CSV record, with the ``lat`` and ``lng`` columns in that
        order.
    """
    station_table = pd.read_csv('mrt_lrt_data.csv')
    lat_lng = station_table.loc[:, ['lat', 'lng']]
    return lat_lng.to_numpy()

In [6]:
def get_malls_coords():
    """Load mall coordinates from ``malls.csv``.

    Returns
    -------
    numpy.ndarray
        One row per CSV record, with the ``latitude`` and ``longitude``
        columns in that order.
    """
    coord_columns = ['latitude', 'longitude']
    mall_table = pd.read_csv('malls.csv')
    return mall_table.loc[:, coord_columns].to_numpy()

In [7]:
def get_bus_stop_station_coords(datamall):
    """Fetch bus stop records and return their coordinates.

    Parameters
    ----------
    datamall
        Object exposing ``bus_stops()``; its result is loaded into a
        DataFrame, so it must provide ``Latitude`` and ``Longitude`` fields.

    Returns
    -------
    numpy.ndarray
        One row per record, with the ``Latitude`` and ``Longitude`` columns in
        that order.
    """
    stop_table = pd.DataFrame(datamall.bus_stops())
    return stop_table.loc[:, ['Latitude', 'Longitude']].to_numpy()

In [31]:
def get_parks_coords():
    """Geocode every park listed in ``parks.csv`` and return the coordinates.

    Each value of the ``Name`` column is looked up through
    ``get_lat_lng_from_address`` using the module-level ``gmaps`` client.
    Lookups that come back as the ``(0, 0)`` sentinel are left out.

    Returns
    -------
    numpy.ndarray
        One ``[first, second]`` coordinate pair per successfully geocoded park.
    """
    park_names = pd.read_csv('parks.csv')['Name']
    lookups = [get_lat_lng_from_address(gmaps, name) for name in park_names]
    resolved = [[point[0], point[1]] for point in lookups if point != (0, 0)]
    return np.array(resolved)

In [8]:
def get_nearest_point_from(ball_tree, destinations_list, query, k=1):
    """Return the ``k`` entries of ``destinations_list`` nearest to ``query``.

    Parameters
    ----------
    ball_tree
        Spatial index exposing ``query(X, k=..., return_distance=False)``.
        The notebook builds it from ``np.radians(destinations_list)``, so row
        ``i`` of the index corresponds to ``destinations_list[i]``.
    destinations_list : numpy.ndarray
        Coordinates in degrees, indexed by the positions the tree returns.
    query
        Coordinate pair in degrees; it is converted to radians and reshaped
        to a single row before the lookup.
    k : int, default 1
        Number of neighbours to retrieve.

    Returns
    -------
    numpy.ndarray
        The selected rows of ``destinations_list`` with length-1 axes removed.
    """
    # The original body read an undefined name `origin` here, which either
    # raised NameError or silently used a stale notebook global instead of
    # the `query` argument.
    query_in_rad = np.radians(query).reshape(1, -1)
    index = ball_tree.query(query_in_rad, k=k, return_distance=False)
    return destinations_list[index].squeeze()

In [60]:
def get_commute_time_to_nearest(gmaps_client, POI, origin, mode='transit', k=1):
    """Return the commute time from ``origin`` to its nearest point of interest.

    Parameters
    ----------
    gmaps_client
        Client forwarded to ``get_commute_time_between``.
    POI
        Indexable pair: position 0 holds the coordinate array, position 1 the
        spatial index built over it.
    origin
        Starting location, used both for the neighbour lookup and the commute
        query.
    mode : str, default 'transit'
        Travel mode forwarded to ``get_commute_time_between``.
    k : int, default 1
        Number of neighbours requested from ``get_nearest_point_from``.

    Returns
    -------
    Whatever ``get_commute_time_between`` returns for the selected destination.
    """
    candidate_coords, spatial_index = POI[0], POI[1]
    nearest = get_nearest_point_from(spatial_index, candidate_coords, origin, k=k)
    return get_commute_time_between(gmaps_client, origin, nearest, mode=mode)

In [42]:
def get_offices(gmaps_client, list_of_addresses):
    """Geocode office/commercial-centre addresses into a coordinate array.

    Parameters
    ----------
    gmaps_client
        Client forwarded to ``get_lat_lng_from_address``.
    list_of_addresses : iterable of str
        Addresses or place names to geocode.

    Returns
    -------
    numpy.ndarray
        One ``[lat, lng]`` row per address that could be geocoded, in input
        order.
    """
    coords = []
    for address in list_of_addresses:
        point = get_lat_lng_from_address(gmaps_client, address)
        # (0, 0) is the "not found" sentinel of get_lat_lng_from_address;
        # skip it, as get_parks_coords does, so a failed lookup never ends up
        # in the spatial index as a bogus location.
        if point == (0, 0):
            continue
        coords.append(list(point))
    return np.array(coords)

In [39]:
# Place names that get_offices() geocodes to represent office and commercial
# areas.
offices_and_commercial_centres = [
    'Raffles Place MRT',
    'Marina Bay MRT',
    'Tanjong Pagar MRT',
    'Anson Road',
    'Orchard Road Area',
    'Shenton Way Area',
    'Suntec City',
    'River Valley',
    'North Bridge Road',
    'Beach Road',
    'Cecil Street',
]


In [44]:
# Coordinate arrays for each point-of-interest category.
train_stations = get_train_station_coords()  # read from mrt_lrt_data.csv
malls = get_malls_coords()  # read from malls.csv
bus_stops = get_bus_stop_station_coords(datamall)  # fetched through the datamall client
parks = get_parks_coords()  # geocodes every park name via the global gmaps client
offices = get_offices(gmaps, offices_and_commercial_centres)  # geocodes the place names listed above

In [45]:
def _build_haversine_tree(coords_deg):
    """Index degree coordinates in a BallTree that uses the haversine metric."""
    # Coordinates are converted to radians before indexing; queries must be
    # converted the same way.
    return BallTree(np.radians(coords_deg), metric='haversine')


bt_trains = _build_haversine_tree(train_stations)
bt_buses = _build_haversine_tree(bus_stops)
bt_malls = _build_haversine_tree(malls)
bt_parks = _build_haversine_tree(parks)
bt_offices = _build_haversine_tree(offices)

In [57]:
# Each category maps to (coordinate array, BallTree built over that array).
points_of_interest = {
    'mrt': (train_stations, bt_trains),
    'bus': (bus_stops, bt_buses),
    'malls': (malls, bt_malls),
    'parks': (parks, bt_parks),
    'offices': (offices, bt_offices),
}

In [63]:
def get_livability_score(gmaps_client, points_of_interest, origin):
    """Score ``origin`` by how quickly its nearest amenities can be reached.

    For each category the commute time (in minutes) to the nearest point of
    interest is obtained from ``get_commute_time_to_nearest`` and contributes
    ``weight / minutes`` to the score:

    ========== ========= ======
    category   mode      weight
    ========== ========= ======
    mrt        walking   5
    bus        walking   5
    malls      walking   10
    parks      walking   10
    offices    transit   30
    ========== ========= ======

    Parameters
    ----------
    gmaps_client
        Client forwarded to ``get_commute_time_to_nearest``.
    points_of_interest : dict
        Maps each category key above to the value expected by
        ``get_commute_time_to_nearest``.
    origin
        Location to score.

    Returns
    -------
    float
        Sum of the weighted inverse commute times; larger means more livable.
    """
    # (category key, travel mode, weight)
    components = (
        ('mrt', 'walking', 5),
        ('bus', 'walking', 5),
        ('malls', 'walking', 10),
        ('parks', 'walking', 10),
        ('offices', 'transit', 30),
    )

    livability_score = 0
    for key, mode, weight in components:
        minutes = get_commute_time_to_nearest(
            gmaps_client, points_of_interest[key], origin, mode=mode
        )
        # Commute times are rounded to whole minutes, so a very short trip
        # can come back as 0. Treat that as one minute: dividing by zero
        # would raise, and the caller would then record the best-located
        # homes with a score of 0.
        livability_score += weight / max(minutes, 1)
    return livability_score
    

In [95]:
# Accumulates one {(lat, lng): score} dict per processed row; filled by
# populate_livability_scores().
livability_scores = []

In [137]:
# Rows of this file supply the (lat, lng) origins scored by
# populate_livability_scores().
training_data = pd.read_csv('../data/train_encoded_task3.csv')

In [94]:
def populate_livability_scores():
    """Score every row of ``training_data`` and append to ``livability_scores``.

    For each row the ``(lat, lng)`` pair is scored with
    ``get_livability_score`` using the module-level ``gmaps`` client and
    ``points_of_interest``. Exactly one ``{origin: score}`` dict is appended
    per row; when scoring raises, the score is recorded as ``0`` and the
    error is reported on stdout.
    """
    for _, row in training_data.iterrows():
        origin = (row['lat'], row['lng'])
        try:
            score = get_livability_score(gmaps, points_of_interest, origin)
        except Exception as exc:
            # Best effort: one failed lookup must not abort the whole run,
            # but the failure is made visible instead of swallowed.
            print(f"{origin}: scoring failed ({exc!r}); recording 0")
            livability_scores.append({origin: 0})
        else:
            # The progress print used to concatenate a tuple with a str
            # inside the try block; the resulting TypeError was caught and
            # appended a second, zero-valued entry for every good row.
            livability_scores.append({origin: score})
            print(f"{origin}: {score}")
        

In [97]:
# Scores every row of training_data; each row triggers five commute-time
# lookups through the gmaps client, so this cell is slow to run.
populate_livability_scores()

In [125]:
# Persist the raw per-row results so the scoring loop does not have to be
# re-run.
with open('livability.obj', 'wb') as scores_file:
    pickle.dump(livability_scores, scores_file)

In [107]:
# Flat lookup from (lat, lng) to livability score, filled in the next cell.
livability_dict = {}

In [112]:
# Flatten the per-row dicts into one mapping, skipping entries whose score is
# 0. Later entries overwrite earlier ones for the same coordinates.
livability_dict.update(
    ((coords[0], coords[1]), score)
    for entry in livability_scores
    for coords, score in entry.items()
    if score != 0
)
        

In [111]:
# Persist the flattened (lat, lng) -> score mapping.
with open('livability_dict.obj', 'wb') as dict_file:
    pickle.dump(livability_dict, dict_file)

In [128]:
# Display the whole (lat, lng) -> score mapping (output can be very long).
livability_dict

{(1.41439935, 103.83719568172816): 0.7662173049612184,
 (1.3725968000000002, 103.87562460126242): 0.9595645330672553,
 (1.2987726, 103.895798): 1.9139465195966743,
 (1.3123637, 103.80327091227252): 0.9774553244683727,
 (1.2739587, 103.84363531310518): 1.5777980466985606,
 (1.3393381, 103.7638932): 0.7923969305548253,
 (1.31064045, 103.85214926006608): 1.851291879848856,
 (1.3423402, 103.784489): 0.9450376430619664,
 (1.3155347, 103.825684432334): 1.4614233118906017,
 (1.310429, 103.80282102340084): 0.8879819478124136,
 (1.3221528, 103.94522262523984): 0.9034953997112861,
 (1.32970335, 103.90568292831622): 1.1810809353962322,
 (1.36432355, 103.83166833259988): 1.0049748149289917,
 (1.29988105, 103.88811428633838): 1.542469125129791,
 (1.3816301, 103.7421019293336): 0.6855027258800719,
 (1.328739, 103.84453365993072): 1.870216239540389,
 (1.3129331, 103.85686967571708): 1.855601946413553,
 (1.277361, 103.84933771170438): 1.5883863988869973,
 (1.3737939, 103.84901393695192): 1.14359823404

In [131]:
# Reload the encoded training set (the same file read into training_data
# earlier) to attach the scores to it.
train_data = pd.read_csv('../data/train_encoded_task3.csv')

In [134]:
def _lookup_livability(row):
    """Return the stored score for a row's (lat, lng), or 0 when there is none."""
    return livability_dict.get((row['lat'], row['lng']), 0)


train_data['livability_score'] = train_data.apply(_lookup_livability, axis=1)

In [140]:
# Rescale the scores so the column maximum becomes 5. Only a division by the
# maximum is applied; rows without a stored score were filled with 0.
train_data['livability_score'] = (
    train_data['livability_score']
    .div(train_data['livability_score'].max())
    .mul(5)
)

In [143]:
# NOTE(review): this overwrites the same CSV that was read as input, now with
# the extra 'livability_score' column.
train_data.to_csv('../data/train_encoded_task3.csv', index=False)

In [144]:
# Sanity check: after rescaling, the largest score should be 5.0.
train_data['livability_score'].max()

5.0