# Where to live? Optimising commute time

--- 

In [4]:
import googlemaps
from datetime import datetime
from dateutil import parser
import numpy as np
import pandas as pd
import os
import geopy # pip install geopy

In [5]:
# Store the Google Maps API key in the macOS Keychain (optional), so the key
# itself does not have to be written into the notebook.

import keyring

# One-time setup: uncomment, run once to save the key, then comment out again.
#keyring.set_password("googlemaps.api", "kmisiunas", "your_google_maps_API_key")
api_key = keyring.get_password("googlemaps.api", "kmisiunas")
# or
# api_key = "your_google_maps_API_key"

# Shared client used by the geocoding and distance-matrix requests below.
gmaps = googlemaps.Client(key=api_key)


In [3]:
def _parse_row_in_distance_matrix(x):
    """Extract ``[distance, time]`` from one row of a distance-matrix response.

    Only the first entry of the row's ``elements`` list is read.

    :param x: row dict holding an ``elements`` list
    :return: ``[distance, time]`` taken from ``distance.value`` and
        ``duration.value``, or ``[NaN, NaN]`` if the element status is not ``'OK'``
    """
    first = x['elements'][0]
    if first['status'] != 'OK':
        return [np.nan, np.nan]
    return [first['distance']['value'], first['duration']['value']]

    
def _parse_distance_matrix(x):
    """Parse every row of a distance-matrix response.

    :param x: response dict with a ``rows`` list
    :return: ``[[distance, time], ...]`` with one pair per row
    """
    parsed = map(_parse_row_in_distance_matrix, x['rows'])
    return list(parsed)


def measure_times(origins, destinations, date = "2018-12-18", mode = "bicycling",
                  times = ("09:00:00+01", "08:30:00+01")):
    """
    Measure distance and travel time from every origin to the destination.

    The Google Maps distance matrix is requested once per entry in ``times``
    (by default arriving at 9:00 and at 8:30 on ``date``). For every origin the
    smallest distance and the smallest time over those samples are reported;
    the two minima are taken independently of each other. Sampling more than
    once should remove some of the randomness caused by an unlucky starting time.

    :param origins: starting positions for travel; one result row per origin
    :param destinations: where to travel to (the response parser reads only the
        first destination of each row)
    :param date: day of travel, ``YYYY-MM-DD``
    :param mode: travel mode handed to Google Maps, e.g. "transit" or "bicycling"
    :param times: arrival times of day (including UTC offset) to sample
    :return: array with one ``[distance, time]`` row per origin, in the values
        returned by the API; NaN where any of the samples was invalid
    """
    res = []

    for t in times:
        distance = gmaps.distance_matrix(
            origins = origins,
            destinations = destinations,
            mode = mode,
            units = "metric",
            arrival_time = parser.parse(date + "T" + t),
        )
        res.append(_parse_distance_matrix(distance))

    # Axis 0 indexes the samples in `times`; reduce it to the element-wise minimum.
    return np.min(np.array(res), axis=0)
    
    

In [99]:
# NOTE(review): `test_loc` and `start_loc` are not assigned in the visible part of
# this notebook (only test_loc_1..3 are, in the testing cell), so this cell relies
# on earlier kernel state and would raise NameError on a fresh run.
y = measure_times([test_loc, test_loc_2, test_loc_3], start_loc)

In [109]:
# testing

# Three sample positions, given as (lat, lng) pairs.
test_loc_1 = (47.3994233, 8.4818392)
test_loc_2 = (47.3894233, 8.4918392)
test_loc_3 = (47.504233, 8.4918392)

# NOTE(review): `start_loc` is not assigned in the visible part of this notebook,
# and measure_times_dict is defined in a later cell - run that cell first.
pd.DataFrame(
    measure_times_dict([test_loc_1, test_loc_2, test_loc_3], start_loc)
)

Unnamed: 0,distance,lat,lon,time
0,8530,8.481839,47.399423,42.333333
1,4738,8.491839,47.389423,27.1
2,23110,8.491839,47.504233,56.183333


---

# Create grid

In [4]:
def parse_coordinates(res):
    """Return the ``(lat, lng)`` pair of the first result in a geocoding response.

    :param res: sequence of geocoding results; only ``res[0]`` is read
    :return: tuple ``(lat, lng)`` taken from ``geometry.location``
    """
    location = res[0]['geometry']['location']
    lat, lng = location['lat'], location['lng']
    return (lat, lng)

# Geocode the workplace address once; job_loc is a (lat, lng) tuple.
job_loc = parse_coordinates( gmaps.geocode("Google Building 110, Brandschenkestrasse 110, 8002 Zürich") )
print("Job location:", job_loc)

Job location: (47.3653139, 8.524474699999999)


In [63]:
def _contains_not(df, lon, lat):
    """Tell whether ``df`` has no row within 0.0001 of the given position.

    :param df: DataFrame with ``lon`` and ``lat`` columns
    :param lon: value compared against the ``lon`` column
    :param lat: value compared against the ``lat`` column
    :return: True if no row lies within +/- 0.0001 in both columns, else False
    """
    # `@lon` / `@lat` refer to the arguments; bare `lon` / `lat` are the columns.
    nearby = df.query('@lon - 0.0001 <= lon and @lon + 0.0001 >= lon and @lat - 0.0001 <= lat and @lat + 0.0001 >= lat')
    return nearby.empty


def measure_times_dict(origins, destination):
    """Measure all origins and return the results as a list of row dicts.

    NOTE(review): the first component of each origin is stored under "lon" and
    the second under "lat". The positions built elsewhere in this notebook are
    (lat, lng) pairs, so the two labels appear to be swapped; this is kept
    as-is because _contains_not and the saved CSV files use the same labels.

    :param origins: sequence of 2-component positions
    :param destination: passed through to measure_times
    :return: list of dicts with keys "lon", "lat", "distance" and "time",
        where "time" is the measured time divided by 60
    """
    measured = measure_times(origins, destination)
    return [
        {"lon": origin[0], "lat": origin[1], "distance": result[0], "time": result[1]/60.0 }
        for origin, result in zip(origins, measured)
    ]


def fill_values(destination, coordinates, file, filter_existing=True, batch_size=60):
    """Measure travel to ``destination`` for ``coordinates`` and accumulate the results in a CSV.

    Results already stored in ``file`` are loaded first. New positions are
    queried in batches, and the whole table is written back to ``file`` after
    every batch so that an interrupted run keeps its progress.

    :param destination: passed through to measure_times_dict
    :param coordinates: sequence of 2-component positions to measure
    :param file: path of the CSV file used as a cache (created if missing)
    :param filter_existing: if True, skip positions that _contains_not reports
        as already present in the loaded table
    :param batch_size: number of positions sent per request (must be positive)
    :return: DataFrame holding the previously stored and the newly measured rows
    """
    if os.path.isfile(file):
        df = pd.read_csv(file, index_col=0)
    else:
        df = pd.DataFrame(columns=['lon', 'lat', 'distance', 'time'])

    if filter_existing:
        coordinates = [pos for pos in coordinates if _contains_not(df, pos[0], pos[1])]
        print("Elements to query:", len(coordinates))

    # An empty `coordinates` simply skips the loop and returns the loaded table.
    for start in range(0, len(coordinates), batch_size):
        batch = coordinates[start: start + batch_size]
        rows = pd.DataFrame(measure_times_dict(batch, destination))
        # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
        # Skipping the concat for an empty table avoids mixing in its untyped columns.
        df = rows if df.empty else pd.concat([df, rows], sort=False)
        df.to_csv(file) # backup

    return df
    

In [207]:
# Scratch check: split 22 items into batches of at most 20 and print each batch.
l = 22
for i in range(int( np.ceil(l/20) )):
    # the last batch is clipped to the end of the sequence
    range_min, range_max = i*20, min(l, (i+1)*20)
    print( list(range(l))[range_min: range_max] )
    

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[20, 21]


In [213]:
# calculate the size of 1 meter in degrees of lon/lat
# do not change it, since we want to avoid small changes in positions
# NOTE(review): job_loc is a (lat, lng) tuple, so job_loc[1] below is the longitude;
# the usual meter-to-degree conversion for longitude uses cos(latitude). Left
# untouched because of the "do not change" remark above - confirm before relying
# on the east-west spacing of generated grids.

earth_R = 6378137
dLon = 1.0/(earth_R * np.cos(job_loc[1] *np.pi/180)) * 180/np.pi
dLat = 1.0/earth_R * 180/np.pi
print("1 meter in lon is", dLon, ", and in lat is", dLat, "[deg/meter]")

def grid_gen(center, elements, spacing):
    """Generate a rectangular grid of positions centred on ``center``.

    The step along ``center[0]`` is ``dLon*spacing`` and the step along
    ``center[1]`` is ``dLat*spacing``, using the module-level ``dLon`` / ``dLat``.

    :param center: 2-component position in the middle of the grid
    :param elements: pair of odd point counts; ``elements[0]`` counts the steps
        along ``center[1]`` and ``elements[1]`` the steps along ``center[0]``
    :param spacing: multiplier applied to ``dLon`` / ``dLat`` for each step
    :return: list of 2-tuples, with the ``center[1]`` offset varying slowest
    """
    assert elements[0] % 2 == 1 and elements[1] % 2 == 1

    outer_steps = range(-elements[0]//2+1 , elements[0]//2+1)
    inner_steps = range(-elements[1]//2+1 , elements[1]//2+1)

    return [
        (center[0] + i*dLon*spacing, center[1] + j*dLat*spacing )
        for j in outer_steps
        for i in inner_steps
    ]


#tmp = grid_gen(job_loc, (21,21),  1000)
#pd.DataFrame(tmp).to_csv("/Users/kmisiunas/Downloads/test.csv")

1 meter in lon is 9.08350160565509e-06 , and in lat is 8.983152841195214e-06 [deg/meter]


In [222]:
# for public transport

# NOTE: this first grid is overwritten by the next line before it is used.
grid = grid_gen(job_loc, (41,41),  250)
grid = grid_gen( (47.383480903211314, 8.515491547158804), (41,41),  125)
# Measure the grid positions against job_loc, caching results in the CSV file.
data = fill_values(job_loc, grid, "where_to_live_for_google_zurich_public.csv")

Elements to query: 1224


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [225]:
# same for cycling

# 41 x 41 grid centred on the job location, with a spacing argument of 500.
grid = grid_gen(job_loc, (41,41),  500)
#grid = grid_gen( (47.383480903211314, 8.515491547158804), (41,41),  125)
# Measure the grid positions against job_loc, caching results in the CSV file.
data = fill_values(job_loc, grid, "where_to_live_for_google_zurich_cycling.csv")

Elements to query: 1240


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


--- 

# Coming back home

In [82]:
# adaptation for measuring the trip back home

# different rows must be taken
def _parse_row_in_distance_matrix(x):
    """Extract ``[distance, time]`` from a single distance-matrix element.

    :param x: element dict with ``status`` and, when valid, ``distance`` and ``duration``
    :return: ``[distance, time]`` taken from ``distance.value`` and
        ``duration.value``, or ``[NaN, NaN]`` if the status is not ``'OK'``
    """
    is_valid = x['status'] == 'OK'
    return [x['distance']['value'], x['duration']['value']] if is_valid else [np.nan, np.nan]

    
def _parse_distance_matrix(x):
    """Parse all elements of the first row of a distance-matrix response.

    :param x: response dict; only ``x['rows'][0]['elements']`` is read
    :return: ``[[distance, time], ...]`` with one pair per element
    """
    elements = x['rows'][0]["elements"]
    return list(map(_parse_row_in_distance_matrix, elements))


def measure_times(origins, destinations, date = "2019-01-13", mode = "transit",
                  times = ("23:40:00+01", "23:55:00+01")):
    """
    Measure distance and travel time for coming back from ``destinations`` to every origin.

    The direction is reversed with respect to the argument names: the request
    starts at ``destinations`` and ends at each entry of ``origins``, so the
    result still has one row per origin. The distance matrix is requested once
    per entry in ``times`` (by default departing at 23:40 and at 23:55 on
    ``date``) and, for every origin, the smallest distance and the smallest
    time over those samples are reported independently of each other.

    :param origins: home positions to travel back to; one result row per origin
    :param destinations: where the return trip starts (the response parser
        reads only the first row of the response)
    :param date: day of travel, ``YYYY-MM-DD``
    :param mode: travel mode handed to Google Maps, e.g. "transit" or "bicycling"
    :param times: departure times of day (including UTC offset) to sample
    :return: array with one ``[distance, time]`` row per origin, in the values
        returned by the API; NaN where any of the samples was invalid
    """
    res = []

    for t in times:
        distance = gmaps.distance_matrix(
            origins = destinations,
            destinations = origins,
            mode = mode,
            units = "metric",
            departure_time = parser.parse(date + "T" + t),
        )
        res.append(_parse_distance_matrix(distance))

    # Axis 0 indexes the samples in `times`; reduce it to the element-wise minimum.
    return np.min(np.array(res), axis=0)


In [84]:
# Geocode the main station; city_center is a (lat, lng) tuple.
city_center = parse_coordinates( gmaps.geocode("Zurich HB") )
print("city_center:", city_center)

# NOTE: the grid is centred on a hard-coded point, not on city_center itself.
grid = grid_gen( (47.378, 8.538), (55,41),  500)
# Measure the grid positions against city_center, caching results in the CSV file.
data = fill_values(city_center, grid, "where_to_live_come_back_late.csv")

city_center: (47.37840449999999, 8.538403299999999)
Elements to query: 574


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
