# Google Distance Matrix API is a service that provides travel distance and time for a matrix of origins and destinations, based on the recommended route between start and end points as calculated by the Google Maps API. 

# The returned API information consists of rows containing duration and distance values for each pair.

# Bear in mind that the API has a limited free quota. For details, please refer to https://developers.google.com/maps/documentation/distance-matrix/usage-and-billing

In [1]:
import googlemaps
import pandas as pd
from datetime import datetime

In [2]:
# Each Google Maps Web Service request requires an API key.
# The key is a secret, so it is read from the GOOGLE_MAPS_API_KEY environment
# variable instead of being written into the notebook.
# NOTE(review): a key was previously committed in this cell; it should be
# revoked/rotated in the Google Cloud console.
import os

api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to your "
        "Google Maps API key before running this notebook."
    )
gmaps = googlemaps.Client(key=api_key)

from googlemaps import convert
from googlemaps.convert import as_list

def distance_matrix(client, origins, destinations,
                    mode=None, language=None, avoid=None, units=None,
                    departure_time=None, arrival_time=None, transit_mode=None,
                    transit_routing_preference=None, traffic_model=None, region=None):
    """Gets travel distance and time for a matrix of origins and destinations.

    :param client: The object used to send the request; its
        ``_request(path, params)`` method is called with the Distance Matrix
        endpoint path and the assembled query parameters.
    :param origins: One or more locations and/or latitude/longitude values,
        from which to calculate distance and time. If you pass an address as
        a string, the service will geocode the string and convert it to a
        latitude/longitude coordinate to calculate directions.
    :type origins: a single location, or a list of locations, where a
        location is a string, dict, list, or tuple
    :param destinations: One or more addresses and/or lat/lng values, to
        which to calculate distance and time. If you pass an address as a
        string, the service will geocode the string and convert it to a
        latitude/longitude coordinate to calculate directions.
    :type destinations: a single location, or a list of locations, where a
        location is a string, dict, list, or tuple
    :param mode: Specifies the mode of transport to use when calculating
        directions. Valid values are "driving", "walking", "transit" or
        "bicycling".
    :type mode: string
    :param language: The language in which to return results.
    :type language: string
    :param avoid: Indicates that the calculated route(s) should avoid the
        indicated features. Valid values are "tolls", "highways" or "ferries".
    :type avoid: string
    :param units: Specifies the unit system to use when displaying results.
        Valid values are "metric" or "imperial".
    :type units: string
    :param departure_time: Specifies the desired time of departure.
    :type departure_time: int or datetime.datetime
    :param arrival_time: Specifies the desired time of arrival for transit
        directions. Note: you can't specify both departure_time and
        arrival_time.
    :type arrival_time: int or datetime.datetime
    :param transit_mode: Specifies one or more preferred modes of transit.
        This parameter may only be specified for requests where the mode is
        transit. Valid values are "bus", "subway", "train", "tram", "rail".
        "rail" is equivalent to ["train", "tram", "subway"].
    :type transit_mode: string or list of strings
    :param transit_routing_preference: Specifies preferences for transit
        requests. Valid values are "less_walking" or "fewer_transfers".
    :type transit_routing_preference: string
    :param traffic_model: Specifies the predictive travel time model to use.
        Valid values are "best_guess" or "optimistic" or "pessimistic".
        The traffic_model parameter may only be specified for requests where
        the travel mode is driving, and where the request includes a
        departure_time.
    :type traffic_model: string
    :param region: Specifies the preferred region the geocoder should search
        first, but it will not restrict the results to only this region. Valid
        values are a ccTLD code.
    :type region: string
    :raises ValueError: if ``mode`` or ``avoid`` is not one of the accepted
        values, or if both ``departure_time`` and ``arrival_time`` are given.
    :rtype: matrix of distances. Results are returned in rows, each row
        containing one origin paired with each destination.
    """
    # Validate the arguments up front so that a bad call fails before any
    # conversion work is done and before anything is sent.
    if mode and mode not in ["driving", "walking", "bicycling", "transit"]:
        raise ValueError("Invalid travel mode.")

    if avoid and avoid not in ["tolls", "highways", "ferries"]:
        raise ValueError("Invalid route restriction.")

    if departure_time and arrival_time:
        raise ValueError("Should not specify both departure_time and "
                         "arrival_time.")

    params = {
        "origins": convert.location_list(origins),
        "destinations": convert.location_list(destinations)
    }

    # Only parameters that were actually supplied are added to the request.
    if mode:
        params["mode"] = mode

    if language:
        params["language"] = language

    if avoid:
        params["avoid"] = avoid

    if units:
        params["units"] = units

    if departure_time:
        params["departure_time"] = convert.time(departure_time)

    if arrival_time:
        params["arrival_time"] = convert.time(arrival_time)

    if transit_mode:
        # Several transit modes are sent as a single "|"-separated value.
        params["transit_mode"] = convert.join_list("|", transit_mode)

    if transit_routing_preference:
        params["transit_routing_preference"] = transit_routing_preference

    if traffic_model:
        params["traffic_model"] = traffic_model

    if region:
        params["region"] = region

    return client._request("/maps/api/distancematrix/json", params)

### After data cleansing, there are still around 11K Airbnb apartments on the list. To avoid sending a high volume of requests by looping over every MTR station for each listing, the Haversine formula is used to calculate the distance over the Earth's surface between each listing and the MTR stations, in order to estimate which station is nearest.

### After that, all we need to do with the Google Distance Matrix API is pass in each origin–destination pair — the origin being the approximate coordinates of the Airbnb listing (measurement error of up to 150 m) and the destination being the nearest MTR station — to fetch the walking distance and duration between the two points.

In [1]:
# Load the preliminary listing-to-station mapping
listing = pd.read_csv('mtr mapping (prelim).csv')

# Pull out the three columns used for the distance lookups:
#   ListingID: identifier of the Airbnb listing
#   LatLongA:  latitude and longitude of the Airbnb listing, concatenated
#   LatLongB:  name of the nearest MTR station (string)
ListingID = listing['ListingID']
LatLongA = listing['LatLongA']
LatLongB = listing['LatLongB']

NameError: name 'pd' is not defined

In [4]:
%%time

# Fetch the walking distance and duration from every listing to its nearest
# MTR station, one request per listing.
distance_table = []
unreachable = 0  # listings for which the API returned no usable walking route

# Errors the googlemaps client raises when a request itself fails.
request_errors = (
    googlemaps.exceptions.ApiError,
    googlemaps.exceptions.TransportError,
    googlemaps.exceptions.Timeout,
)

for listing_id, origin, destin in zip(ListingID, LatLongA, LatLongB):
    try:
        result = distance_matrix(gmaps, origin, destin, mode="walking", avoid="ferries")
    except request_errors as err:
        # Keep what has been fetched so far, but make the failure visible
        # instead of reporting it as a normal end of the run.
        print("Stopped at listing {} after a request error: {!r}".format(listing_id, err))
        break

    # One origin and one destination per request, so the answer is the single
    # element of the single row.
    element = result['rows'][0]['elements'][0]

    if element['status'] != 'OK':
        # e.g. ZERO_RESULTS: there is no distance/duration to record, so the
        # listing is skipped rather than stored with another listing's values.
        unreachable += 1
        continue

    distance_table.append([
        listing_id,
        origin,
        destin,
        element['distance']['text'],
        element['duration']['text'],
    ])

print("Input listings: " + str(len(listing)))
print("Total fetched {} listings with their walking distance and duration".format(len(distance_table)))
print("Listings which may not access to MTR on foot: {}".format(unreachable))

Input listings: 4544
Total fetched 4544 listings with their walking distance and duration
Listings which may not access to MTR on foot: 0
Wall time: 11min 24s


In [5]:
# Build a DataFrame from the fetched rows, one named column per field,
# and display it as the cell output
df = pd.DataFrame(
    data=distance_table,
    columns=[
        'ListingID',
        'Origin',
        'Destination',
        'Distance',
        'Walking time',
    ],
)
df

Unnamed: 0,ListingID,Origin,Destination,Distance,Walking time
0,69074,"22.28352,114.15018","Sheung Wan Station, Hong Kong",0.5 km,7 mins
1,103760,"22.28407,114.1557","Central Station, Hong Kong",0.4 km,4 mins
2,132773,"22.28868,114.14494","Sai Ying Pun Station, Hong Kong",0.1 km,2 mins
3,133390,"22.28343,114.15539","Central Station, Hong Kong",0.4 km,4 mins
4,163214,"22.28494,114.15251","Sheung Wan Station, Hong Kong",0.2 km,3 mins
5,163664,"22.28651,114.14874","Sheung Wan Station, Hong Kong",0.4 km,5 mins
6,163742,"22.28694,114.14855","Sheung Wan Station, Hong Kong",0.4 km,5 mins
7,228510,"22.30983,114.16911","Yau Ma Tei Station, Hong Kong",0.4 km,5 mins
8,239788,"22.45758,114.0059","Tin Shui Wai Station, Hong Kong",1.2 km,16 mins
9,248140,"22.28291,114.15137","Sheung Wan Station, Hong Kong",0.6 km,8 mins


In [6]:
# Save the result table to an Excel workbook (sheet "mtr"), leaving out the
# DataFrame index
df.to_excel(
    'mtr mapping (final)' + '.xlsx',
    sheet_name='mtr',
    index=False,
)