### Todo:
- function to get arrival time for upcoming week in decimal days from arrivalTime object 
    - Currently this will break if used in a different time zone
- Reverse Geocode, regeocode location information?
    - Currently if a point is in a body of water, it is ignored. Maybe I should just get the closest land point

# Estimating Transit Times
## Written by Stephen Scherrer on 6 April 2020

The following notebook queries Google's Directions API (in transit mode) and records the estimated transit time and fare from each address to the point of interest.

In [23]:
import pandas as pd
import os
import time
from dotenv import load_dotenv, find_dotenv
import requests
import google.transit.gtfs_realtime_pb2 as gtfs_realtime_pb2
import requests
import simplejson
import urllib.parse 
import urllib.request
import datetime



In [24]:
#### Workspace Setup
## Establish Directory Hierarchy
# projDir is the parent of the current working directory; dataDir is its 'Data' sub-folder
projDir = os.path.dirname(os.getcwd())
dataDir = os.path.join(projDir, 'Data')

## Load the Google API key
# Prefer the GOOGLE_API_KEY environment variable so the notebook does not depend on a
# file that only exists on one machine. Fall back to the original key file
# (row 0, column 1 of a header-less CSV) when the variable is unset or empty.
googleApiKey = os.environ.get('GOOGLE_API_KEY')
if not googleApiKey:
    googleApiKey = pd.read_csv('/Users/stephenscherrer/Desktop/API Keys.csv', header = None).loc[0,1]

In [25]:
#### Defining Utility Functions
def next_weekday(d, weekday, arrival_hours=None):
    """Return the epoch time of the desired arrival on the next `weekday`.

    Parameters
    ----------
    d : datetime.datetime, datetime.date or None
        Reference date to search forward from. Only its year/month/day are
        used. ``None`` means "today" in the local time of this computer.
    weekday : int
        Target day of the week: 0 = Monday, 1 = Tuesday, ... 6 = Sunday.
    arrival_hours : float, optional
        Hours after local midnight at which to arrive. When omitted, the
        notebook-level ``arrivalTime`` is used.

    Returns
    -------
    int
        Seconds since the Unix epoch for the arrival moment. The result is
        always strictly after the reference date: if the reference date already
        falls on `weekday`, the following week's occurrence is returned.
    """
    if d is None:
        d = datetime.datetime.now()
    if arrival_hours is None:
        arrival_hours = arrivalTime
    ## Truncate the reference to local midnight of its calendar day
    midnight = datetime.datetime(d.year, d.month, d.day)
    ## Find the nearest upcoming occurrence of the requested weekday
    days_ahead = weekday - midnight.weekday()
    if days_ahead <= 0: # Target day is today or already happened this week
        days_ahead += 7
    ## Add days and hours as wall-clock time, so the local -> epoch conversion
    ## below accounts for any daylight-saving change in between
    arrival = midnight + datetime.timedelta(days = days_ahead, hours = arrival_hours)
    ## timestamp() treats a naive datetime as local time and, unlike
    ## strftime('%s'), works on every platform; int() keeps the URL value integral
    return int(arrival.timestamp())


In [26]:
#### Defining global parameters
### Destination address and request options used when building the API calls
destCoord = [37.7911, -122.3961] # [lat, lon] of the destination in decimal degrees
arrivalTime = 10 # desired arrival time for the transit directions, in (possibly fractional) hours of local time on this computer; read by next_weekday()
trafficModel = 'best_guess' # sent as the traffic_model request parameter. Valid options include 'best_guess', 'pessimistic', 'optimistic'. See traffic_model in https://developers.google.com/maps/documentation/directions/intro#TravelModes

In [27]:
#### Loading Data
## Read the origin grid; the first CSV column becomes the dataframe index
latLonDF = pd.read_csv(os.path.join(dataDir, 'interim/lat_lon_address_grid.csv'), index_col = 0)
## Placeholder columns to be filled from the API responses.
## float('nan') is used because numpy is never imported in this notebook.
latLonDF['cost'] = float('nan')
latLonDF['time'] = float('nan')

In [28]:
### Requesting Transit Info from Google's API
## TODO: only request details for rows where those details are still missing

## Everything except the origin is the same for every request, so build it once.
## Computing the arrival time once also keeps it consistent across the whole run.
directionsUrl = 'https://maps.googleapis.com/maps/api/directions/json?'
destination = str(round(destCoord[0], 5)) + ',' + str(round(destCoord[1], 5))
arrivalEpoch = next_weekday(datetime.datetime.now(), 0) # 0 = Monday, 1 = Tuesday, 2 = Wednesday...6 = Sunday

## Iterate over the actual index labels rather than assuming they are 0..n-1
for i in latLonDF.index:
    ## Specifying API call parameters (dict order is the order they appear in the URL)
    parameters = {
        'origin': str(round(latLonDF.loc[i, 'lat'], 5)) + ',' + str(round(latLonDF.loc[i, 'lon'], 5)),
        'destination': destination,
        'mode': 'transit',
        'traffic_model': str(trafficModel),
        'arrival_time': arrivalEpoch,
        'key': googleApiKey,
    }

    ## Writing API Call URL; urlencode escapes the values, safe=',' keeps 'lat,lon' readable
    url = directionsUrl + urllib.parse.urlencode(parameters, safe = ',')

    ## Get transit cost and time for a given location (the response is closed when the block exits)
    with urllib.request.urlopen(url) as response:
        results = simplejson.load(response)

    ## Report API-level failures (quota, denied key, ...) instead of silently leaving NaN
    status = results.get('status')
    if status not in ('OK', 'ZERO_RESULTS'):
        print('Row', i, '- Directions API returned status', status, results.get('error_message', ''))

    routes = results.get('routes') or []
    if len(routes) == 0:
        ## No route found (e.g. the point is in a body of water): leave cost and time as NaN
        continue

    ## Get transit cost (Note: if the whole trip is a walk there is no cost, so the route has no 'fare')
    fare = routes[0].get('fare')
    if fare is not None and 'value' in fare:
        latLonDF.loc[i, 'cost'] = fare['value'] # in dollars
    latLonDF.loc[i, 'time'] = routes[0]['legs'][0]['duration']['value'] / 60 # in minutes

In [29]:
## Show the populated dataframe followed by its (rows, columns) shape
print(latLonDF, latLonDF.shape, sep = '\n')

            lat         lon                address  cost        time
0     37.656933 -122.530267         California, us   NaN         NaN
1     37.656933 -122.524304         California, us   NaN         NaN
2     37.656933 -122.518341         California, us   NaN         NaN
3     37.656933 -122.512378  California, 94015, us   NaN         NaN
4     37.656933 -122.506415  California, 94015, us  5.25   85.033333
...         ...         ...                    ...   ...         ...
2204  37.925267 -122.285785                    NaN   NaN   86.100000
2205  37.925267 -122.279822                    NaN   NaN  124.583333
2206  37.925267 -122.273859                    NaN   NaN         NaN
2207  37.925267 -122.267896                    NaN   NaN         NaN
2208  37.925267 -122.261933                    NaN   NaN         NaN

[2209 rows x 5 columns]
(2209, 5)


In [30]:
### Write the cost/time results to the interim data folder as a CSV
latLonDF.to_csv(path_or_buf = os.path.join(dataDir, 'interim/lat_lon_cost_time.csv'))