## Install Packages

In [None]:
# Setup
!pip install -U pyTigerGraph googlemaps

## Add Imports and Establish Initial Connection

In [None]:
# Imports
import os
import pandas as pd
from datetime import datetime
from getpass import getpass
import json
import time

from google.colab import drive
import googlemaps
import pandas as pd
import pyTigerGraph as tg
import requests

drive.mount('/gdrive')

In [None]:
# Connection parameters
hostName = ''
userName = ''
# Ask for the password at run time so it is never stored in the notebook source.
password = getpass('TigerGraph password: ')
graphName = 'TigyoreGraph'

# Step 1: connect with username/password only, to create a secret and
# exchange it for an API token.
conn = tg.TigerGraphConnection(host=hostName, username=userName, password=password)
conn.graphname = graphName
secret = conn.createSecret()
# The token itself is element 0 of what getToken() returns.
authToken = conn.getToken(secret)[0]

# Step 2: reconnect, this time passing the token.
conn = tg.TigerGraphConnection(host=hostName, graphname=graphName, username=userName, password=password, apiToken=authToken)

print('Connected')

# Configurations
> Main Parameters / Constants to be maintained in this notebook :)

In [None]:
## Google Colab / API Keys
apikey = ''

## Main Directories
GDRIVE_DIR = "/gdrive/My Drive/Tigyore"
DATA_DIR = os.path.join(GDRIVE_DIR, 'data')

## Sub-directories shared by the file paths below
_NYC_TRIPS_DIR = os.path.join(DATA_DIR, 'nyc_trips')
_NYC_TRIPS_DATA_DIR = os.path.join(_NYC_TRIPS_DIR, 'nyc_trips_data')

## My Files to be loaded
CLEANED_TRIPS_PATH = os.path.join(_NYC_TRIPS_DIR, 'cleaned_trips.csv')
RESULTS_PATH = os.path.join(_NYC_TRIPS_DIR, 'results.csv')
DIRECTIONS_JSON_PATH = os.path.join(_NYC_TRIPS_DATA_DIR, 'nyc_trips_directions_response.json')
TRIPS_PROCESSED_PATH = os.path.join(_NYC_TRIPS_DATA_DIR, 'trips_processed.csv')

## Documentation 
> Sample code for reference

```python
# help(googlemaps.directions)
 directions_dict = get_sg_transit_directions('Tanah Merah Ferry Road', 'DUO Tower', '2022-04-16 14:00:00 +0800', API_KEY)
# directions_dict
 with open('/gdrive/My Drive/Tigyore/data/sample_directions_response.json', 'w') as f:
     f.write(json.dumps(directions_dict))
```

```python
# API_KEY = getpass('Enter Google API Key:')

# API_KEY[:5] + '...'
```

```python
# https://towardsdatascience.com/geocoding-singapore-coordinates-onemap-api-3e1542bf26f7
def get_sg_lat_lng_coordinates(address):
    """Look up ``address`` with the OneMap search API and return its coordinates.

    Args:
        address: free-text search value sent as the ``searchVal`` query parameter.

    Returns:
        A ``(LATITUDE, LONGITUDE)`` tuple taken from the first search result,
        with the values exactly as they appear in the response, or ``None``
        when the search returns no results.
    """
    # Let requests build the query string so characters such as '&' or spaces
    # inside the address are escaped instead of corrupting the URL.
    req = requests.get(
        'https://developers.onemap.sg/commonapi/search',
        params={
            'searchVal': address,
            'returnGeom': 'Y',
            'getAddrDetails': 'Y',
            'pageNum': 1,
        },
    )
    # Parse the body as JSON. The previous eval() executed whatever text the
    # remote server sent and also failed on JSON literals (null/true/false).
    resultsdict = req.json()
    results = resultsdict['results']
    if len(results) > 0:
        return results[0]['LATITUDE'], results[0]['LONGITUDE']
    return None
```

```python
# https://github.com/googlemaps/google-maps-services-python
def get_sg_transit_directions(origin: str, destination: str, departure_time: str, api_key: str) -> dict:
    """Fetch transit directions between two Singapore addresses.

    Both addresses are first resolved with ``get_sg_lat_lng_coordinates``;
    ``departure_time`` must match the format ``'%Y-%m-%d %H:%M:%S %z'``.
    The value returned by ``Client.directions`` is passed back unchanged.
    """
    client = googlemaps.Client(key=api_key)

    origin_coords = get_sg_lat_lng_coordinates(origin)
    destination_coords = get_sg_lat_lng_coordinates(destination)

    departs_at = datetime.strptime(departure_time, '%Y-%m-%d %H:%M:%S %z')

    request_options = {
        'mode': 'transit',
        'language': 'en',
        'region': 'sg',
        'departure_time': departs_at,
    }
    return client.directions(origin_coords, destination_coords, **request_options)
```

## Code Starts Here: Preprocessing Loaded Data

In [None]:
def get_nyc_transit_directions(origin: tuple, destination: tuple, departure_time: str, api_key: str) -> dict:
    """Request public-transit directions from the Google Directions API.

    Args:
        origin: start point, handed to ``Client.directions`` unchanged.
        destination: end point, handed to ``Client.directions`` unchanged.
        departure_time: departure time; converted with ``pd.to_datetime``.
        api_key: key used to construct the ``googlemaps.Client``.

    Returns:
        The result of ``Client.directions`` called with mode='transit',
        language='en' and region='us'.
    """
    client = googlemaps.Client(key=api_key)
    request_options = {
        'mode': 'transit',
        'language': 'en',
        'region': 'us',
        'departure_time': pd.to_datetime(departure_time),
    }
    return client.directions(origin, destination, **request_options)

### Loaded Data

In [None]:
trip_df = pd.read_csv(CLEANED_TRIPS_PATH)

In [None]:
trip_df.head()

In [None]:
trip_df.dtypes

## Warning: Long-Running Step - Do Not Rerun

> The commented-out cell below calls the Google Directions API once per row of `trip_df`.

In [None]:
# ## Returns a series
# directions_dct = trip_df.apply(
#     lambda x: get_nyc_transit_directions(
#         (x['pickup_latitude'],x['pickup_longitude']), 
#         (x['dropoff_latitude'],x['dropoff_longitude']), 
#         x['pickup_datetime'], 
#         apikey
#         )
#     , axis=1)

In [None]:
# len(directions_dct)

In [None]:
## Output with index for future disassembling
## `directions_dct` only exists when the (commented-out) long-running API cell
## above has been run in this session. On a fresh kernel, reuse the CSV that a
## previous run exported instead of failing with a NameError.
if 'directions_dct' in globals():
    ## Export to CSV
    ## Name the column explicitly: the JSON export below looks it up as 'results'.
    results_df = pd.DataFrame({'results': directions_dct}).reset_index()
    results_df.to_csv(RESULTS_PATH, index=False)

    ## Export to JSON
    directions_dct = results_df.set_index('index').to_dict()['results']
    with open(DIRECTIONS_JSON_PATH, 'w') as f:
        json.dump(directions_dct, f)
else:
    print('directions_dct is not defined; loading previously exported results from', RESULTS_PATH)
    results_df = pd.read_csv(RESULTS_PATH)

In [None]:
results_df 

# Post processing starts here :(


In [None]:
# Load the exported directions. json.load parses the whole file, so this
# keeps working even if the JSON is ever written across several lines.
# After the JSON round trip the keys are strings (e.g. '0').
with open(DIRECTIONS_JSON_PATH, 'r') as f:
    directions_dict = json.load(f)

In [None]:
from pprint import pprint
pprint(directions_dict['0'])

In [None]:
# id, trip_seq_no, mode, type, from_lat_lng, to_lat_lng, from_name, to_name, start_time, end_time
# 1, 1, WALKING, "", (1.3148164, 103.9881745), (1.3148164, 103.9881745), AAA, BBB, ...
# 1, 2, TRANSIT, "35", (1.3148164, 103.9881745), (1.3148164, 103.9881745), BBB, CCC, ...
# 1, 3, WALKING, "", (1.3148164, 103.9881745), (1.3148164, 103.9881745), CCC, DDD, ...
# 1, 4, TRANSIT, "East West Line", (1.3148164, 103.9881745), (1.3148164, 103.9881745), DDD, EEE, ...
# ...

In [None]:
def process_all(dir_dict):
  """Print every key of ``dir_dict``, one per line.

  Placeholder version; a later cell redefines ``process_all``.
  """
  for trip_key in dir_dict.keys():
    print(trip_key)


In [None]:
# directions_dict is keyed by strings and each value is a list of routes,
# so look at trip '0', route 0 (indexing with the integer 0 raises KeyError).
for step in directions_dict['0'][0]['legs'][0]['steps']:
    print(step)

In [None]:
data_row = '7'

In [None]:
directions_dict[data_row][0]['legs'][0]

In [None]:
# def each_data_point(data_row,dir_dict, pls_wrk_df):
#   print(data_row)
#   # edge cases
#   if len(dir_dict[data_row])==0:
#     return pls_wrk_df
  
#   legs = dir_dict[data_row][0]['legs'][0]

#   if all (k in legs for k in ('departure_time','arrival_time','start_address','end_address')) ==False:
#     return pls_wrk_df

#   else:
#     start_journey_time = legs['departure_time']['value'] #start of journey in seconds
#     end_journey_time = legs['arrival_time']['value'] #end of journey in seconds

#     start_journey_location = legs['start_address'] #start journey location
#     end_journey_location = legs['end_address'] #end journey location

#     total_seq_no = len(legs['steps'])
#     trip_seq_no = 1
#     last_location = start_journey_location # start with this first 
#     last_time = start_journey_time # start with this first

#     for moves in legs['steps']:
#       curr_dist = moves['distance']['value'] # distance in metres
#       curr_duration = moves['duration']['value'] # duration in seconds
#       curr_origin_coord = (moves['start_location']['lat'], moves['start_location']['lng']) # origin lat long
#       curr_destination_coord = (moves['end_location']['lat'], moves['end_location']['lng']) # destination lat long
#       curr_mode = moves['travel_mode'] # walk or transit?

#       if curr_mode == 'WALKING':
#         #location
#         from_loc = last_location # no choice this information is not available, the reason for tracking last_location
#         to_loc = moves['html_instructions'][8:] # hacky way of removing the word 'walk to ', at this level, it is 'walk to', not 'head to', 'turn at' etc
#         last_location = to_loc #updating

#         #time
#         start_time = last_time # no choice this information is not available, the reason for tracking last_time
#         end_time = start_time + curr_duration
#         last_time = end_time

#         #type:
#         curr_type = ""
#         curr_type_specific = ""
      
#       elif curr_mode == 'TRANSIT':
#         #location
#         from_loc = moves['transit_details']['departure_stop']['name'] #departure_stop
#         to_loc = moves['transit_details']['arrival_stop']['name'] #arrival_stop
#         last_location = to_loc #updating

#         #time
#         start_time = moves['transit_details']['departure_time']['value'] #departure_time: in seconds 
#         end_time = moves['transit_details']['arrival_time']['value'] #arrival_time: also in seconds
#         last_time = end_time

#         #type: 
#         curr_type = moves['transit_details']['line']['vehicle']['name']

#         if 'short_name' in moves['transit_details']['line']:
#           curr_type_specific = moves['transit_details']['line']['short_name']        
#         else:
#           curr_type_specific = moves['transit_details']['line']['name']
#       data_row = int(data_row)
#       this_row = [data_row, trip_seq_no, curr_mode, curr_type, curr_type_specific, curr_origin_coord, curr_destination_coord, from_loc, to_loc, start_time, end_time]
#       pls_wrk_df.loc[len(pls_wrk_df)] = this_row
#       trip_seq_no+=1
#     return pls_wrk_df




In [None]:
def each_data_point(data_row,dir_dict, pls_wrk_df):
  """Append the steps of one trip to ``pls_wrk_df`` and return that frame.

  Only the first leg of the first route of the trip is used. One row is
  written per step, followed by a final "destination" row with empty
  mode/type/service and the leg's end coordinates and end address.

  Args:
    data_row: key of the trip in ``dir_dict``; must be convertible with
      ``int()`` because the integer value is stored as the row id.
    dir_dict: mapping of trip key to a list of routes, each route holding a
      ``'legs'`` list (indexed below as ``[0]['legs'][0]``).
    pls_wrk_df: DataFrame that receives the rows; modified in place. Each row
      has ten values in this order: id, sequence number, mode, type, service,
      latitude, longitude, location name, trip start time, trip end time.
      Rows are added at label ``len(pls_wrk_df)``, which assumes a default
      0..n-1 index.

  Returns:
    ``pls_wrk_df``. Nothing is appended when the trip has no routes or when
    the leg lacks any of the required keys.
  """
  print(data_row)

  # edge case: no route was returned for this trip
  routes = dir_dict[data_row]
  if len(routes) == 0:
    return pls_wrk_df

  leg = routes[0]['legs'][0]

  # edge case: legs without time/address/location information are skipped
  required_keys = ('departure_time', 'arrival_time', 'start_address', 'end_address', 'start_location', 'end_location')
  if not all(key in leg for key in required_keys):
    return pls_wrk_df

  # Convert the id once, up front, so every row (including the destination
  # row of a leg that has no steps) stores an integer id.
  trip_id = int(data_row)

  # 'value' holds epoch seconds; keep the timestamps timezone-aware in UTC
  start_journey_time = pd.to_datetime(int(leg['departure_time']['value']), utc=True, unit='s')
  end_journey_time = pd.to_datetime(int(leg['arrival_time']['value']), utc=True, unit='s')

  end_journey_location = leg['end_address']
  end_lat, end_long = leg['end_location']['lat'], leg['end_location']['lng']

  walk_prefix = 'walk to '
  trip_seq_no = 1
  # Walking steps carry no origin name, so remember where the previous step ended.
  last_location = leg['start_address']

  for step in leg['steps']:
    origin_lat, origin_long = step['start_location']['lat'], step['start_location']['lng']
    mode = step['travel_mode']  # walk or transit?
    step_type = ""
    service = ""

    if mode == 'WALKING':
      from_loc = last_location
      instructions = step['html_instructions']
      # Strip the leading "Walk to " only when it is really there, instead of
      # blindly cutting the first 8 characters.
      if instructions.lower().startswith(walk_prefix):
        to_loc = instructions[len(walk_prefix):]
      else:
        to_loc = instructions
      last_location = to_loc

    elif mode == 'TRANSIT':
      details = step['transit_details']
      from_loc = details['departure_stop']['name']
      to_loc = details['arrival_stop']['name']
      last_location = to_loc

      line = details['line']
      step_type = line['vehicle']['name']
      # prefer the short service name, fall back to the full line name
      service = line['short_name'] if 'short_name' in line else line['name']

    else:
      # Any other travel mode: record the step from the last known location
      # rather than reusing values left over from the previous step (or
      # crashing on the first step because they were never assigned).
      from_loc = last_location
      to_loc = None

    # a step that starts and ends at the same named place adds no information
    if from_loc == to_loc:
      continue

    pls_wrk_df.loc[len(pls_wrk_df)] = [trip_id, trip_seq_no, mode, step_type, service, origin_lat, origin_long, from_loc, start_journey_time, end_journey_time]
    trip_seq_no += 1

  # destination row
  pls_wrk_df.loc[len(pls_wrk_df)] = [trip_id, trip_seq_no, "", "", "", end_lat, end_long, end_journey_location, start_journey_time, end_journey_time]
  return pls_wrk_df


In [None]:
# just to test
keys_to_extract = ["5", "7" ,"11"]
a_subset = {key: directions_dict[key] for key in keys_to_extract}

In [None]:
def process_all(dir_dict):
  """Build the processed-trips table for every trip in ``dir_dict``.

  Args:
    dir_dict: mapping of trip key to that trip's list of routes.

  Returns:
    A new DataFrame with the columns id, trip_seq_no, mode, type, service,
    lat, long, location, trip_start_time and trip_end_time, filled by calling
    ``each_data_point`` once per key of ``dir_dict``.
  """
  pls_work_df = pd.DataFrame(columns=['id', 'trip_seq_no', 'mode', 'type', 'service', 'lat', 'long',  'location','trip_start_time', 'trip_end_time'])
  for trip_key in dir_dict:
    # Look each trip up in the mapping that was passed in, not in the
    # notebook-level directions_dict, so subsets and other inputs work.
    # each_data_point appends its rows to pls_work_df in place.
    each_data_point(trip_key, dir_dict, pls_work_df)
  return pls_work_df

process_all(a_subset)

In [None]:
trips_processed_df = process_all(directions_dict)

In [None]:
trips_processed_df

In [None]:
from datetime import datetime
from dateutil import tz

from_zone = tz.gettz("UTC")
to_zone = tz.gettz('America/New_York')

In [None]:
from_zone

In [None]:
trips_processed_df['trip_start_time']

In [None]:
# Copy the trip times into new *_us columns with from_zone (UTC) set as tzinfo.
# The source values were created in each_data_point with utc=True, so this
# labels them as UTC without moving the instant they represent.
trips_processed_df['trip_start_time_us'] = trips_processed_df['trip_start_time'].apply(lambda x: x.replace(tzinfo=from_zone))
trips_processed_df['trip_end_time_us'] = trips_processed_df['trip_end_time'].apply(lambda x: x.replace(tzinfo=from_zone))

In [None]:
# Convert the *_us columns to to_zone (America/New_York), overwriting them in place.
trips_processed_df['trip_start_time_us'] = trips_processed_df['trip_start_time_us'].apply(lambda x: x.astimezone(to_zone))
trips_processed_df['trip_end_time_us'] = trips_processed_df['trip_end_time_us'].apply(lambda x: x.astimezone(to_zone))

In [None]:
trips_processed_df

In [None]:
trips_processed_df.to_csv(TRIPS_PROCESSED_PATH,index=False)

In [None]:
directions_dict['23345'][0]['legs'][0]#['steps']

In [None]:
# Create csv of all trips - refer to notebook 1
for l in directions_dict[0]['legs'][0]['steps']:
    print(l)

In [None]:
with open('/gdrive/My Drive/Tigyore/data/sample_directions_response.json', 'r') as f:
    print(f.readlines())

In [None]:
# Load the sample Singapore response. json.load parses the whole file rather
# than only its first line.
with open('/gdrive/My Drive/Tigyore/data/sample_directions_response.json', 'r') as f:
    singapore_directions_dict = json.load(f)

In [None]:
pprint(singapore_directions_dict[0]['legs'][0]['steps'])