# Streetcar Delay Prediction - Data Preparation Geocode Specific

Use dataset covering Toronto Transit Commission (TTC) streetcar delays 2014 - present to predict future delays and come up with recommendations for avoiding delays.

Source dataset: https://www.toronto.ca/city-government/data-research-maps/open-data/open-data-catalogue/#e8f359f0-2f47-3058-bf64-6ec488de52da

This notebook contains the data preparation steps specific to mapping free-form location descriptions to latitude and longitude:

- use the Google Maps API Web Services for Python  https://github.com/googlemaps/google-maps-services-python
- generate the latitude and longitude values for locations and create new columns in the output dataset

# Streetcar routes

From https://www.ttc.ca/Routes/Streetcars.jsp

<table style="border: none" align="left">
   <tr style="border: none">
       <th style="border: none"><img src="https://raw.githubusercontent.com/ryanmark1867/streetcarnov3/master/streetcar%20routes.jpg" width="600" alt="TTC streetcar routes map"> </th>
   </tr>
</table>

In [1]:
# Show the shell's current working directory.
! pwd

/storage/manning/notebooks


# Get path and load dataframe saved from previous data preparation step

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# import seaborn as sns
import datetime
import os

# Notebook-wide settings.
# NOTE(review): remove_bad_values is not read by any of the visible cells; presumably it
# controls whether rows with unusable locations are dropped — confirm before relying on it.
remove_bad_values = False
# City name; the sample geocoding query later in the notebook targets this same city.
city_name = "Toronto"


In [3]:
# Record the directory this notebook is running in; the data path is derived from it.
rawpath = os.getcwd()
print("raw path is", rawpath)

raw path is /storage/manning/notebooks


In [4]:
# The data lives in a directory called "data" that is a sibling of the notebook's directory.
data_dir = os.path.abspath(os.path.join(rawpath, "..", "data"))
# Keep the trailing "/" so file names can be appended directly to `path`.
path = data_dir + "/"
print("path is", path)

path is /storage/manning/data/


In [5]:
# File names for the streetcar problem.
# These are meant to match the values saved in the data_preparation notebook
# (pickled_input_dataframe, pickled_output_dataframe).
pickled_data_file = "2014_2018.pkl"
# Dataframe loaded by this notebook. Earlier input, kept for reference: '2014_2018_df.pkl'
pickled_dataframe = "2014_2018_df_cleaned_keep_bad_loc_geocoded_apr23.pkl"
# The output file name is currently disabled:
# pickled_output_dataframe = '2014_2018_df_cleaned_keep_bad_loc_geocoded_apr23.pkl'

In [6]:
# Load the dataframe pickled by the previous data preparation step.
# NOTE: unpickling can execute arbitrary code — only load pickle files you created or trust.
# `path` already ends with "/", so joining yields the same string as plain concatenation.
file_name = os.path.join(path, pickled_dataframe)
df = pd.read_pickle(file_name)
# Preview the first rows as the cell's output.
df.head()

Unnamed: 0,Report Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle,Report Date Time,lat_long,latitude,longitude
0,2016-01-01,505,00:00:00,Friday,dundas west stationt to broadview station,General Delay,7.0,14.0,w,4028,2016-01-01 00:00:00,"[0.0, 0.0]",0.0,0.0
1,2016-01-01,511,02:14:00,Friday,fleet st. and strachan,Mechanical,10.0,20.0,e,4018,2016-01-01 02:14:00,"[43.6362976, -79.4096351]",43.636298,-79.409635
2,2016-01-01,301,02:22:00,Friday,queen st. west and roncesvalles,Mechanical,9.0,18.0,w,4201,2016-01-01 02:22:00,"[43.64533489999999, -79.4131843]",43.645335,-79.413184
3,2016-01-01,301,03:28:00,Friday,lake shore blvd. and superior st.,Mechanical,20.0,40.0,e,4251,2016-01-01 03:28:00,"[43.61496169999999, -79.4886581]",43.614962,-79.488658
4,2016-01-01,501,14:28:00,Friday,roncesvalles to neville park,Mechanical,6.0,12.0,e,4242,2016-01-01 14:28:00,"[0.0, 0.0]",0.0,0.0


In [7]:
# Check the dimensions (rows, columns) of the loaded dataframe.
df.shape

(69603, 14)

# Set up Folium
Visualize geocoded data using Folium https://github.com/python-visualization/folium

In [1]:
! pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/43/77/0287320dc4fd86ae8847bab6c34b5ec370e836a79c7b0c16680a3d9fd770/folium-0.8.3-py2.py3-none-any.whl (87kB)
[K    100% |████████████████████████████████| 92kB 7.6MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.8.3


In [8]:
import folium

# Centre the base map on the coordinates geocoded for "fleet st. and strachan"
# (latitude 43.636298, longitude -79.409635 in the dataframe preview).
map_center = [43.636298, -79.409635]
m = folium.Map(location=map_center)

In [9]:
# Display the map as the cell's output.
m

In [11]:
import googlemaps
# NOTE: this rebinds the name `datetime` from the module (imported at the top of the
# notebook) to the datetime class for every cell that runs after this one.
from datetime import datetime

# The Geocoding API key is created in the Google Cloud console:
# https://console.developers.google.com/google/maps-apis/apis/geocoding-backend.googleapis.com/credentials?project=streetcardec2018&duration=PT1H
# It is read from the environment rather than embedded in the notebook so the secret never
# lands in version control or shared copies. Any key that was ever committed should be revoked.
google_maps_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not google_maps_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell")
gmaps = googlemaps.Client(key=google_maps_api_key)

# Geocode one sample free-form location, qualified with the city configured in city_name,
# to confirm the client and key work.
geocode_result = gmaps.geocode('roncesvalles and longbranch, {}'.format(city_name))

print("geocode result",geocode_result)

geocode result [{'address_components': [{'long_name': 'Roncesvalles Village', 'short_name': 'Roncesvalles Village', 'types': ['neighborhood', 'political']}, {'long_name': 'Old Toronto', 'short_name': 'Old Toronto', 'types': ['political', 'sublocality', 'sublocality_level_1']}, {'long_name': 'Toronto', 'short_name': 'Toronto', 'types': ['locality', 'political']}, {'long_name': 'Toronto Division', 'short_name': 'Toronto Division', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Ontario', 'short_name': 'ON', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'Canada', 'short_name': 'CA', 'types': ['country', 'political']}], 'formatted_address': 'Roncesvalles Village, Toronto, ON, Canada', 'geometry': {'bounds': {'northeast': {'lat': 43.6568814, 'lng': -79.4367312}, 'southwest': {'lat': 43.638107, 'lng': -79.4599582}}, 'location': {'lat': 43.6484365, 'lng': -79.4506989}, 'location_type': 'APPROXIMATE', 'viewport': {'northeast': {'lat': 43.6568814, 

In [15]:
# Re-check the dataframe's dimensions (rows, columns).
df.shape

(69603, 11)

In [27]:
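# pickle the cleansed dataframe
# (disabled: pickled_output_dataframe is commented out in the constants cell and df_out is
#  not defined in the visible cells, so these lines cannot be re-enabled as-is)
# file_name = path + pickled_output_dataframe
# df_out.to_pickle(file_name)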

In [28]:
# dfn = pd.read_pickle(file_name)
# dfn.head()

Unnamed: 0,Report Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle,Report Date Time,lat_long,latitude,longitude
0,2016-01-01,505,00:00:00,Friday,dundas west stationt to broadview station,General Delay,7.0,14.0,w,4028,2016-01-01 00:00:00,"[0.0, 0.0]",0.0,0.0
1,2016-01-01,511,02:14:00,Friday,fleet st. and strachan,Mechanical,10.0,20.0,e,4018,2016-01-01 02:14:00,"[43.6362976, -79.4096351]",43.636298,-79.409635
2,2016-01-01,301,02:22:00,Friday,queen st. west and roncesvalles,Mechanical,9.0,18.0,w,4201,2016-01-01 02:22:00,"[43.64533489999999, -79.4131843]",43.645335,-79.413184
3,2016-01-01,301,03:28:00,Friday,lake shore blvd. and superior st.,Mechanical,20.0,40.0,e,4251,2016-01-01 03:28:00,"[43.61496169999999, -79.4886581]",43.614962,-79.488658
4,2016-01-01,501,14:28:00,Friday,roncesvalles to neville park,Mechanical,6.0,12.0,e,4242,2016-01-01 14:28:00,"[0.0, 0.0]",0.0,0.0


# Visualize cleaned data

In [None]:
!pip install pixiedust

In [None]:
import pixiedust

In [None]:
# Show the dataframe for exploration.
# NOTE(review): presumably this relies on the display() made available by the pixiedust
# import in the previous cell — confirm.
display(df)