# 1: Download Data for Deadly Collision Analysis

The following file looks to download and convert the various files required for this study into dataframes for data cleaning and analysis.

In [1]:
# Import libraries
import os
import json
import pandas as pd
import seaborn as sns
from datetime import datetime
import requests
import geopandas as gpd

### 1.1 Downloading Deadly Collision Data

In [2]:
# Following code pulls dictionary (.json file) on motor collisions
motor_collisions = (requests.
                    get('https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/0b6d3a00-7de1-440b-b47c-7252fd13929f/resource/355d4464-eb3c-4780-af79-43dd533ae906/download/Motor%20Vehicle%20Collisions%20with%20KSI%20Data.geojson').
                    json()
                   )

In [3]:
# Initialize an empty list to store flattened dictionaries
flat_motor_collisions = []

# Iterate through the 'features' list in the JSON motor_collisions
for feature in motor_collisions['features']:
    # properties entail the characteristics of the crash
    properties = feature['properties']
    # geometry entails the location
    geometry = feature['geometry']
    flat_properties = properties.copy()
    flat_properties.update(geometry)
    flat_motor_collisions.append(flat_properties)
    
# Create a motor_collisions DataFrame from the flattened motor_collisions
motor_collisions_df = pd.DataFrame(flat_motor_collisions)

In [4]:
motor_collisions_df.head()

Unnamed: 0,_id,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,...,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,type,coordinates
0,1,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,,,,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]"
1,2,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,,,,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]"
2,3,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,Yes,,,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]"
3,4,893184,2006,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,...,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,MultiPoint,"[[-79.318797, 43.699595]]"
4,5,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,Yes,,,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]"


### 1.2 Downloading Taffic and Intersection data
This section looks at downloading intersection and traffic volumes for the analysis. 

In [5]:
# Data that includes strictly intersection info
intersections = pd.read_csv('https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/traffic-volumes-at-intersections-for-all-modes/resource/b6b2cb6f-0528-48ef-b605-4b56e4ce49d6/download/locations.csv')
intersections.head()

Unnamed: 0,_id,location_id,location,lng,lat,centreline_type,centreline_id,px,latest_count_date
0,1,1146,ELLESMERE RD AT PARKINGTON CRES,-79.246254,43.773319,2.0,13446642.0,2296.0,2022-12-17
1,2,1981,YORK MILLS RD E/B & W/B TO DON VALLEY PKWY N/B,-79.334658,43.757336,1.0,440171.0,,2004-04-08
2,3,3468,PARKSIDE DR N/B S OF SPRING RD,-79.454442,43.640512,1.0,30010748.0,,2021-12-15
3,4,3925,RIPLEY AVE AT SOUTH KINGSWAY,-79.475274,43.63678,2.0,13468657.0,,2022-01-27
4,5,3926,BLOOR ST AT SOUTH KINGSWAY & RIVERVIEW GARDENS...,-79.485752,43.648312,2.0,13467247.0,334.0,2022-05-10


In [6]:
# Data from 2000 to 2009
vol_00to09 = pd.read_csv('https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/traffic-volumes-at-intersections-for-all-modes/resource/bb7554d9-cd5d-4fad-aa5b-97339a9018df/download/raw-data-2000-2009.csv')
vol_00to09.head()

Unnamed: 0,_id,count_id,count_date,location_id,location,lng,lat,centreline_type,centreline_id,px,...,ex_peds,wx_peds,nx_bike,sx_bike,ex_bike,wx_bike,nx_other,sx_other,ex_other,wx_other
0,1,8180,2000-01-18,4126,EGLINTON AVE AT PHARMACY AVE (PX 452),-79.297515,43.725651,2.0,13453978.0,452.0,...,7.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,8180,2000-01-18,4126,EGLINTON AVE AT PHARMACY AVE (PX 452),-79.297515,43.725651,2.0,13453978.0,452.0,...,12.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,8180,2000-01-18,4126,EGLINTON AVE AT PHARMACY AVE (PX 452),-79.297515,43.725651,2.0,13453978.0,452.0,...,7.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,8180,2000-01-18,4126,EGLINTON AVE AT PHARMACY AVE (PX 452),-79.297515,43.725651,2.0,13453978.0,452.0,...,9.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,8180,2000-01-18,4126,EGLINTON AVE AT PHARMACY AVE (PX 452),-79.297515,43.725651,2.0,13453978.0,452.0,...,10.0,4.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0


In [7]:
vol_10to19 = pd.read_csv('https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/traffic-volumes-at-intersections-for-all-modes/resource/1f60c668-bb8e-4e1e-ac72-3c6558a03fea/download/raw-data-2010-2019.csv')
vol_10to19.head()

Unnamed: 0,_id,count_id,count_date,location_id,location,lng,lat,centreline_type,centreline_id,px,...,ex_peds,wx_peds,nx_bike,sx_bike,ex_bike,wx_bike,nx_other,sx_other,ex_other,wx_other
0,1,25081,2010-01-12,32187,LILLIAN ST AT SOUDAN AVE,-79.393259,43.705283,2.0,13457294.0,,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,25081,2010-01-12,32187,LILLIAN ST AT SOUDAN AVE,-79.393259,43.705283,2.0,13457294.0,,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,3,25081,2010-01-12,32187,LILLIAN ST AT SOUDAN AVE,-79.393259,43.705283,2.0,13457294.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,25081,2010-01-12,32187,LILLIAN ST AT SOUDAN AVE,-79.393259,43.705283,2.0,13457294.0,,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,5,25081,2010-01-12,32187,LILLIAN ST AT SOUDAN AVE,-79.393259,43.705283,2.0,13457294.0,,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0


In [8]:
vol_20to29 = pd.read_csv('https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/traffic-volumes-at-intersections-for-all-modes/resource/71f08804-46ce-4a92-9e8f-9b0e67927ca6/download/raw-data-2020-2029.csv')
vol_20to29.head()

Unnamed: 0,_id,count_id,count_date,location_id,location,lng,lat,centreline_type,centreline_id,px,...,ex_peds,wx_peds,nx_bike,sx_bike,ex_bike,wx_bike,nx_other,sx_other,ex_other,wx_other
0,1,39337,2020-01-08,13060,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,2,13462138,,...,60,0,4,0,0,0,0,0,0,0
1,2,39337,2020-01-08,13060,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,2,13462138,,...,54,0,3,0,0,0,0,0,0,0
2,3,39337,2020-01-08,13060,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,2,13462138,,...,86,0,2,0,0,0,0,0,0,0
3,4,39337,2020-01-08,13060,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,2,13462138,,...,105,0,2,1,0,0,0,0,0,0
4,5,39337,2020-01-08,13060,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,2,13462138,,...,104,0,1,2,0,0,0,0,0,0


## 2: Convert all dataframes into geodataframes
This conversion is done by acquiring the longitude and latitude of each dataframe and creating a point geometry columns using geopandas. Every single dataframe has a lon and lat with exception to the deadly collisions which has a geometry column (though this does not function as a geometry column), and as a result, some work has to be done to create the lon and latitude columns.

In [9]:
# The geometry column of the motor collisionss df contains a set of arrays embedded in each other. As a result, by
# repeatedly indexing the array, you can eventually extract the value itself (hence the 2 [0] followed by a [0] or [1]).
motor_collisions_df['lon'] = motor_collisions_df['coordinates'][0][0][0]
motor_collisions_df['lat'] = motor_collisions_df['coordinates'][0][0][1]
motor_collisions_df.head()

Unnamed: 0,_id,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,...,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,type,coordinates,lon,lat
0,1,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]",-79.45249,43.656345
1,2,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]",-79.45249,43.656345
2,3,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]",-79.45249,43.656345
3,4,893184,2006,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,...,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,MultiPoint,"[[-79.318797, 43.699595]]",-79.45249,43.656345
4,5,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]",-79.45249,43.656345


In [10]:
# We turn the df into a gdf so that it has geometry.
motor_collisions_gdf = gpd.GeoDataFrame(motor_collisions_df, 
                                          geometry = gpd.points_from_xy(motor_collisions_df['lon'], 
                                                                        motor_collisions_df['lat'])
                                       )
motor_collisions_gdf.head()

Unnamed: 0,_id,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,...,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,type,coordinates,lon,lat,geometry
0,1,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]",-79.45249,43.656345,POINT (-79.45249 43.65635)
1,2,892658,2006,2006-03-11,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,...,88,High Park North,88,High Park North (88),D11,MultiPoint,"[[-79.45249, 43.656345]]",-79.45249,43.656345,POINT (-79.45249 43.65635)
2,3,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]",-79.45249,43.656345,POINT (-79.45249 43.65635)
3,4,893184,2006,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,...,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,MultiPoint,"[[-79.318797, 43.699595]]",-79.45249,43.656345,POINT (-79.45249 43.65635)
4,5,892810,2006,2006-03-11,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,...,146,Malvern East,132,Malvern (132),D42,MultiPoint,"[[-79.199786, 43.801943]]",-79.45249,43.656345,POINT (-79.45249 43.65635)


In [11]:
# Doing this check finds that there is no coordinate system assigned, so we can pick one to assign it.
print(motor_collisions_gdf.crs)

None
