In [1]:
# Our key should be the ward name to connect various data together

# Red Light Camera has Ward Number, Ward Name, Intersection Street 1 and Intersection Street 2
# https://open.toronto.ca/dataset/red-light-cameras/
# Useful Columns: INTERSECTION_ID, Ward Num, WARD_1, WARD_2, WARD_3, District, LINEAR_NAME_FULL_1, LINEAR_NAME_FULL_2, ACTIVATION_DATE

# Traffic Volume has Intersection Street 1 and Intersection Street 2 which can be used to connect to the red light camera data
# https://open.toronto.ca/dataset/traffic-signal-vehicle-and-pedestrian-volumes/
# Useful Columns: Main, Side 1 Route, Activation Date, Latitude, Longitude, Count Date, 8 Peak Hr Vehicle Volume, 8 Peak Hr Pedestrian Volume 

# Motor Vehicle Collision data has Neightbourhood and Wardnum, which can be used as a dictionary to translate neighbourhood to wards. Addiitonally, this data set 
# has lot more information on collision and may be useful to use this data set. 
# https://open.toronto.ca/dataset/motor-vehicle-collisions-involving-killed-or-seriously-injured-persons/
# Useful Columns (assuming we are only using this data to connect two dataset): NEIGHBOURHOOD, DISTRICT, WARDNUM

# Traffic Collision data has Neighbourhoods which can be translated to ward and hence connect to the red light and traffic volume dataset. 
# https://open.toronto.ca/dataset/police-annual-statistical-report-traffic-collisions/
# Useful Columns: OccurrenceDate, Month, Day_of_Week,Year, Hour, Fatalities, Injury_Collisions, FTR_Collisions, PD_Collisions, Neighbourhood

In [2]:
import pandas as pd
import re

In [3]:
# function definition
def convert_dtype(x):
    if not x:
        return ''
    try:
        return str(x)   
    except:        
        return ''
    
def ward_rename(ward):
    # [0] for numeric value
    # [1] for name
    # convert str to int
    return int(ward.split(",")[0])

In [4]:
# Red Light Camera
df_redLight = pd.read_csv('Red Light Cameras Data - 4326.csv')
dfs_redLight = df_redLight[["INTERSECTION_ID", "WARD_1","WARD_2","WARD_3","DISTRICT","LINEAR_NAME_FULL_1","LINEAR_NAME_FULL_2","ACTIVATION_DATE"]]
dfs_redLight

Unnamed: 0,INTERSECTION_ID,WARD_1,WARD_2,WARD_3,DISTRICT,LINEAR_NAME_FULL_1,LINEAR_NAME_FULL_2,ACTIVATION_DATE
0,13465959.0,Toronto Centre(13),,,Toronto and East York,Richmond St E,Parliament St,2007-11-09T05:00:00
1,13467993.0,Spadina-Fort York(10),,,Toronto and East York,Lake Shore Blvd W,York St,2007-11-09T05:00:00
2,13444656.0,York Centre(6),,,North York,Steeles Ave W,Carpenter Rd,2007-11-09T05:00:00
3,13444138.0,Willowdale(18),,,North York,Steeles Ave W,Hilda Ave,2007-11-09T05:00:00
4,13451893.0,Etobicoke North(1),,,Etobicoke York,Albion Rd,Silverstone Dr,2007-11-09T05:00:00
...,...,...,...,...,...,...,...,...
213,,Toronto-Danforth(14),,,Toronto and East York,,,2022-03-17T04:00:00
214,13442843.0,Scarborough-Rouge Park(25),,,Scarborough,Meadowvale Rd,Dean Park Rd,2022-03-30T04:00:00
215,13445257.0,Scarborough-Agincourt(22),,,Scarborough,Warden Ave,Huntingwood Dr,2022-03-17T04:00:00
216,13467463.0,Etobicoke-Lakeshore(3),,,Etobicoke York,Bloor St W,Royal York Rd,2022-05-20T04:00:00


In [5]:
# Traffic Volume
df_trafficVolume = pd.read_excel('traffic-signal-vehicle-and-pedestrian-volumes-data.xlsx')
dfs_trafficVolume = df_trafficVolume[["Main", "Side 1 Route","Activation Date","Latitude","Longitude","Count Date","8 Peak Hr Vehicle Volume","8 Peak Hr Pedestrian Volume"]]
dfs_trafficVolume.head()

Unnamed: 0,Main,Side 1 Route,Activation Date,Latitude,Longitude,Count Date,8 Peak Hr Vehicle Volume,8 Peak Hr Pedestrian Volume
0,JARVIS ST,FRONT ST E,11/15/1948,43.649418,-79.371446,2017-06-21,15662,13535
1,KING ST E,JARVIS ST,08/23/1950,43.650461,-79.371924,2016-09-17,12960,7333
2,JARVIS ST,ADELAIDE ST E,09/12/1958,43.651534,-79.37236,2016-11-08,17770,7083
3,JARVIS ST,RICHMOND ST E,04/21/1962,43.652718,-79.372824,2015-12-08,19678,4369
4,JARVIS ST,QUEEN ST E,08/24/1928,43.653704,-79.373238,2016-09-17,14487,3368


In [43]:
# Motor Vehicle Collision
df_motorVehicleCollision = pd.read_csv('Motor Vehicle Collisions with KSI Data - 4326.csv')
dfs_motorVehicleCollision = df_motorVehicleCollision[["WARDNUM","NEIGHBOURHOOD", "DISTRICT","YEAR","DATE","TIME","HOUR","STREET1","STREET2","ROAD_CLASS", "LOCCOORD","ACCLOC","TRAFFCTL","VISIBILITY","LIGHT","RDSFCOND","ACCLASS","IMPACTYPE","INVTYPE","INVAGE","INJURY","VEHTYPE","PEDESTRIAN","CYCLIST","AUTOMOBILE","MOTORCYCLE","TRUCK","REDLIGHT","ALCOHOL"]]

# rename columns to lowercase
dfs_motorVehicleCollision.rename(str.lower, axis='columns', inplace=True)

# convert date to datetime formate
dfs_motorVehicleCollision['date'] = pd.to_datetime(dfs_motorVehicleCollision['date'], format='%Y-%m-%d')

# filter date starting at 2018
dfs_motorVehicleCollision = dfs_motorVehicleCollision.loc[(dfs_motorVehicleCollision['date'] >= '2018-01-01')]

# filter injury if fatal
dfs_motorVehicleCollision = dfs_motorVehicleCollision[dfs_motorVehicleCollision['injury'] == 'Fatal']

#df = pd.DataFrame(dfs_motorVehicleCollision, columns = ['wardnum'])
#my_series = df.squeeze()
#my_series

#print(type(my_series))

#x = my_series.str.split(pat=',', n=-1, expand=False)
#print(type(x))

#dfs_motorVehicleCollision.set_index("WARDNUM", inplace = True)

#print(dfs_motorVehicleCollision.dtypes)

# rename ward_1 to ward
dfs_motorVehicleCollision.rename(columns={"wardnum": "WardNumber"}, inplace=True)

dfs_motorVehicleCollision

  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_motorVehicleCollision['date'] = pd.to_datetime(dfs_motorVehicleCollision['date'], format='%Y-%m-%d')


Unnamed: 0,WardNumber,neighbourhood,district,year,date,time,hour,street1,street2,road_class,...,invage,injury,vehtype,pedestrian,cyclist,automobile,motorcycle,truck,redlight,alcohol
7664,3,New Toronto (18),Etobicoke York,2018,2018-05-15,2237,22,LAKE SHORE BLVD W,THIRTEENTH ST,Major Arterial,...,25 to 29,Fatal,,,,Yes,,,,Yes
14028,1516,Banbury-Don Mills (42),North York,2018,2018-08-16,1555,15,LESLIE ST,EGLINTON AVE E,Major Arterial,...,85 to 89,Fatal,Passenger Van,,,Yes,,Yes,,
14104,3,Long Branch (19),Etobicoke York,2018,2018-09-05,1405,14,TWENTY SIXTH ST,LABURNHAM AVE,Local,...,50 to 54,Fatal,,Yes,,,,Yes,,
14254,11,Kensington-Chinatown (78),Toronto and East York,2018,2018-03-15,226,2,COLLEGE ST,HURON ST,Major Arterial,...,20 to 24,Fatal,,Yes,,Yes,,,,Yes
14261,1,West Humber-Clairville (1),Etobicoke York,2018,2018-05-04,2354,23,DIXON RD,27 S DIXON W RAMP,Major Arterial,...,60 to 64,Fatal,,Yes,,Yes,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16780,6,Clanton Park (33),North York,2020,2020-07-26,2119,21,881 SHEPPARD AVE W,,Major Arterial,...,25 to 29,Fatal,,Yes,,Yes,,,,
16781,3,Mimico (17),Etobicoke York,2020,2020-07-28,700,7,STANLEY AVE,STATION RD,Local,...,80 to 84,Fatal,,Yes,,Yes,,,,
16807,2,Islington-City Centre West (14),Etobicoke York,2020,2020-08-11,1109,11,THE EAST MALL,MONTEBELLO GDNS,Minor Arterial,...,0 to 4,Fatal,,Yes,,Yes,,,,
16817,,Waterfront Communities-The Island (77),Toronto and East York,2020,2020-08-14,445,4,LAKE SHORE BLVD E,CHERRY ST,Major Arterial,...,60 to 64,Fatal,,Yes,,,,,,


In [59]:
# Automated Speed Enforcement
df_automatedSpeed = pd.read_csv('Automated Speed Enforcement Locations - 4326.csv')
dfs_automatedSpeed = df_automatedSpeed[["ward", "location"]]

new = dfs_automatedSpeed["ward"].str.split(" - ", n = 1, expand = True)

dfs_automatedSpeed.insert(0, 'Ward_Num', new[0])
dfs_automatedSpeed.insert(1, 'Ward_Name', new[1])
dfs_automatedSpeed.insert(2, 'Ward_Name_Num', new[1] + "(" + new[0] + ")")

dfs_automatedSpeed = dfs_automatedSpeed.apply(lambda x: x.replace({'North of':';', 'South of':';', 'East of':';', 'West of':';', 'Near':';'}, regex=True))

newLocation = df2["location"].str.split(";", n = 1, expand = True)

dfs_automatedSpeed.insert(loc=3, column='Enforce_Street1', value=newLocation[0])
dfs_automatedSpeed.insert(loc=4, column='Enforce_Street2', value=newLocation[1])

dfs_automatedSpeed.drop(columns=['location', 'ward'], axis=1, inplace=True)

dfs_automatedSpeed.set_index("Ward_Num", inplace = True)

dfs_automatedSpeed

Unnamed: 0_level_0,Ward_Name,Ward_Name_Num,Enforce_Street1,Enforce_Street2
Ward_Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Etobicoke North,Etobicoke North(1),Martin Grove Rd.,Rampart Rd
1,Etobicoke North,Etobicoke North(1),Redgrave Dr.,Martin Grove Rd.
2,Etobicoke Centre,Etobicoke Centre(2),Royal York Rd.,La Rose Ave.
2,Etobicoke Centre,Etobicoke Centre(2),Mill Rd.,Burnhamthorpe Rd.
3,Etobicoke-Lakeshore,Etobicoke-Lakeshore(3),The East Mall,Faludon Court
3,Etobicoke-Lakeshore,Etobicoke-Lakeshore(3),Ourland Ave.,Merriday St
4,Parkdale-High Park,Parkdale-High Park(4),Parkside Dr.,Algonquin Ave
4,Parkdale-High Park,Parkdale-High Park(4),Runnymede Rd.,Colbeck St.
5,York South-Weston,York South-Weston(5),Scarlett Rd.,Ellins Ave.
5,York South-Weston,York South-Weston(5),Gary Dr.,Deerhurst Ave.


In [60]:
# City wards
df_cityWard = pd.read_csv('City Wards Data - 4326.csv')
dfs_cityWard = df_cityWard[["AREA_SHORT_CODE", "AREA_NAME"]]

dfs_cityWard['Ward_Num'] = dfs_cityWard['AREA_SHORT_CODE'].astype(int)
dfs_cityWard['WardNumber'] = dfs_cityWard['AREA_SHORT_CODE'].astype(int)
dfs_cityWard['Ward_Name'] = dfs_cityWard['AREA_NAME'].astype(str)

dfs_cityWard.set_index("Ward_Num", inplace = True)

dfs_cityWard.drop(columns=['AREA_SHORT_CODE'], axis=0, inplace=True)
dfs_cityWard.drop(columns=['AREA_NAME'], axis=0, inplace=True)

dfs_cityWard

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_cityWard['Ward_Num'] = dfs_cityWard['AREA_SHORT_CODE'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_cityWard['WardNumber'] = dfs_cityWard['AREA_SHORT_CODE'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_cityWard['Ward_Name'] = dfs_cityWard['AREA_NAME'].astype(

Unnamed: 0_level_0,WardNumber,Ward_Name
Ward_Num,Unnamed: 1_level_1,Unnamed: 2_level_1
7,7,Humber River-Black Creek
6,6,York Centre
18,18,Willowdale
11,11,University-Rosedale
19,19,Beaches-East York
20,20,Scarborough Southwest
25,25,Scarborough-Rouge Park
23,23,Scarborough North
24,24,Scarborough-Guildwood
21,21,Scarborough Centre


In [62]:
# Build one data frame from all the data. 

# City ward to Speed Enforcement => AREA_SHORT_CODE (index) = WardNum (index)
data_join = pd.merge(dfs_cityWard, dfs_automatedSpeed,left_index=False, right_index=False)
# rename columns to lowercase
#data_join.rename(str.lower, axis='columns', inplace=True)
#data_join

# Joined Data to dfs_motorVehicleCollision => WardNumber (non-index) = WARDNUM (non-index)
#test = pd.merge(data_join, dfs_motorVehicleCollision, left_on='WardNumber', right_on='ward')
#result = pd.concat([data_join, dfs_motorVehicleCollision], axis=1, join="inner")
#result

#df1.merge(df2, left_on='lkey', right_on='rkey')
#data_join.merge(dfs_motorVehicleCollision, left_on='WardNumber', right_on='ward_num')
#data_join

df_join = pd.concat([data_join, dfs_motorVehicleCollision], ignore_index=True, sort=False)
df_join

#test = pd.merge(data_join, dfs_motorVehicleCollision, on='WardNumber', how='outer')
#test

#test = pd.concat([data_join.set_index('WardNumber'),dfs_motorVehicleCollision.set_index('WardNumber')], axis=1, join='inner').reset_index()
#test

#df3 = data_join.merge(dfs_motorVehicleCollision, on='ward_num', how='inner')

Unnamed: 0,WardNumber,Ward_Name,Ward_Name_Num,Enforce_Street1,Enforce_Street2,neighbourhood,district,year,date,time,...,invage,injury,vehtype,pedestrian,cyclist,automobile,motorcycle,truck,redlight,alcohol
0,7,Humber River-Black Creek,Humber River-Black Creek(7),Weston Rd.,Coronado Court,,,,NaT,,...,,,,,,,,,,
1,7,Humber River-Black Creek,Humber River-Black Creek(7),Spenvalley Dr.,Whitbread Cres.,,,,NaT,,...,,,,,,,,,,
2,6,York Centre,York Centre(6),Dufferin St.,Stanstead Dr.,,,,NaT,,...,,,,,,,,,,
3,6,York Centre,York Centre(6),Stilecroft Dr.,Sharpecroft Blvd.,,,,NaT,,...,,,,,,,,,,
4,11,University-Rosedale,University-Rosedale(11),Spadina Ave.,Bloor St. W.,,,,NaT,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,6,,,,,Clanton Park (33),North York,2020.0,2020-07-26,2119.0,...,25 to 29,Fatal,,Yes,,Yes,,,,
208,3,,,,,Mimico (17),Etobicoke York,2020.0,2020-07-28,700.0,...,80 to 84,Fatal,,Yes,,Yes,,,,
209,2,,,,,Islington-City Centre West (14),Etobicoke York,2020.0,2020-08-11,1109.0,...,0 to 4,Fatal,,Yes,,Yes,,,,
210,,,,,,Waterfront Communities-The Island (77),Toronto and East York,2020.0,2020-08-14,445.0,...,60 to 64,Fatal,,Yes,,,,,,


In [57]:
# merge motor vehicle collision
dfs_motorVehicleCollision

Unnamed: 0,WardNumber,neighbourhood,district,year,date,time,hour,street1,street2,road_class,...,invage,injury,vehtype,pedestrian,cyclist,automobile,motorcycle,truck,redlight,alcohol
7664,3,New Toronto (18),Etobicoke York,2018,2018-05-15,2237,22,LAKE SHORE BLVD W,THIRTEENTH ST,Major Arterial,...,25 to 29,Fatal,,,,Yes,,,,Yes
14028,1516,Banbury-Don Mills (42),North York,2018,2018-08-16,1555,15,LESLIE ST,EGLINTON AVE E,Major Arterial,...,85 to 89,Fatal,Passenger Van,,,Yes,,Yes,,
14104,3,Long Branch (19),Etobicoke York,2018,2018-09-05,1405,14,TWENTY SIXTH ST,LABURNHAM AVE,Local,...,50 to 54,Fatal,,Yes,,,,Yes,,
14254,11,Kensington-Chinatown (78),Toronto and East York,2018,2018-03-15,226,2,COLLEGE ST,HURON ST,Major Arterial,...,20 to 24,Fatal,,Yes,,Yes,,,,Yes
14261,1,West Humber-Clairville (1),Etobicoke York,2018,2018-05-04,2354,23,DIXON RD,27 S DIXON W RAMP,Major Arterial,...,60 to 64,Fatal,,Yes,,Yes,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16780,6,Clanton Park (33),North York,2020,2020-07-26,2119,21,881 SHEPPARD AVE W,,Major Arterial,...,25 to 29,Fatal,,Yes,,Yes,,,,
16781,3,Mimico (17),Etobicoke York,2020,2020-07-28,700,7,STANLEY AVE,STATION RD,Local,...,80 to 84,Fatal,,Yes,,Yes,,,,
16807,2,Islington-City Centre West (14),Etobicoke York,2020,2020-08-11,1109,11,THE EAST MALL,MONTEBELLO GDNS,Minor Arterial,...,0 to 4,Fatal,,Yes,,Yes,,,,
16817,,Waterfront Communities-The Island (77),Toronto and East York,2020,2020-08-14,445,4,LAKE SHORE BLVD E,CHERRY ST,Major Arterial,...,60 to 64,Fatal,,Yes,,,,,,
