In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import keplergl
#from keplergl import KeplerGL
import matplotlib as plt
import plotly.express as px
import h3

In [70]:
#load the traffic volumes csv into a dataframe
traffic_volumes = pd.read_csv(filepath_or_buffer='traffic volumes_2020_2029.csv')

In [71]:
#normalising column headers: trim surrounding whitespace, then convert to lowercase
traffic_volumes.columns = traffic_volumes.columns.str.strip().str.lower()

In [72]:
#dropping unnecessary columns that are not used in this analysis
traffic_volumes = traffic_volumes.drop(
    labels=['_id','count_id','location_id','centreline_type','px'],
    axis='columns',
)

In [73]:
#dropping more unnecessary columns, as we have aggregate values of traffic counts
#the columns to keep are selected by name instead of dropping positions 7:55 in place:
#a positional in-place drop removes a different set of columns every time the cell is
#re-run, whereas a selection by name is idempotent and fails loudly (KeyError) if the
#csv layout ever changes
traffic_volumes = traffic_volumes[[
    'count_date', 'location', 'lng', 'lat', 'centreline_id', 'time_start',
    'time_end', 'total_count', 'pedestrians_count', 'cyclists_count',
    'vehicle_count', 'trucks_count',
]].copy()  #explicit copy so later column assignments act on an independent frame

In [74]:
#check which columns remain after the two drop steps
traffic_volumes.columns

Index(['count_date', 'location', 'lng', 'lat', 'centreline_id', 'time_start',
       'time_end', 'total_count', 'pedestrians_count', 'cyclists_count',
       'vehicle_count', 'trucks_count'],
      dtype='object')

In [75]:
#preview the first rows of the trimmed table
traffic_volumes.head()

Unnamed: 0,count_date,location,lng,lat,centreline_id,time_start,time_end,total_count,pedestrians_count,cyclists_count,vehicle_count,trucks_count
0,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T07:30:00,2020-01-08T07:45:00,274,63,4,207,1
1,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T07:45:00,2020-01-08T08:00:00,244,54,3,187,0
2,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:00:00,2020-01-08T08:15:00,309,89,2,218,3
3,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:15:00,2020-01-08T08:30:00,371,108,3,260,1
4,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:30:00,2020-01-08T08:45:00,339,104,3,232,3


In [77]:
#goal of this cell: convert the latitude and longitude values to h3 hex cells

#step one: join latitude and longitude (in that order) into a single 'lat,lng' text column
traffic_volumes['coordinates'] = traffic_volumes['lat'].astype('str').str.cat(
    traffic_volumes['lng'].astype('str'), sep=','
)

#then we define the function to get h3 hex cells from coordinates data
def get_hex_cell(coordinates, res=10):
    """Return the h3 cell for a 'lat,lng' coordinate string.

    Parameters
    ----------
    coordinates : str
        Latitude and longitude as decimal text separated by a single comma,
        latitude first (e.g. '43.677521,-79.358652').
    res : int, default 10
        h3 resolution forwarded to ``h3.latlng_to_cell``. The default keeps
        the resolution this notebook has always used.

    Returns
    -------
    Whatever ``h3.latlng_to_cell`` returns for that point and resolution.

    Raises
    ------
    ValueError
        If the text does not contain exactly two comma-separated fields, or
        if either field cannot be parsed as a float.
    """
    #split once and unpack: text with a missing or extra field fails loudly
    #instead of silently using whatever the first two pieces happen to be
    lat_text, lng_text = coordinates.split(',')
    return h3.latlng_to_cell(float(lat_text), float(lng_text), res=res)

#last step: derive the hex cell of every row by mapping the function over the coordinates column
traffic_volumes['hex_cell'] = traffic_volumes['coordinates'].map(get_hex_cell)

In [78]:
#confirm the coordinates and hex_cell columns were added
traffic_volumes.head()

Unnamed: 0,count_date,location,lng,lat,centreline_id,time_start,time_end,total_count,pedestrians_count,cyclists_count,vehicle_count,trucks_count,coordinates,hex_cell
0,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T07:30:00,2020-01-08T07:45:00,274,63,4,207,1,"43.67752083,-79.35865235",8a2b9bc7528ffff
1,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T07:45:00,2020-01-08T08:00:00,244,54,3,187,0,"43.67752083,-79.35865235",8a2b9bc7528ffff
2,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:00:00,2020-01-08T08:15:00,309,89,2,218,3,"43.67752083,-79.35865235",8a2b9bc7528ffff
3,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:15:00,2020-01-08T08:30:00,371,108,3,260,1,"43.67752083,-79.35865235",8a2b9bc7528ffff
4,08/01/2020,BROADVIEW AVE AT ERINDALE AVE,-79.358652,43.677521,13462138,2020-01-08T08:30:00,2020-01-08T08:45:00,339,104,3,232,3,"43.67752083,-79.35865235",8a2b9bc7528ffff


In [79]:
#total each traffic count per h3 hex cell (one row per cell, indexed by hex_cell)
#aggfunc is the string 'sum' rather than the builtin sum: recent pandas emits a
#FutureWarning for the builtin callable and plans to change how it is applied,
#while the string keeps the current grouped-sum result
traffic_volumes_pivot = pd.pivot_table(
    traffic_volumes,
    values=['total_count','pedestrians_count','cyclists_count','vehicle_count','trucks_count'],
    index='hex_cell',
    aggfunc='sum',
)

In [80]:
#adding columns for calculating the percentage of traffic by each mode:
#each share is that mode's count divided by total_count, times 100
#NOTE(review): in the displayed output the vehicle, pedestrian and cyclist shares add up
#to 100, so pct_trucks looks like an overlapping share rather than a fourth slice - confirm
traffic_volumes_pivot = traffic_volumes_pivot.assign(**{
    pct_name: traffic_volumes_pivot[count_name]/traffic_volumes_pivot['total_count']*100
    for pct_name, count_name in (
        ('pct_vehicles', 'vehicle_count'),
        ('pct_pedestrians', 'pedestrians_count'),
        ('pct_cyclists', 'cyclists_count'),
        ('pct_trucks', 'trucks_count'),
    )
})

In [81]:
#inspect the per-cell totals and mode shares
traffic_volumes_pivot

Unnamed: 0_level_0,cyclists_count,pedestrians_count,total_count,trucks_count,vehicle_count,pct_vehicles,pct_pedestrians,pct_cyclists,pct_trucks
hex_cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8a2b986c8477fff,7,2,21055,246,21046,99.957255,0.009499,0.033246,1.168369
8a2b986ca71ffff,0,0,8783,149,8783,100.000000,0.000000,0.000000,1.696459
8a2b986d9297fff,7,3,24301,655,24291,99.958849,0.012345,0.028805,2.695362
8a2b986d96effff,5,1,25822,648,25816,99.976764,0.003873,0.019363,2.509488
8a2b986db25ffff,26,2021,61237,1412,59190,96.657250,3.300292,0.042458,2.305796
...,...,...,...,...,...,...,...,...,...
8a2b9bd7562ffff,26,115,3119,43,2978,95.479320,3.687079,0.833601,1.378647
8a2b9bd75807fff,51,279,26571,401,26241,98.758044,1.050017,0.191939,1.509164
8a2b9bd75967fff,30,568,26096,395,25498,97.708461,2.176579,0.114960,1.513642
8a2b9bd75977fff,52,663,25750,404,25035,97.223301,2.574757,0.201942,1.568932


In [31]:
#export the per-hex-cell table; the index is written too, so hex_cell becomes the first csv column
#NOTE(review): this cell's execution count (31) is lower than the cells before it - re-run the
#notebook top to bottom before relying on the exported file
traffic_volumes_pivot.to_csv(path_or_buf='traffic_volumes_h3.csv', index=True)