In [6]:
import pandas as pd
import numpy as np

import folium
from folium.plugins import HeatMap


In [2]:
# Read the collision records and derive calendar month / year columns
# from the parsed INCDATE timestamps.
df = (
    pd.read_csv('Collisions.csv', low_memory=False, parse_dates=["INCDATE"])
    .assign(
        MONTH=lambda frame: frame["INCDATE"].dt.month,
        YEAR=lambda frame: frame["INCDATE"].dt.year,
    )
)

print(df.shape)
df.head()

(222581, 42)


Unnamed: 0,X,Y,OBJECTID,INCKEY,COLDETKEY,REPORTNO,STATUS,ADDRTYPE,INTKEY,LOCATION,...,PEDROWNOTGRNT,SDOTCOLNUM,SPEEDING,ST_COLCODE,ST_COLDESC,SEGLANEKEY,CROSSWALKKEY,HITPARKEDCAR,MONTH,YEAR
0,-122.315658,47.675815,1,330129,331629,EA22982,Matched,Intersection,24501.0,12TH AVE NE AND NE 65TH ST,...,Y,,,2.0,Vehicle turning left hits pedestrian,0,0,N,3,2020
1,-122.31678,47.608643,2,1288,1288,3476163,Matched,Block,,12TH AVE BETWEEN E CHERRY ST AND E COLUMBIA ST,...,,,,12.0,From same direction - both going straight - on...,0,0,N,3,2013
2,-122.344569,47.694547,3,1142,1142,3507856,Matched,Block,,AURORA AVE N BETWEEN N 90TH ST AND N 91ST ST,...,,,,13.0,From same direction - both going straight - bo...,0,0,N,3,2013
3,-122.365999,47.691729,4,330015,331515,C823869,Unmatched,Block,,8TH AVE NW BETWEEN NW 86TH ST AND NW 87TH ST,...,,,,,,0,0,Y,8,2019
4,,,5,19800,19800,1060128,Matched,Block,,ALASKAN WY VI NB BETWEEN S ROYAL BROUGHAM WAY ...,...,,4358043.0,,50.0,Fixed object,0,0,N,12,2004


#### Severity code
0 - unknown; 1 - only property damage; 2 - with injury; 2b - Serious injury; 3 - with fatality

In [14]:
# Number of rows whose SEVERITYCODE equals the string '0'.
int(df['SEVERITYCODE'].eq('0').sum())

21781

## Visualization
For each grid cell, show the accident frequency (given a certain constraint), making it possible to inspect how factors interact to produce severe collisions.

In [63]:
def visualize_collision_freq(df_filtered, zoom_start=13):
    '''Build a folium map carrying a heat-map layer of collision counts.

    Rows of ``df_filtered`` are grouped by their (Y, X) pair; each resulting
    ``[Y, X, row_count]`` triple is handed to ``HeatMap`` as one data point.

    Note that the map is centred on the mean Y / mean X of the notebook-level
    ``df`` frame, not of ``df_filtered``, so ``df`` must already be defined.

    Parameters
    ----------
    df_filtered : DataFrame
        Frame with "X" and "Y" columns to aggregate.
    zoom_start : int, default 13
        Initial zoom level forwarded to ``folium.Map``.

    Returns
    -------
    folium.Map
        The map with the heat-map layer added.
    '''
    map_center = [df["Y"].mean(), df["X"].mean()]
    base_map = folium.Map(
        location=map_center,
        zoom_start=zoom_start,
        control_scale=True,
        min_zoom=10,
    )

    # One row per distinct (Y, X) pair, with the number of records at that pair.
    per_point_counts = df_filtered[["X", "Y"]].groupby(['Y', 'X']).size().reset_index()
    heat_points = per_point_counts.values.tolist()

    heat_layer = HeatMap(data=heat_points, radius=8, max_zoom=13)
    heat_layer.add_to(base_map)

    return base_map

In [61]:
# Collisions recorded with severity code '3' (fatality).
df_filtered = df.loc[df["SEVERITYCODE"].eq('3')]
seattle_map = visualize_collision_freq(df_filtered=df_filtered)
seattle_map

#### Bike-related accidents

In [65]:
# Serious-injury ('2b') collisions with a positive PEDCYLCOUNT, and their
# share of all serious-injury collisions.
df_filtered = df.loc[df["SEVERITYCODE"].eq('2b') & df['PEDCYLCOUNT'].gt(0)]
len(df_filtered) / len(df.loc[df["SEVERITYCODE"].eq('2b')])

0.13797508783136378

In [67]:
# Render the current df_filtered selection at zoom 14 (the helper defaults to 13).
seattle_map = visualize_collision_freq(df_filtered=df_filtered, zoom_start=14)
seattle_map

#### Pedestrian-related accidents

In [69]:
# Serious-injury ('2b') collisions with a positive PEDCOUNT.
df_filtered = df.loc[df["SEVERITYCODE"].eq('2b') & df['PEDCOUNT'].gt(0)]

# Report the serious-injury total and the percentage kept by the filter above.
print(''.join([
    'Total of ',
    str(len(df.loc[df["SEVERITYCODE"].eq('2b')])),
    ' serious injury, ',
    str(np.round(len(df_filtered) / len(df.loc[df["SEVERITYCODE"].eq('2b')]) * 100, 2)),
    '% involving pedestrian.',
]))

seattle_map = visualize_collision_freq(df_filtered=df_filtered, zoom_start=14)
seattle_map

Total of 3131 serious injury, 29.42% involving pedestrian.
