# Crash data

Run this notebook to build the datasets of crashes involving at least one bike or one e-scooter, including the geolocation of each crash.

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import osmnx as ox
from shapely.geometry import Point
import matplotlib.pyplot as plt
import shapely
import time
import random
from IPython.core.display import clear_output
import geopy

In [None]:
# load paths

# Execute path.py inside this notebook's namespace (-i), so the names it
# defines become available here. The cells below read a PATH mapping with a
# 'data' entry -- presumably defined in path.py; TODO confirm.
%run -i path.py

# Name of the sub-folder (under PATH['data']) where the processed crash
# datasets are exported.
placeid = 'turin'

In [None]:
# GeoDataFrame with the edges of the Turin street network, read from the
# shapefile stored under the data folder. It is the background layer of the
# crash maps drawn further down.
edges_shapefile = PATH['data'] + 'graph_shapefile/Turin/edges.shp'
gdf_edges = gpd.read_file(edges_shapefile)

## Crashes in Turin (2020)

Download the csv file with all 2020 crashes in Turin available here:

http://aperto.comune.torino.it/dataset/elenco-incidenti-2020/resource/2975b2cd-cd02-4d24-9a3d-e44d0e68f891

In [None]:
# Load every crash recorded in 2020. The file is comma-separated and its
# column names sit on the third line (header=2), so the two lines above it
# are not part of the table.
crashes_2020_csv = PATH['data'] + 'Elenco_Incidenti_2020.csv'
accidents2020 = pd.read_csv(crashes_2020_csv, sep=",", header=2)

In [None]:
# Crashes involving at least one bike (column 'Velocipede')
bike_crashes = accidents2020.loc[accidents2020["Velocipede"] >= 1]

# Crashes involving at least one e-scooter (column "Micromobilita' elettrica")
escoo_crashes = accidents2020.loc[accidents2020["Micromobilita' elettrica"] >= 1]

# Stack the two selections, order the rows by the record number 'N. Ord'
# and renumber them 0..n-1.
# NOTE(review): a crash involving both a bike and an e-scooter is present in
# both selections and therefore appears twice here. The cells below patch
# rows by hard-coded position, so de-duplicating would shift those positions;
# check them before changing this.
softmobility2020 = (
    pd.concat([bike_crashes, escoo_crashes])
    .sort_values('N. Ord')
    .reset_index(drop=True)
)

In [None]:
# Convert the coordinates stored as text with a decimal comma into floats.
#
# `commas` holds the positions of the rows whose latitude is a string
# containing ','. Later cells treat every row NOT in `commas` as a crash
# whose location is missing, so the list is kept even after the conversion.
#
# The string test uses isinstance() instead of comparing with the type of the
# first row: if the first row had a missing latitude (a float NaN), that
# comparison would skip every string row and call .find() on the NaN rows.

commas = [
    position
    for position, value in enumerate(softmobility2020['Latitudine'])
    if isinstance(value, str) and ',' in value
]

for c in commas:
    for column in ('Latitudine', 'Longitudine'):
        # str() keeps the conversion working if this cell already holds a number
        raw_value = str(softmobility2020.at[c, column])
        softmobility2020.at[c, column] = float(raw_value.replace(',', '.'))

In [None]:
# Positions of the crashes whose coordinates were not converted in the
# previous cell, i.e. the rows whose location still has to be filled in.

converted_rows = set(commas)
missing_loc = [
    position
    for position in range(len(softmobility2020))
    if position not in converted_rows
]

In [None]:
# Coordinates filled in by hand for the crashes without a location, worked out
# from the annotations in column "Denominazione strada o centro abitato".
# Keys are row positions in softmobility2020, values are (latitude, longitude).

manual_coords_2020 = {
    15: (45.0607070007085, 7.684978111262758),
    78: (45.08275811314921, 7.659489755440858),
    89: (45.06567607842894, 7.693580440098414),
    297: (45.10521775158704, 7.683001997770805),
    299: (45.116153386878366, 7.657704268935772),
    301: (45.07685864720485, 7.683674927897282),
    310: (45.09581172153073, 7.672257397770439),
    322: (45.07612878448956, 7.655983440098793),
    324: (45.08943975201474, 7.677871282428348),
    340: (45.06794811139389, 7.670548526604855),
    348: (45.08104830416534, 7.671097652057089),
    358: (45.06703948118164, 7.650882453592099),
    367: (45.08701943229711, 7.666188875513294),
}

for row_pos, (fixed_lat, fixed_lon) in manual_coords_2020.items():
    softmobility2020.at[row_pos, 'Latitudine'] = fixed_lat
    softmobility2020.at[row_pos, 'Longitudine'] = fixed_lon

In [None]:
# Switch the coordinate columns to their English names:
# Latitudine -> latitude, Longitudine -> longitude
softmobility2020 = softmobility2020.rename(
    columns={'Latitudine': 'latitude', 'Longitudine': 'longitude'}
)

In [None]:
# Write the cleaned 2020 dataset to the city's folder inside the data directory
export_path_2020 = PATH["data"] + placeid + "/" + 'accidents_softmobility2020.csv'
softmobility2020.to_csv(export_path_2020)

In [None]:
# Turn every crash into a point (x = longitude, y = latitude) and build a
# GeoDataFrame in WGS84 (EPSG:4326) from the crash table.

geometry = [
    Point(x, y)
    for x, y in zip(softmobility2020['longitude'], softmobility2020['latitude'])
]
gdf_crashes_softmobility20 = gpd.GeoDataFrame(
    softmobility2020,
    geometry=geometry,
    crs="EPSG:4326",
)

In [None]:
# Plot the 2020 crashes on top of the Turin street network.

# The street edges create the figure, so this figsize is the one that counts.
ax = gdf_edges.plot(figsize=(15,8),alpha=.1, color='navy')

# Draw the crashes on the same axes. No figsize here: geopandas ignores it
# when an existing `ax` is passed, and the old value (24,16) contradicted the
# size actually used.
gdf_crashes_softmobility20.plot(ax=ax,color='darkgreen',alpha=.7)

## Crashes in Turin (2019)

Download the csv file with all 2019 crashes in Turin available here:

http://aperto.comune.torino.it/dataset/elenco-incidenti-nell-anno-2019-nella-citta-di-torino

In this dataset there is no "Micromobilita' elettrica" column, so crashes involving bikes cannot be distinguished from crashes involving e-scooters. 2019 was the first year in which a new system was used to store the data, so a little more data processing is needed to set up our crash dataset.

## Adjust the dataset

In [None]:
# Load every crash recorded in 2019. Unlike the 2020 file this one is
# semicolon-separated; the column names are again on the third line (header=2).
crashes_2019_csv = PATH['data'] + 'Elenco_Incidenti_2019.csv'
accidents2019 = pd.read_csv(crashes_2019_csv, sep=";", header=2)

In [None]:
# Keep the 2019 crashes in which at least one bike ('Velocipede') was involved
involves_bike = accidents2019['Velocipede'] >= 1
bike_crashes = accidents2019.loc[involves_bike]

In [None]:
# Renumber the rows 0..n-1, discarding the old index (drop=True). The cells
# below loop over row positions and write with `.at[position, ...]`, which
# needs the index labels to coincide with the positions.
bike_crashes = bike_crashes.reset_index(drop=True)
# show the filtered table
bike_crashes

In [None]:
# Convert the coordinates stored as text with a decimal comma into floats.
#
# `commas` holds the positions of the rows whose latitude is a string
# containing ','. The geocoding cell below skips exactly these rows, so the
# list is kept even after the conversion.
#
# The string test uses isinstance() instead of comparing with the type of the
# first row: if the first row had a missing latitude (a float NaN), that
# comparison would skip every string row and call .find() on the NaN rows.

commas = [
    position
    for position, value in enumerate(bike_crashes['Latitudine'])
    if isinstance(value, str) and ',' in value
]

for c in commas:
    for column in ('Latitudine', 'Longitudine'):
        # str() keeps the conversion working if this cell already holds a number
        raw_value = str(bike_crashes.at[c, column])
        bike_crashes.at[c, column] = float(raw_value.replace(',', '.'))

In [None]:
# Get the coordinates from the address.
# geopy (Nominatim) is used for addresses with the syntax "via xxxxxx number";
# every row that already has coordinates (listed in `commas`) is skipped.
#
# Results:
#   to_check       -> rows with no address at all (NaN), to be checked by hand
#   indices_ok     -> rows geocoded successfully (coordinates in lat / lon)
#   double_address -> rows whose address contains a ',' or that the geocoder
#                     could not resolve; to be located with other methods

from geopy.extra.rate_limiter import RateLimiter

to_check = []
indices_ok = []
double_address = []
lat = []
lon = []

# Build the geocoder once instead of once per row. The public Nominatim
# service allows at most one request per second, so the calls are throttled.
# RateLimiter also retries failed requests and, by default, returns None when
# they keep failing, so one timeout no longer aborts the loop and loses the
# results collected so far: such rows simply end up in `double_address`.
locator = geopy.geocoders.Nominatim(user_agent='myGeocoder')
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

already_located = set(commas)
addresses = bike_crashes['Denominazione strada o centro abitato']
n_crashes = len(bike_crashes)

for i in range(n_crashes):

    # progress indicator, overwritten at every iteration
    print(i+1,'/',n_crashes)
    if i not in already_located:
        address = addresses.iloc[i]

        if pd.isna(address):
            # no address available
            to_check.append(i)
        elif ',' in address:
            # the address does not follow the plain "street number" syntax
            double_address.append(i)
        else:
            location = geocode(address+', Turin, Italy')
            if location is not None:
                lat.append(location.latitude)
                lon.append(location.longitude)
                indices_ok.append(i)
            else:
                # the geocoder did not understand the address (or the request
                # failed): keep the row aside and locate it with other methods
                double_address.append(i)

    clear_output(wait=True)

# store the coordinates found from the address
for row_pos, found_lat, found_lon in zip(indices_ok, lat, lon):
    bike_crashes.at[row_pos,'Latitudine']=found_lat
    bike_crashes.at[row_pos,'Longitudine']=found_lon

In [None]:
# Crashes geolocated by hand.
# Keys are row positions in bike_crashes, values are (latitude, longitude).

manual_coords_2019 = {
    1: (45.08397867996193, 7.6331809840222835),
    2: (45.062731480180126, 7.675381853324719),
    3: (45.0953810903701, 7.7016295686854805),
    4: (45.08666506427556, 7.6664945840237095),
    5: (45.0614095129451, 7.679731630039038),
    6: (45.10343434740111, 7.665395353347183),
    7: (45.07458629677788, 7.675906568674329),
    11: (45.082292367083184, 7.651418789895025),
    12: (45.108700735691194, 7.663120707321082),
    15: (45.03315612193116, 7.608621353308836),
    16: (45.0665234970732, 7.662149184012897),
    17: (45.089818940854144, 7.683439507310729),
    23: (45.03864839350646, 7.628703737968763),
    24: (45.06251120518977, 7.679944937981754),
    25: (45.07842477393402, 7.62333126867647),
    27: (45.065656676264496, 7.682180784012162),
    28: (45.0780465374868, 7.684441637990146),
    30: (45.077126730030734, 7.698149253332636),
    31: (45.06141312496538, 7.685231137981133),
    48: (45.0747549527113, 7.6853669686742565),
    49: (45.07792720152072, 7.646738414704918),
    52: (45.08222971421292, 7.6866550840211625),
    53: (45.09219891738017, 7.654388668683753),
    54: (45.05932559631996, 7.65572575332287),
    55: (45.08674286411487, 7.681648799366786),
    56: (45.076782958828, 7.656850607303557),
    57: (45.07845425178405, 7.6609859379903025),
    107: (45.06476705437338, 7.680401166734652),
    277: (45.08774215573367, 7.691090275540141),
    281: (45.08609848010586, 7.655025413625322),
}

# Loop names deliberately differ from the `lat` / `lon` lists filled by the
# geocoding cell, so those lists are not overwritten.
for row_pos, (fixed_lat, fixed_lon) in manual_coords_2019.items():
    bike_crashes.at[row_pos, 'Latitudine'] = fixed_lat
    bike_crashes.at[row_pos, 'Longitudine'] = fixed_lon

In [None]:
# Switch the coordinate columns to their English names:
# Latitudine -> latitude, Longitudine -> longitude
bike_crashes = bike_crashes.rename(
    columns={'Latitudine': 'latitude', 'Longitudine': 'longitude'}
)

In [None]:
# Write the cleaned 2019 dataset to the city's folder inside the data directory
export_path_2019 = PATH["data"] + placeid + "/" + 'accidents_softmobility2019.csv'
bike_crashes.to_csv(export_path_2019)

In [None]:
# Turn every crash into a point (x = longitude, y = latitude) and build a
# GeoDataFrame in WGS84 (EPSG:4326) from the crash table.

geometry = [
    Point(x, y)
    for x, y in zip(bike_crashes['longitude'], bike_crashes['latitude'])
]
gdf_crashes_softmobility = gpd.GeoDataFrame(
    bike_crashes,
    geometry=geometry,
    crs="EPSG:4326",
)

In [None]:
# Plot the 2019 crashes on top of the Turin street network.

# The street edges create the figure, so this figsize is the one that counts.
ax = gdf_edges.plot(figsize=(15,8),alpha=.1, color='navy')

# Draw the crashes on the same axes. No figsize here: geopandas ignores it
# when an existing `ax` is passed, and the old value (24,16) contradicted the
# size actually used.
gdf_crashes_softmobility.plot(ax=ax,color='darkgreen',alpha=.7)