# Crash data
Download the CSV file listing all 2019 crashes in Turin, available here:

http://aperto.comune.torino.it/dataset/elenco-incidenti-nell-anno-2019-nella-citta-di-torino

Then run this notebook to extract the crashes involving at least one bike, including their geolocation.

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import osmnx as ox
from shapely.geometry import Point
import matplotlib.pyplot as plt
import shapely
import time
import random
from IPython.core.display import clear_output
import geopy

In [None]:
# load paths
# `%run -i` executes path.py inside this notebook's namespace, so the names it
# defines become available to the cells below.
# NOTE(review): presumably this is where PATH and placeid come from (both are
# used below but never defined in this notebook) - confirm against path.py

%run -i path.py

In [None]:
# GeoDataFrame with the edges of the Turin street network, read from the
# shapefile stored under the data folder
edges_shapefile = PATH['data']+'graph_shapefile/Turin/edges.shp'
gdf_edges = gpd.read_file(edges_shapefile)

## Adjust the dataset

In [None]:
# Raw 2019 crash list: semicolon-separated, column names taken from row 2 of the file
accidents_csv = PATH['data'] + 'Elenco_Incidenti_2019.csv'
accidents2019 = pd.read_csv(accidents_csv, header=2, sep=";")

In [None]:
# keep only the crashes whose 'Velocipede' (bike) count is at least 1
involves_bike = accidents2019['Velocipede'].ge(1)
bike_crashes = accidents2019[involves_bike]

In [None]:
# renumber the rows 0..n-1 and discard the old index: the cells below address
# rows by these positions (loop counters and hard-coded row numbers)
bike_crashes = bike_crashes.reset_index(drop=True)
# bare expression: shows the dataframe as the cell output
bike_crashes

In [None]:
# Some coordinates are stored as text with a decimal comma (e.g. "45,07").
# `commas` collects the positions of the rows whose latitude has that form;
# the geocoding step later skips these rows because their coordinates are
# already usable once converted here.
commas = [
    i
    for i, value in enumerate(bike_crashes['Latitudine'])
    # isinstance (instead of comparing with the type of row 0) keeps this
    # working when the first row holds NaN or an already-numeric value
    if isinstance(value, str) and ',' in value
]

# Convert both coordinates of those rows to float. Each column is checked on
# its own, so a longitude that is already numeric is left untouched instead
# of raising on `.replace`.
for c in commas:
    for column in ('Latitudine', 'Longitudine'):
        value = bike_crashes[column].iloc[c]
        if isinstance(value, str):
            bike_crashes.at[c, column] = float(value.replace(',', '.'))

In [None]:
# get the coordinates from the address
# use geopy to get the coordinates of addresses with the syntax "via xxxxxx number"
#
# Outcome of the loop, as lists of row positions in bike_crashes:
#   to_check       - rows with no street name (NaN)
#   indices_ok     - rows geocoded successfully (coordinates in lat / lon)
#   double_address - rows whose street field contains a comma, or that
#                    Nominatim could not resolve; to be fixed by other means
from geopy.extra.rate_limiter import RateLimiter

to_check = []
indices_ok = []
double_address = []
lat = []
lon = []

street_column = 'Denominazione strada o centro abitato'

# rows already fixed by the decimal-comma conversion; a set gives O(1) lookups
already_fixed = set(commas)

# Build the geocoder once instead of once per row, and throttle it: the public
# Nominatim service allows at most one request per second. Errors are retried
# by the limiter and then re-raised (swallow_exceptions=False), so a network
# failure is not silently recorded as an unresolved address.
locator = geopy.geocoders.Nominatim(user_agent='myGeocoder')
geocode = RateLimiter(locator.geocode, min_delay_seconds=1, swallow_exceptions=False)

n_crashes = len(bike_crashes)
for i, address in enumerate(bike_crashes[street_column]):

    print(i+1,'/',n_crashes)
    if i not in already_fixed:

        if pd.isna(address):
            # no street name at all
            to_check.append(i)

        elif ',' in address:
            # the street field holds a comma: not a plain "street number" address
            double_address.append(i)

        else:
            location = geocode(address + ', Turin, Italy')
            if location is not None:
                lat.append(location.latitude)
                lon.append(location.longitude)
                indices_ok.append(i)
            else:
                # geopy returns None when it cannot resolve the address
                double_address.append(i)

    clear_output(wait=True)

# fix the values of lat lon found with the address
for i, latitude, longitude in zip(indices_ok, lat, lon):
    bike_crashes.at[i,'Latitudine'] = latitude
    bike_crashes.at[i,'Longitudine'] = longitude

In [None]:
# these crashes are fixed manually
# row position in bike_crashes -> (latitude, longitude)

manual_coordinates = {
    1: (45.08397867996193, 7.6331809840222835),
    2: (45.062731480180126, 7.675381853324719),
    3: (45.0953810903701, 7.7016295686854805),
    4: (45.08666506427556, 7.6664945840237095),
    5: (45.0614095129451, 7.679731630039038),
    6: (45.10343434740111, 7.665395353347183),
    7: (45.07458629677788, 7.675906568674329),
    11: (45.082292367083184, 7.651418789895025),
    12: (45.108700735691194, 7.663120707321082),
    15: (45.03315612193116, 7.608621353308836),
    16: (45.0665234970732, 7.662149184012897),
    17: (45.089818940854144, 7.683439507310729),
    23: (45.03864839350646, 7.628703737968763),
    24: (45.06251120518977, 7.679944937981754),
    25: (45.07842477393402, 7.62333126867647),
    27: (45.065656676264496, 7.682180784012162),
    28: (45.0780465374868, 7.684441637990146),
    30: (45.077126730030734, 7.698149253332636),
    31: (45.06141312496538, 7.685231137981133),
    48: (45.0747549527113, 7.6853669686742565),
    49: (45.07792720152072, 7.646738414704918),
    52: (45.08222971421292, 7.6866550840211625),
    53: (45.09219891738017, 7.654388668683753),
    54: (45.05932559631996, 7.65572575332287),
    55: (45.08674286411487, 7.681648799366786),
    56: (45.076782958828, 7.656850607303557),
    57: (45.07845425178405, 7.6609859379903025),
    107: (45.06476705437338, 7.680401166734652),
    277: (45.08774215573367, 7.691090275540141),
    281: (45.08609848010586, 7.655025413625322),
}

# write each pair into the dataframe, latitude first, in the order listed above
for row, (fixed_lat, fixed_lon) in manual_coordinates.items():
    bike_crashes.at[row, 'Latitudine'] = fixed_lat
    bike_crashes.at[row, 'Longitudine'] = fixed_lon

In [None]:
# switch the coordinate columns to their English names:
# Latitudine -> latitude, Longitudine -> longitude
english_names = {'Latitudine':'latitude', 'Longitudine':'longitude'}
bike_crashes = bike_crashes.rename(columns=english_names)

In [None]:
# write the cleaned bike-crash table to <data folder><placeid>/ as csv
export_path = PATH["data"] + placeid + "/" +'accidents_softmobility2019.csv'
bike_crashes.to_csv(export_path)

In [None]:
# turn the crash table into a GeoDataFrame: one shapely Point per crash,
# built from (longitude, latitude) pairs, in the EPSG:4326 reference system

lon_lat_pairs = zip(bike_crashes.longitude, bike_crashes.latitude)
geometry = list(map(Point, lon_lat_pairs))
gdf_crashes_softmobility = gpd.GeoDataFrame(bike_crashes, geometry=geometry, crs="EPSG:4326")

In [None]:
# plot the crashes on Turin map

# street network as a faint background; this call creates the figure and
# therefore decides its size
ax = gdf_edges.plot(figsize=(15,8),alpha=.1, color='navy')

# crashes drawn on the same axes. No figsize here: geopandas ignores figsize
# when an existing `ax` is passed, so the former (24,16) had no effect.
gdf_crashes_softmobility.plot(ax=ax,color='darkgreen',alpha=.7)