# ACCIDENTS AND VICTIMS BY TIMEFRAMES

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import sidetable

In [2]:
#Import dataframe and storing raw data as copy.
acc_raw = pd.read_csv('../datasets/2019_accidents_gu_bcn.csv')
acc = acc_raw.copy()

In [3]:
acc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10027 entries, 0 to 10026
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Numero_expedient           10027 non-null  object 
 1   Codi_districte             10027 non-null  int64  
 2   Nom_districte              10027 non-null  object 
 3   Codi_barri                 10027 non-null  int64  
 4   Nom_barri                  10027 non-null  object 
 5   Codi_carrer                10027 non-null  int64  
 6   Nom_carrer                 10027 non-null  object 
 7   Num_postal_caption         10027 non-null  object 
 8   Descripcio_dia_setmana     10027 non-null  object 
 9   Dia_setmana                10027 non-null  object 
 10  Descripcio_tipus_dia       10027 non-null  object 
 11  NK_Any                     10027 non-null  int64  
 12  Mes_any                    10027 non-null  int64  
 13  Nom_mes                    10027 non-null  obj

In [4]:
# the type of day, shown in 'Descripcio_tipus_dia' is not filled correctly, ass all days are classified as labour.
print(acc['Descripcio_tipus_dia'].value_counts())
# therefore we will drop this column
acc.drop(columns='Descripcio_tipus_dia', inplace=True)

Laboral    10027
Name: Descripcio_tipus_dia, dtype: int64


In [5]:
#creating a new column for the type of day (if weekday or weekend/holiday). 1st set all as weekday
acc['Type_day'] = 'Weekday'

#setting the weekends as Weekend/Holiday
acc.loc[acc['Dia_setmana'].isin(['Ds','Dg']),'Type_day'] = 'Weekend/Holiday'

# Creating another new column with the complete date of the accident as a string
acc['Date'] = acc['Dia_mes'].astype(str)+'/'+acc['Mes_any'].astype(str)+'/'+acc['NK_Any'].astype(str)

# Importing the official Holidays of Barcelona during 2019, as a list.
holidays = pd.read_csv('../datasets/WeekendsHolidays.csv',delimiter=';')['Date'].astype(str).to_list()

# setting the holidays as Weekend/Holiday
acc.loc[acc['Date'].isin(holidays),'Type_day'] = 'Weekend/Holiday'

In [6]:
acc_clean = acc.drop(columns=['Codi_districte', 'Codi_barri', 'Nom_carrer', 'Num_postal_caption', 
                              'Descripcio_dia_setmana', 'Descripcio_causa_vianant', 'Coordenada_UTM_X', 
                              'Coordenada_UTM_Y', 'Longitud', 'Latitud'])

# we will store the cleaned data in a csv file in order to be used in other Notebooks.
acc_clean.to_csv('../datasets/acc_clean.csv')

## Accidents by hour of the day

In [7]:
number_acc_hour_typeday = acc_clean.groupby(['Type_day', 'Hora_dia'])['Numero_expedient'].count()
acc_hour_typeday = acc_clean.groupby(['Type_day', 'Hora_dia']).aggregate({'Numero_expedient': 'count', 'Numero_victimes': 'sum', 'Numero_vehicles_implicats': 'sum', 'Numero_lesionats_lleus':'sum', 'Numero_lesionats_greus':'sum', 'Numero_morts':'sum'})
acc_hour_typeday.columns = ['Accidents', 'Total_Victims', 'Total_Vehicles', 'Minor_Injuries', 'Severe_Injuries', 'Deaths']
acc_hour_typeday.index.names = ['Type_Day', 'Hour_Day']


acc_hour_typeday_perc = acc_hour_typeday.groupby(level=0).apply(lambda x: round (100 * x / x.sum(),2))
acc_hour_typeday_perc.columns = ['%Accidents', '%Total_Victims', '%Total_Vehicles', '%Minor_Injuries', '%Severe_Injuries', '%Deaths']


acc_hour_typeday = pd.concat([acc_hour_typeday,acc_hour_typeday_perc], axis =1)
acc_hour_typeday = acc_hour_typeday[['Accidents', '%Accidents', 'Total_Victims', '%Total_Victims', 'Total_Vehicles', '%Total_Vehicles', 'Minor_Injuries',
       '%Minor_Injuries', 'Severe_Injuries', '%Severe_Injuries', 'Deaths', '%Deaths']]
acc_hour_typeday[['Accidents', '%Accidents', 'Total_Victims', '%Total_Victims', 'Severe_Injuries', '%Severe_Injuries', 'Deaths', '%Deaths']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Accidents,%Accidents,Total_Victims,%Total_Victims,Severe_Injuries,%Severe_Injuries,Deaths,%Deaths
Type_Day,Hour_Day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Weekday,0,94,1.19,107,1.16,3,2.22,0,0.0
Weekday,1,63,0.8,74,0.8,5,3.7,0,0.0
Weekday,2,45,0.57,39,0.42,1,0.74,0,0.0
Weekday,3,28,0.35,17,0.18,0,0.0,1,7.14
Weekday,4,28,0.35,21,0.23,0,0.0,1,7.14
Weekday,5,46,0.58,51,0.55,0,0.0,0,0.0
Weekday,6,93,1.18,103,1.12,4,2.96,0,0.0
Weekday,7,305,3.86,353,3.83,2,1.48,1,7.14
Weekday,8,519,6.56,587,6.38,6,4.44,2,14.29
Weekday,9,526,6.65,632,6.86,2,1.48,0,0.0


### Hours-day sorted by accidents

In [49]:
accidents_hour = acc_hour_typeday[['Accidents','%Accidents']].sort_values(['Type_Day','Accidents'], ascending = [True,False])
accidents_hour.groupby(level=0).apply(lambda x : x.head(5).reset_index(level=0)).drop(columns='Type_Day')

Unnamed: 0_level_0,Unnamed: 1_level_0,Accidents,%Accidents
Type_Day,Hour_Day,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,14,614,7.76
Weekday,15,553,6.99
Weekday,18,550,6.95
Weekday,9,526,6.65
Weekday,8,519,6.56
Weekend/Holiday,14,157,7.42
Weekend/Holiday,13,135,6.38
Weekend/Holiday,18,124,5.86
Weekend/Holiday,19,121,5.72
Weekend/Holiday,15,118,5.57


### Hours-day sorted by victims

In [50]:
victims_hour= acc_hour_typeday[['Total_Victims','%Total_Victims']].sort_values(['Type_Day','Total_Victims'], ascending = [True,False])
victims_hour.groupby(level=0).apply(lambda x : x.head(5).reset_index(level=0)).drop(columns='Type_Day')

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_Victims,%Total_Victims
Type_Day,Hour_Day,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,14,708,7.69
Weekday,18,680,7.39
Weekday,15,642,6.97
Weekday,9,632,6.86
Weekday,13,619,6.72
Weekend/Holiday,14,212,7.98
Weekend/Holiday,18,184,6.93
Weekend/Holiday,13,183,6.89
Weekend/Holiday,12,154,5.8
Weekend/Holiday,15,154,5.8


### Hours-day sorted by severe injuries

In [51]:
severe_hour = acc_hour_typeday[['Severe_Injuries','%Severe_Injuries']].sort_values(['Type_Day','Severe_Injuries'], ascending = [True,False])
severe_hour.groupby(level=0).apply(lambda x : x.head(5).reset_index(level=0)).drop(columns='Type_Day')

Unnamed: 0_level_0,Unnamed: 1_level_0,Severe_Injuries,%Severe_Injuries
Type_Day,Hour_Day,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,18,14,10.37
Weekday,15,12,8.89
Weekday,19,12,8.89
Weekday,13,10,7.41
Weekday,22,8,5.93
Weekend/Holiday,18,10,14.93
Weekend/Holiday,17,6,8.96
Weekend/Holiday,21,5,7.46
Weekend/Holiday,5,4,5.97
Weekend/Holiday,12,4,5.97


### Hours-day sorted by deaths

In [52]:
deaths_hour = acc_hour_typeday[['Deaths','%Deaths']].sort_values(['Type_Day','Deaths'], ascending = [True,False])
deaths_hour.groupby(level=0).apply(lambda x : x.head(5).reset_index(level=0)).drop(columns='Type_Day')

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,%Deaths
Type_Day,Hour_Day,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,16,3,21.43
Weekday,8,2,14.29
Weekday,3,1,7.14
Weekday,4,1,7.14
Weekday,7,1,7.14
Weekend/Holiday,6,3,37.5
Weekend/Holiday,5,2,25.0
Weekend/Holiday,4,1,12.5
Weekend/Holiday,13,1,12.5
Weekend/Holiday,22,1,12.5


## Accidents by day of the week

In [12]:
acc_week = acc_clean.groupby('Dia_setmana').aggregate({'Numero_expedient': 'count', 'Numero_victimes': 'sum',
                                                    'Numero_vehicles_implicats': 'sum', 'Numero_lesionats_lleus':'sum',
                                                    'Numero_lesionats_greus':'sum', 'Numero_morts':'sum'})
acc_week.columns = ['Accidents', 'Total_Victims', 'Total_Vehicles', 'Minor_Injuries', 'Severe_Injuries', 'Deaths']
acc_week.index.name = 'Day_week'
acc_week.index = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
acc_week_perc = acc_week.apply(lambda x: round (100 * x / x.sum(),2))
acc_week_perc.columns = ['%Accidents', '%Total_Victims', '%Total_Vehicles', 
                          '%Minor_Injuries', '%Severe_Injuries', '%Deaths']

acc_week = pd.concat([acc_week, acc_week_perc], axis =1)
acc_week = acc_week[['Accidents', '%Accidents', 'Total_Victims', 
                       '%Total_Victims', 'Total_Vehicles', '%Total_Vehicles', 'Minor_Injuries', 
                       '%Minor_Injuries', 'Severe_Injuries', '%Severe_Injuries', 'Deaths', '%Deaths']]
acc_week

Unnamed: 0,Accidents,%Accidents,Total_Victims,%Total_Victims,Total_Vehicles,%Total_Vehicles,Minor_Injuries,%Minor_Injuries,Severe_Injuries,%Severe_Injuries,Deaths,%Deaths
Monday,1680,16.75,1947,16.41,3155,16.57,1913,16.43,30,14.85,4,18.18
Tuesday,834,8.32,1040,8.77,1580,8.3,1015,8.72,20,9.9,5,22.73
Wednesday,1648,16.44,1972,16.62,3123,16.4,1936,16.63,32,15.84,4,18.18
Thursday,1505,15.01,1754,14.78,2847,14.96,1733,14.89,18,8.91,3,13.64
Friday,1612,16.08,1895,15.97,3101,16.29,1861,15.99,34,16.83,0,0.0
Saturday,1087,10.84,1367,11.52,2067,10.86,1322,11.36,43,21.29,2,9.09
Sunday,1661,16.57,1889,15.92,3164,16.62,1860,15.98,25,12.38,4,18.18


### Days of the week sorted by accidents

In [13]:
acc_week[['Accidents','%Accidents']].sort_values('Accidents', ascending = False)

Unnamed: 0,Accidents,%Accidents
Monday,1680,16.75
Sunday,1661,16.57
Wednesday,1648,16.44
Friday,1612,16.08
Thursday,1505,15.01
Saturday,1087,10.84
Tuesday,834,8.32


### Days of the week sorted by victims

In [14]:
acc_week[['Total_Victims','%Total_Victims']].sort_values('Total_Victims', ascending = False)

Unnamed: 0,Total_Victims,%Total_Victims
Wednesday,1972,16.62
Monday,1947,16.41
Friday,1895,15.97
Sunday,1889,15.92
Thursday,1754,14.78
Saturday,1367,11.52
Tuesday,1040,8.77


### Days of the week sorted by severe injuries

In [15]:
acc_week[['Severe_Injuries','%Severe_Injuries']].sort_values('Severe_Injuries', ascending = False)

Unnamed: 0,Severe_Injuries,%Severe_Injuries
Saturday,43,21.29
Friday,34,16.83
Wednesday,32,15.84
Monday,30,14.85
Sunday,25,12.38
Tuesday,20,9.9
Thursday,18,8.91


### Days of the week sorted by deaths

In [16]:
acc_week[['Deaths','%Deaths']].sort_values('Deaths', ascending = False)

Unnamed: 0,Deaths,%Deaths
Tuesday,5,22.73
Monday,4,18.18
Wednesday,4,18.18
Sunday,4,18.18
Thursday,3,13.64
Saturday,2,9.09
Friday,0,0.0


## Accidents by month of the year

In [17]:
acc_month = acc_clean.groupby('Mes_any').aggregate({'Numero_expedient': 'count', 'Numero_victimes': 'sum',
                                                    'Numero_vehicles_implicats': 'sum', 'Numero_lesionats_lleus':'sum',
                                                    'Numero_lesionats_greus':'sum', 'Numero_morts':'sum'})
acc_month.columns = ['Accidents', 'Total_Victims', 'Total_Vehicles', 'Minor_Injuries', 'Severe_Injuries', 'Deaths']
acc_month.index.name = 'Month'
acc_month['Month_Name'] = pd.Series(['','January', 'February', 'March', 'April', 'May', 'June', 
                                     'July', 'August', 'September', 'October', 'November', 'December'])
acc_month = acc_month[['Month_Name','Accidents', 'Total_Victims', 'Total_Vehicles', 
                       'Minor_Injuries', 'Severe_Injuries', 'Deaths'] ]

acc_month_perc = acc_month[['Accidents', 'Total_Victims', 'Total_Vehicles', 'Minor_Injuries', 
                           'Severe_Injuries', 'Deaths']].apply(lambda x: round (100 * x / x.sum(),2))
acc_month_perc.columns = ['%Accidents', '%Total_Victims', '%Total_Vehicles', 
                          '%Minor_Injuries', '%Severe_Injuries', '%Deaths']


acc_month = pd.concat([acc_month, acc_month_perc], axis =1)
acc_month = acc_month[['Month_Name','Accidents', '%Accidents', 'Total_Victims', '%Total_Victims', 'Total_Vehicles', '%Total_Vehicles', 'Minor_Injuries', '%Minor_Injuries', 'Severe_Injuries', '%Severe_Injuries', 'Deaths', '%Deaths']]
acc_month

Unnamed: 0_level_0,Month_Name,Accidents,%Accidents,Total_Victims,%Total_Victims,Total_Vehicles,%Total_Vehicles,Minor_Injuries,%Minor_Injuries,Severe_Injuries,%Severe_Injuries,Deaths,%Deaths
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,January,845,8.43,965,8.13,1600,8.4,944,8.11,19,9.41,2,9.09
2,February,781,7.79,919,7.75,1564,8.22,903,7.76,15,7.43,1,4.55
3,March,885,8.83,1063,8.96,1695,8.9,1046,8.99,16,7.92,1,4.55
4,April,813,8.11,988,8.33,1570,8.25,968,8.32,18,8.91,2,9.09
5,May,944,9.41,1111,9.36,1775,9.32,1094,9.4,16,7.92,1,4.55
6,June,886,8.84,1064,8.97,1687,8.86,1044,8.97,19,9.41,1,4.55
7,July,947,9.44,1124,9.47,1800,9.46,1098,9.43,22,10.89,4,18.18
8,August,655,6.53,798,6.73,1217,6.39,784,6.74,13,6.44,1,4.55
9,September,753,7.51,912,7.69,1409,7.4,895,7.69,16,7.92,1,4.55
10,October,917,9.15,1057,8.91,1684,8.85,1033,8.87,21,10.4,3,13.64


### Months sorted by accidents

In [18]:
acc_month[['Month_Name','Accidents','%Accidents']].sort_values('Accidents', ascending = False)

Unnamed: 0_level_0,Month_Name,Accidents,%Accidents
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,July,947,9.44
5,May,944,9.41
10,October,917,9.15
6,June,886,8.84
3,March,885,8.83
11,November,846,8.44
1,January,845,8.43
4,April,813,8.11
2,February,781,7.79
12,December,755,7.53


### Months sorted by victims

In [19]:
acc_month[['Month_Name','Total_Victims','%Total_Victims']].sort_values('Total_Victims', ascending = False)

Unnamed: 0_level_0,Month_Name,Total_Victims,%Total_Victims
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,July,1124,9.47
5,May,1111,9.36
6,June,1064,8.97
3,March,1063,8.96
10,October,1057,8.91
4,April,988,8.33
1,January,965,8.13
11,November,963,8.12
2,February,919,7.75
9,September,912,7.69


### Months sorted by severe injuries

In [20]:
acc_month[['Month_Name','Severe_Injuries','%Severe_Injuries']].sort_values('Severe_Injuries', ascending = False)

Unnamed: 0_level_0,Month_Name,Severe_Injuries,%Severe_Injuries
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,July,22,10.89
10,October,21,10.4
1,January,19,9.41
6,June,19,9.41
4,April,18,8.91
3,March,16,7.92
5,May,16,7.92
9,September,16,7.92
2,February,15,7.43
11,November,14,6.93


### Months sorted by deaths

In [21]:
acc_month[['Month_Name','Deaths','%Deaths']].sort_values('Deaths', ascending = False)

Unnamed: 0_level_0,Month_Name,Deaths,%Deaths
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,July,4,18.18
10,October,3,13.64
11,November,3,13.64
1,January,2,9.09
4,April,2,9.09
12,December,2,9.09
2,February,1,4.55
3,March,1,4.55
5,May,1,4.55
6,June,1,4.55
