In [1]:
import pandas as pd
import numpy as np
from siuba import *
from plotnine import *
import plotly.express as px

In [2]:
# Load data for downloaded roads and select ours
df = pd.read_parquet("../data/preprocessed/accidents_from_mapper.parquet")
df_90 = df[df.source_road==90].copy()

In [3]:
df_90 = (
    df
    >> filter(_.source_road==90)
    >> mutate(part=
        if_else(_.location <= 49.5, 'phase_1', 
        if_else((_.location > 49.5) & (_.location <= 60.4), 'phase_2', 
        if_else(_.location <= 180.5, 'not_extended', 'out_of_scope'))),
        after=(((_.location < 49.5) & (_.year >= 2012)) | ((_.location >= 49.5) & (_.location < 60.4) & (_.year > 2018)))
    )
    >> mutate(section=_.part + if_else(_.after, '_two_lane', '_one_lane'))
    >> mutate(section_time=_.part + if_else(_.year < 2012, '_before_2012', '_after_2012'))
    >> mutate(section_time_late=_.part + if_else(_.year < 2018, '_before_2018', '_after_2018'))
)
df_90 = df_90 >> filter(_.part != 'out_of_scope')
df_90.section_time.value_counts()

not_extended_before_2012    120
not_extended_after_2012      76
phase_1_before_2012          69
phase_1_after_2012           41
phase_2_before_2012          23
phase_2_after_2012           10
Name: section_time, dtype: int64

In [4]:
junctions = pd.read_csv("../data/preprocessed/junctions.csv")
junctions

Unnamed: 0,name,location
0,Eilot,11.9
1,Meches,15.6
2,Beer Ora,27.0
3,Timna,36.0
4,Samar,41.3
5,Yotvata,49.5
6,Grofit,54.0
7,Ktora,60.4
8,Yahel,72.3
9,Menuha,97.6


In [5]:
casualty_cols = [
    'killed',
    'severly_injured',
    'lightly_injured',
    'injured_pedestrians',
    'casualties_ages_0-19',
    'casualties_ages_20-64',
    'casualties_ages_65_plus',
    'total_casualties',
    'vehicle_count',
    'drivers',
]
summary = df_90.groupby('section_time')[casualty_cols].sum().reset_index() #>> filter(_.section_time.str.contains('phase_2') == False)
summary.to_excel("summary.xlsx")
summary

Unnamed: 0,section_time,killed,severly_injured,lightly_injured,injured_pedestrians,casualties_ages_0-19,casualties_ages_20-64,casualties_ages_65_plus,total_casualties,vehicle_count,drivers
0,not_extended_after_2012,24.0,69.0,202.0,2.0,53.0,198.0,35.0,295.0,135.0,135.0
1,not_extended_before_2012,33.0,62.0,423.0,0.0,187.0,314.0,16.0,518.0,208.0,208.0
2,phase_1_after_2012,2.0,13.0,91.0,1.0,27.0,75.0,2.0,106.0,60.0,60.0
3,phase_1_before_2012,13.0,15.0,246.0,1.0,70.0,197.0,5.0,274.0,147.0,147.0
4,phase_2_after_2012,7.0,0.0,28.0,1.0,10.0,23.0,2.0,35.0,18.0,18.0
5,phase_2_before_2012,8.0,17.0,78.0,0.0,27.0,74.0,2.0,103.0,40.0,40.0


In [6]:
df_90.section_time.value_counts()

not_extended_before_2012    120
not_extended_after_2012      76
phase_1_before_2012          69
phase_1_after_2012           41
phase_2_before_2012          23
phase_2_after_2012           10
Name: section_time, dtype: int64

In [52]:

df_90.rename({'section_time':{
        'not_extended_before_2012': 'ערבה תיכונה לפני 2012',
        'not_extended_after_2012': 'ערבה תיכונה אחרי 2012',
        'phase_1_before_2012': 'מקטע 1 לפני 2012',
        'phase_1_after_2012': 'מקטע 1 אחרי 2012'
}})

Unnamed: 0,year,month,day of week,day/night,accident_severity,accident_type,killed,severly_injured,lightly_injured,injured_pedestrians,...,location,road2,road3,road4,source_road,part,after,section,section_time,section_time_late
70,2003,אפריל,ראשון,לילה,קשה,התנגשות חזית בצד,0.0,1.0,8.0,0.0,...,100.3,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
74,2003,מאי,שישי,לילה,קלה,התהפכות,0.0,0.0,3.0,0.0,...,75.1,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
84,2003,מאי,שבת,לילה,קלה,התהפכות,0.0,0.0,4.0,0.0,...,39.9,,,,90,phase_1,False,phase_1_one_lane,phase_1_before_2012,phase_1_before_2018
86,2003,יוני,חמישי,לילה,קלה,התנגשות חזית באחור,0.0,0.0,2.0,0.0,...,135.8,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
88,2003,מרס,שני,לילה,קטלנית,פגיעה בהולך רגל,1.0,4.0,22.0,1.0,...,43.0,,,,90,phase_1,False,phase_1_one_lane,phase_1_before_2012,phase_1_before_2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,2021,אוגוסט,רביעי,יום,קלה,התנגשות חזית בחזית,0.0,0.0,4.0,0.0,...,129.8,,,,90,not_extended,False,not_extended_one_lane,not_extended_after_2012,not_extended_after_2018
2571,2021,דצמבר,רביעי,לילה,קלה,התנגשות חזית בחזית,0.0,0.0,3.0,0.0,...,134.7,,,,90,not_extended,False,not_extended_one_lane,not_extended_after_2012,not_extended_after_2018
2572,2021,אוגוסט,חמישי,לילה,קלה,התנגשות חזית בצד,0.0,0.0,5.0,0.0,...,60.1,,,,90,phase_2,True,phase_2_two_lane,phase_2_after_2012,phase_2_after_2018
2591,2022,פברואר,שני,יום,קלה,התנגשות חזית בצד,0.0,0.0,2.0,0.0,...,38.3,,,,90,phase_1,True,phase_1_two_lane,phase_1_after_2012,phase_1_after_2018


In [53]:
fig = (
    df_90.replace({'section_time':{
            'not_extended_before_2012': 'ערבה תיכונה לפני 2012',
            'not_extended_after_2012': 'ערבה תיכונה אחרי 2012',
            'phase_1_before_2012': 'מקטע 1 לפני 2012',
            'phase_1_after_2012': 'מקטע 1 אחרי 2012'
    }})
    >> filter(_.part!='phase_2')
    >> group_by(_.section_time)
    >> count(_.accident_type)
    >> pipe(lambda x: px.bar(x, x='accident_type', y='n', color='section_time', barmode='group',
        title='סוג תאונה בחלוקה למקטעים ותקופות', labels={'accident_type':'סוג תאונה', 'n': 'מספר תאונות',
        'section_time': 'מקטע'},
        category_orders={'section_time': ['ערבה תיכונה לפני 2012', 'ערבה תיכונה אחרי 2012', 'מקטע 1 לפני 2012', 'מקטע 1 אחרי 2012']}),
        )
)

fig.update_layout(
    legend=dict(
        yanchor='top',
        y=0.96,
        xanchor='right',
        x=0.99
    )
)
fig.show()

# Fatality of accidents

In [8]:
df.columns.tolist()

['year',
 'month',
 'day of week',
 'day/night',
 'accident_severity',
 'accident_type',
 'killed',
 'severly_injured',
 'lightly_injured',
 'injured_pedestrians',
 'casualties_ages_0-19',
 'casualties_ages_20-64',
 'casualties_ages_65_plus',
 'total_casualties',
 'vehicle_count',
 'drivers',
 'road_type',
 'localization_quality',
 'settlement',
 'road1',
 'location',
 'road2',
 'road3',
 'road4',
 'source_road']

In [54]:
df.source_road = df.source_road.astype(str)
fig = ( 
    (df >> filter(_.year >= 2018))
    .groupby('source_road')
    .accident_severity
    .value_counts(normalize=True)
    .reset_index(name='severity') 
    >> filter(_.accident_severity=='קטלנית')
    >> mutate(color=_.source_road=='90')
    >> pipe(lambda x: px.bar(x, 
        x='source_road', 
        y='severity', 
        color='color',
        title='אחוז תאונות קטלניות בין 2003 ל-2022 לפי כביש',
        text='severity',
        text_auto=',.1%',
        labels={'severity': 'אחוז תאונות קטלניות מתוך סה"כ תאונות', 'source_road': 'מספר כביש'}
        ))
)
fig.update_traces(textfont_size=20)

In [31]:

fig = ( 
    df_90
    .groupby('section_time')
    .accident_severity
    .value_counts(normalize=True)
    .reset_index(name='severity') 
    .replace({'section_time': {
            'not_extended_before_2012': 'ערבה תיכונה לפני 2012',
            'not_extended_after_2012': 'ערבה תיכונה אחרי 2012',
            'phase_1_before_2012': 'מקטע 1 לפני 2012',
            'phase_1_after_2012': 'מקטע 1 אחרי 2012'
        }})
    >> filter(_.accident_severity=='קטלנית')
    >> filter(_.section_time.str.contains("phase_2") != True)
    >> mutate(color = if_else(_.severity > 0.21, 'red', 'blue'))
    >> pipe(lambda x: px.bar(x, 
        x='section_time', 
        y='severity', 
        color='color',
        title='אחוז תאונות קטלניות בכביש 90 עד צומת הערבה',
        text='severity',
        text_auto=',.1%',
        labels={'severity': 'אחוז תאונות קטלניות מתוך סה"כ תאונות', 'section_time': 'מקטע',
        },
        color_discrete_map={'red': '#EF553B', 'blue': '#636EFA'}
        ))
)
fig.update_traces(textfont_size=20)

In [42]:
(
    df
    # >> filter(_.source_road == '90')
    >> group_by(_.source_road, _.year)
    >> summarize(severity=(_.accident_severity=='קטלנית').sum() / (_.year != 0).sum())
    >> pipe(lambda x: px.line(x, x='year', y='severity', color='source_road'))
)

In [46]:
(
    df_90
    >> group_by(_.section, _.year)
    >> summarize(severity=(_.accident_severity=='קטלנית').sum() / (_.year != 0).sum())
    >> pipe(lambda x: px.line(x, x='year', y='severity', color='section'))
)

# Fatalities per accident

In [48]:
(
    df_90
    >> filter(_.part != 'phase_2')
    >> group_by('section_time')
    >> summarize(fatalities_per_accidents=_.killed.sum() / (_.year!=0).sum())
)

Unnamed: 0,section_time,fatalities_per_accidents
0,not_extended_after_2012,0.315789
1,not_extended_before_2012,0.275
2,phase_1_after_2012,0.04878
3,phase_1_before_2012,0.188406


# Continuous metric along the road

Unnamed: 0,year,month,day of week,day/night,accident_severity,accident_type,killed,severly_injured,lightly_injured,injured_pedestrians,...,location,road2,road3,road4,source_road,part,after,section,section_time,section_time_late
70,2003,אפריל,ראשון,לילה,קשה,התנגשות חזית בצד,0.0,1.0,8.0,0.0,...,100.3,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
74,2003,מאי,שישי,לילה,קלה,התהפכות,0.0,0.0,3.0,0.0,...,75.1,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
84,2003,מאי,שבת,לילה,קלה,התהפכות,0.0,0.0,4.0,0.0,...,39.9,,,,90,phase_1,False,phase_1_one_lane,phase_1_before_2012,phase_1_before_2018
86,2003,יוני,חמישי,לילה,קלה,התנגשות חזית באחור,0.0,0.0,2.0,0.0,...,135.8,,,,90,not_extended,False,not_extended_one_lane,not_extended_before_2012,not_extended_before_2018
88,2003,מרס,שני,לילה,קטלנית,פגיעה בהולך רגל,1.0,4.0,22.0,1.0,...,43.0,,,,90,phase_1,False,phase_1_one_lane,phase_1_before_2012,phase_1_before_2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,2021,אוגוסט,רביעי,יום,קלה,התנגשות חזית בחזית,0.0,0.0,4.0,0.0,...,129.8,,,,90,not_extended,False,not_extended_one_lane,not_extended_after_2012,not_extended_after_2018
2571,2021,דצמבר,רביעי,לילה,קלה,התנגשות חזית בחזית,0.0,0.0,3.0,0.0,...,134.7,,,,90,not_extended,False,not_extended_one_lane,not_extended_after_2012,not_extended_after_2018
2572,2021,אוגוסט,חמישי,לילה,קלה,התנגשות חזית בצד,0.0,0.0,5.0,0.0,...,60.1,,,,90,phase_2,True,phase_2_two_lane,phase_2_after_2012,phase_2_after_2018
2591,2022,פברואר,שני,יום,קלה,התנגשות חזית בצד,0.0,0.0,2.0,0.0,...,38.3,,,,90,phase_1,True,phase_1_two_lane,phase_1_after_2012,phase_1_after_2018
