In [26]:
import pandas as pd
import numpy as np
from siuba import *
from plotnine import *
import plotly.express as px

In [93]:
# Load data for downloaded roads and select ours
df = pd.read_parquet("../data/preprocessed/accidents_from_mapper.parquet")
df_90 = df[df.source_road==90].copy()

In [121]:
df_90 = (
    df
    >> filter(_.source_road==90)
    >> mutate(part=
        if_else(_.location <= 49.5, 'phase_1', 
        if_else((_.location > 49.5) & (_.location <= 60.4), 'phase_2', 
        if_else(_.location <= 180.5, 'not_extended', 'out_of_scope'))),
        after=(((_.location < 49.5) & (_.year >= 2012)) | ((_.location >= 49.5) & (_.location < 60.4) & (_.year > 2018)))
    )
    >> mutate(section=_.part + if_else(_.after, '_two_lane', '_one_lane'))
    >> mutate(section_time=_.part + if_else(_.year < 2012, '_before_2012', '_after_2012'))
    >> mutate(section_time_late=_.part + if_else(_.year < 2018, '_before_2018', '_after_2018'))
)
df_90 = df_90 >> filter(_.part != 'out_of_scope')
df_90.section_time.value_counts()

not_extended_before_2012    120
not_extended_after_2012      76
phase_1_before_2012          69
phase_1_after_2012           41
phase_2_before_2012          23
phase_2_after_2012           10
Name: section_time, dtype: int64

In [28]:
junctions = pd.read_csv("../data/preprocessed/junctions.csv")
junctions

Unnamed: 0,name,location
0,Eilot,11.9
1,Meches,15.6
2,Beer Ora,27.0
3,Timna,36.0
4,Samar,41.3
5,Yotvata,49.5
6,Grofit,54.0
7,Ktora,60.4
8,Yahel,72.3
9,Menuha,97.6


In [124]:
casualty_cols = [
    'killed',
    'severly_injured',
    'lightly_injured',
    'injured_pedestrians',
    'casualties_ages_0-19',
    'casualties_ages_20-64',
    'casualties_ages_65_plus',
    'total_casualties',
    'vehicle_count',
    'drivers',
]
summary = df_90.groupby('section_time')[casualty_cols].sum()
summary#.to_excel("summary.xlsx")

Unnamed: 0_level_0,killed,severly_injured,lightly_injured,injured_pedestrians,casualties_ages_0-19,casualties_ages_20-64,casualties_ages_65_plus,total_casualties,vehicle_count,drivers
section_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
not_extended_after_2012,24.0,69.0,202.0,2.0,53.0,198.0,35.0,295.0,135.0,135.0
not_extended_before_2012,33.0,62.0,423.0,0.0,187.0,314.0,16.0,518.0,208.0,208.0
phase_1_after_2012,2.0,13.0,91.0,1.0,27.0,75.0,2.0,106.0,60.0,60.0
phase_1_before_2012,13.0,15.0,246.0,1.0,70.0,197.0,5.0,274.0,147.0,147.0
phase_2_after_2012,7.0,0.0,28.0,1.0,10.0,23.0,2.0,35.0,18.0,18.0
phase_2_before_2012,8.0,17.0,78.0,0.0,27.0,74.0,2.0,103.0,40.0,40.0


In [117]:
(
    df_90
    >> filter(_.after)
    >> filter(_.killed > 0)
)

Unnamed: 0,year,month,day of week,day/night,accident_severity,accident_type,killed,severly_injured,lightly_injured,injured_pedestrians,...,settlement,road1,location,road2,road3,road4,source_road,part,after,section
2176,2017,אוגוסט,שני,יום,קטלנית,התהפכות,1.0,1.0,1.0,0.0,...,,90.0,45.0,,,,90,phase_1,True,phase_1_two_lane
2343,2019,אוקטובר,חמישי,יום,קטלנית,התנגשות צד בצד,2.0,0.0,0.0,0.0,...,,90.0,60.2,,,,90,phase_2,True,phase_2_two_lane
2400,2019,ינואר,שישי,לילה,קטלנית,התנגשות חזית בחזית,3.0,0.0,0.0,0.0,...,,90.0,53.6,,,,90,phase_2,True,phase_2_two_lane
2534,2021,יולי,ראשון,לילה,קטלנית,פגיעה בהולך רגל,1.0,0.0,0.0,1.0,...,,90.0,31.0,,,,90,phase_1,True,phase_1_two_lane


In [40]:
df_90.groupby(['part', 'year']).sum().reset_index() >> pipe(lambda x: px.bar(x, x='year', y='killed', color='part', barmode='group'))

In [49]:
(
    df_90
    >> filter(_.part=='arava_dromit', _.year>=2012, _.killed>0, _.location < 49.5)
    >> select('year', 'month', 'day of week', 'day/night', 'accident_type', 'killed', 'location')
    
)

Unnamed: 0,year,month,day of week,day/night,accident_type,killed,location
2176,2017,אוגוסט,שני,יום,התהפכות,1.0,45.0
2534,2021,יולי,ראשון,לילה,פגיעה בהולך רגל,1.0,31.0


In [6]:
df.value_counts('section')

section
arava_tihona_before_2012    125
arava_dromit_before_2012     89
arava_tihona_after_2012      84
arava_dromit_after_2012      50
dtype: int64

In [7]:
df[casualty_cols].sum()

killed                       87
severly_injured             178
lightly_injured            1099
injured_pedestrians           7
casualties_ages_0-19        379
casualties_ages_20-64       906
casualties_ages_65_plus      65
total_casualties           1364
vehicle_count               629
drivers                     629
dtype: int64

In [128]:
fig = (
    df_90
    # >> filter(_.part=='south_')
    # >> filter(_.location >= 60.4, _.location <= 180)
    >> group_by(_.section_time)
    >> count(_.accident_type)
    >> pipe(lambda x: px.bar(x, x='accident_type', y='n', color='section_time', barmode='group',
        title='סוג תאונה בחלוקה למקטעים ותקופות', labels={'accident_type':'סוג תאונה', 'n': 'מספר תאונות'},
        category_orders={'section': ['arava_tihona_before_2012', 'arava_tihona_after_2012', 'arava_dromit_before_2012', 'arava_tihona_after_2012']}))
    # >> group_by(_.year)
    # >> summarize(killed=_.killed.sum(), lightly_injured=_.lightly_injured.sum())
)

fig.update_layout(
    legend=dict(
        yanchor='top',
        y=0.96,
        xanchor='right',
        x=0.99
    )
)
fig.show()

In [9]:
df.groupby('accident_type').sum()[casualty_cols].to_excel("acc_type.xlsx")

In [10]:

(
    df
    >> mutate(part=if_else(_.location <= 60.4, 'south', 'mid'),
        after=_.year >= 2012
    )
    >> mutate(part_time=_.part + if_else(_.after, '_after', '_before'))
    >> filter(_.part=='mid')
    # >> filter(_.location >= 60.4, _.location <= 180)
    >> group_by(_.part_time)
    >> count(_.accident_type)
    >> pipe(lambda x: px.bar(x, x='accident_type', y='n', color='part_time', barmode='group'))
    # >> group_by(_.year)
    # >> summarize(killed=_.killed.sum(), lightly_injured=_.lightly_injured.sum())
)

# Fatality of accidents

In [11]:
df.columns.tolist()

['year',
 'month',
 'day of week',
 'day/night',
 'accident_severity',
 'accident_type',
 'killed',
 'severly_injured',
 'lightly_injured',
 'injured_pedestrians',
 'casualties_ages_0-19',
 'casualties_ages_20-64',
 'casualties_ages_65_plus',
 'total_casualties',
 'vehicle_count',
 'drivers',
 'road_type',
 'localization_quality',
 'settlement',
 'road1',
 'location',
 'road2',
 'road3',
 'road4',
 'part',
 'after',
 'section']

In [134]:
(
    df_90
    >> filter(_.part != 'phase_2')
).groupby('section_time').accident_severity.value_counts(normalize=True).reset_index(name='severity') >> filter(_.accident_severity == 'קטלנית')

Unnamed: 0,section_time,accident_severity,severity
2,not_extended_after_2012,קטלנית,0.25
5,not_extended_before_2012,קטלנית,0.2
8,phase_1_after_2012,קטלנית,0.04878
10,phase_1_before_2012,קטלנית,0.144928


In [129]:
df_90.groupby('section_time').accident_severity.value_counts(normalize=True).reset_index(name='severity')

Unnamed: 0,section_time,accident_severity,severity
0,not_extended_after_2012,קשה,0.394737
1,not_extended_after_2012,קלה,0.355263
2,not_extended_after_2012,קטלנית,0.25
3,not_extended_before_2012,קלה,0.508333
4,not_extended_before_2012,קשה,0.291667
5,not_extended_before_2012,קטלנית,0.2
6,phase_1_after_2012,קלה,0.682927
7,phase_1_after_2012,קשה,0.268293
8,phase_1_after_2012,קטלנית,0.04878
9,phase_1_before_2012,קלה,0.782609


In [25]:
(
    df.groupby(['year', 'part']).accident_severity.value_counts(normalize=True).reset_index(name='severity')
    >> filter(_.accident_severity=='קטלנית')
    >> pipe(lambda x: px.bar(x, x='year', y='severity', color='part', barmode='group'))
)