In [36]:
import os
import geopandas as gpd
import pandas as pd

from config import rpath
# Make rpath (imported from the local config module) the working directory so that
# the relative "data/..." paths used in the cells below resolve against it.
os.chdir(rpath)

In [77]:
# Load the incident table; the first CSV column becomes the row index.
# NOTE(review): some Description values printed further down contain sequences such as
# 'SchÃ¶nefelder', so part of the text may already have been mis-decoded before this
# file was written -- verify the encoding upstream.
incid = pd.read_csv(
    "data/Berlin_2017/incidence_21_08_13a.csv",
    encoding="ISO-8859-1",
    index_col=0,
)
# Alternative raw source, kept for reference:
# incid = pd.read_csv("data/Berlin_2017/incidents_raw.csv", encoding="ISO-8859-1", error_bad_lines=False)

In [64]:
# List the columns available in the incident table.
incid.columns

Index(['Incident.Id', 'severity', 'Road.Closure', 'Category.Ids',
       'Description', 'Planned', 'Earliest.Start.Time.local.',
       'Latest.End.Time.local.', 'Direction', 'Event.Text', 'Location.Type',
       'lat', 'lon', 'Segment.Ids', 'Causes', 'Effects', 'Info', 'AlertCs',
       'Schedule', 'road.closure', 'construction', 'incident', 'planned',
       'point', 'direction', 'miles', 'bidirectional', 'frc1', 'frc2', 'frc3',
       'frc4', 'frc5', 'reason1', 'reason2', 'reason3', 'reason4', 'reason5',
       'reason6', 'reason9', 'reason11', 'reason12', 'reason13', 'reason17',
       'reason18', 'reason19', 'reason20', 'reason21', 'reason22', 'reason23',
       'reason26', 'reason27', 'reason32', 'highway', 'start', 'end',
       'startdate', 'enddate', 'startbezirk', 'startbezkey'],
      dtype='object')

In [78]:
# Filter to include only accidents.
# `filter_type` is set here so that this cell also runs on a fresh kernel
# (Restart & Run All); otherwise it is only defined in a cell further down.
#   "nolan" - keep the rows selected by the 'reason2' column only
#   "my"    - additionally require "accident"/"Accident" in Description or Event.Text
filter_type = "my"  # alternative: "nolan"

# Both variants start from the rows selected by 'reason2' (used as a row mask).
incid = incid[incid["reason2"]]

if filter_type != "nolan":
    # Build the text mask once and reuse it for the report and for the filter.
    # na=False treats missing text as "no match".
    mentions_accident = (
        incid['Description'].str.contains('[Aa]ccident', na=False)
        | incid['Event.Text'].str.contains('[Aa]ccident', na=False)
    )
    # Show the descriptions that 'reason2' selects but the text filter is about to drop.
    print(incid.loc[~mentions_accident, "Description"].tolist())
    incid = incid[mentions_accident]

['Lane closed due to maintenance work on A11 Southbound between 15 L304 Wandlitzer Chaussee and 16 L200 Schwanebecker Chaussee.', 'Ice on A115 Northbound between 9 E51 and 8 L78. Expected. Drive with extreme caution.', 'Object in the road and debris on road on A10 Berliner Ring Clockwise between 7 L38 and 8 E30. Danger. Drive with extreme caution.', 'Vehicle driving on the wrong side of the road on A11 Both ways between 15 L304 Wandlitzer Chaussee and 16 L200 Schwanebecker Chaussee.', 'Stalled vehicle on A10 Berliner Ring Clockwise between 4 B5 B1 and 5. Danger. Drive with extreme caution.', 'Vehicle driving on the wrong side of the road on A113 Both ways between 7 B96a Am Seegraben and 8 SchÃ¶nefelder Allee.', 'Vehicle driving on the wrong side of the road on B1 Both ways between A100 and A103.', 'Lane blocked due to object in the road and debris on road on A10 Berliner Ring Anticlockwise between 4 B5 B1 and 3 L33 HÃ¶nower Chaussee. Danger. Clear a lane for emergency vehicles.', 'Vehi

In [79]:
# Number of incidents remaining after the accident filter.
len(incid)

3224

In [80]:
# Only keep accidents within berlin_uc.
# Turn the table into a GeoDataFrame with one point per incident built from the
# lon/lat columns, and declare those coordinates as EPSG:4326.
incid = gpd.GeoDataFrame(
    incid,
    geometry=gpd.points_from_xy(incid.lon, incid.lat),
).set_crs(4326)

# Read the GHS layer and keep only the feature(s) named "Berlin".
berlin_uc = gpd.read_file("data/geodata/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")
berlin_uc = berlin_uc.loc[berlin_uc.UC_NM_MN == "Berlin", ["UC_NM_MN", "geometry"]]

# "intersects" is sjoin's default spatial predicate, so it is left implicit here:
# the keyword was renamed from `op` to `predicate` (GeoPandas 0.10) and `op` was
# later removed, whereas the default works with both the old and the new API.
incid = gpd.sjoin(incid, berlin_uc, how="left")

# The left join keeps every incident; rows without a Berlin match have a missing
# UC_NM_MN and are dropped here, together with the join helper columns.
incid = incid[incid.UC_NM_MN == "Berlin"].drop(columns=["UC_NM_MN", "index_right"])

In [81]:
# Number of incidents remaining after the spatial filter on berlin_uc.
len(incid)

1176

In [83]:
# The raw stamps look like '2017-01-02T13:56:30+01:00'. The second character of the
# UTC offset ('1' in +01:00, '2' in +02:00) is used as a summer-time (DST) flag.
start_tf = incid["Earliest.Start.Time.local."].apply(lambda x: x.split("+")[1][1]) == '2'
tf = incid["Latest.End.Time.local."].apply(lambda x: x.split("+")[1][1]) == '2'

# Convert timestamp to datetime objects: parse the wall-clock part (before the '+')
# and attach the 'CET' time zone.
# `ambiguous` is only consulted for wall times that occur twice when the clocks are
# set back in autumn; True selects the DST occurrence. The start column now gets its
# own flag as well, so a start time inside that repeated hour no longer raises
# AmbiguousTimeError.
incid["start_timestamp_CET"] = pd.to_datetime(
    incid["Earliest.Start.Time.local."].apply(lambda x: x.split("+")[0]),
    format='%Y-%m-%dT%H:%M:%S'
).dt.tz_localize('CET', ambiguous=start_tf)
incid["end_timestamp_CET"] = pd.to_datetime(
    incid["Latest.End.Time.local."].apply(lambda x: x.split("+")[0]),
    format='%Y-%m-%dT%H:%M:%S'
).dt.tz_localize('CET', ambiguous=tf)

# Filter dataframe to only include peak hours during weekdays.
# Weekday test: the start OR the end falls on Monday-Friday
# (weekday 0-4; 5 = Saturday, 6 = Sunday).
starts_on_weekday = incid.start_timestamp_CET.dt.weekday < 5
ends_on_weekday = incid.end_timestamp_CET.dt.weekday < 5
incid = incid[starts_on_weekday | ends_on_weekday]

# Peak test: the start OR the end hour lies in 06:00-09:59 or 14:00-19:59.
# NOTE(review): the weekday test and the peak test are evaluated independently of
# each other (a row may pass one via its start and the other via its end), and only
# the start/end hours are checked, not the time in between -- confirm this is intended.
start_hour = incid.start_timestamp_CET.dt.hour
end_hour = incid.end_timestamp_CET.dt.hour
start_in_peak = ((start_hour >= 6) & (start_hour < 10)) | ((start_hour >= 14) & (start_hour < 20))
end_in_peak = ((end_hour >= 6) & (end_hour < 10)) | ((end_hour >= 14) & (end_hour < 20))
incid = incid[start_in_peak | end_in_peak]

In [84]:
# Number of incidents remaining after the weekday / peak-hour filter.
len(incid)

792

In [37]:

# Accident filter variant used in this cell:
#   "nolan" - keep the rows selected by the 'reason2' column
#   "my"    - keep the rows whose 'Causes', 'Description' or 'Event.Text' mention "accident"
# (original author's note: the "my" variant is the more restrictive one)
filter_type = "my" # alternative: "nolan"

# Create boolean vectors to feed into the "ambiguous" argument below.
# The raw stamps look like '2017-01-02T13:56:30+01:00'; the second character of the
# UTC offset ('1' in +01:00, '2' in +02:00) tells whether the stamp is in winter or
# summer time. True means summer time (DST).
start_tf = incid["Earliest.Start.Time.local."].apply(lambda x: x.split("+")[1][1]) == '2'
tf = incid["Latest.End.Time.local."].apply(lambda x: x.split("+")[1][1]) == '2'

# Convert timestamp to datetime objects: parse the wall-clock part (before the '+')
# and attach the 'CET' time zone.
# `ambiguous` is only consulted for wall times that occur twice when the clocks are
# set back in autumn; True selects the DST occurrence. The start column now gets its
# own flag as well, so a start time inside that repeated hour no longer raises
# AmbiguousTimeError.
incid["start_timestamp_CET"] = pd.to_datetime(
    incid["Earliest.Start.Time.local."].apply(lambda x: x.split("+")[0]),
    format='%Y-%m-%dT%H:%M:%S'
).dt.tz_localize('CET', ambiguous=start_tf)
incid["end_timestamp_CET"] = pd.to_datetime(
    incid["Latest.End.Time.local."].apply(lambda x: x.split("+")[0]),
    format='%Y-%m-%dT%H:%M:%S'
).dt.tz_localize('CET', ambiguous=tf)

# Filter dataframe to only include peak hours during weekdays.
# Step 1 - weekday: keep a row when its start or its end is on Monday-Friday
# (weekday 0-4; 5 = Saturday, 6 = Sunday).
weekday_mask = (
    (incid.start_timestamp_CET.dt.weekday < 5)
    | (incid.end_timestamp_CET.dt.weekday < 5)
)
incid = incid[weekday_mask]

# Step 2 - peak windows: keep a row when its start hour or its end hour lies in
# 06:00-09:59 or 14:00-19:59.
hour_of_start = incid.start_timestamp_CET.dt.hour
hour_of_end = incid.end_timestamp_CET.dt.hour
peak_mask = (
    ((hour_of_start >= 6) & (hour_of_start < 10))
    | ((hour_of_start >= 14) & (hour_of_start < 20))
    | ((hour_of_end >= 6) & (hour_of_end < 10))
    | ((hour_of_end >= 14) & (hour_of_end < 20))
)
incid = incid[peak_mask]

# Filter to include only accidents, using the variant selected by `filter_type`.
if filter_type=="nolan":
    # Use the 'reason2' column as the row mask.
    incid = incid[incid["reason2"]]
else:
    # Keep a row when any of the three text columns mentions "accident"/"Accident";
    # na=False treats missing text as "no match".
    mentions_accident = incid['Causes'].str.contains('[Aa]ccident', na=False)
    for text_column in ('Description', 'Event.Text'):
        mentions_accident = mentions_accident | incid[text_column].str.contains('[Aa]ccident', na=False)
    incid = incid[mentions_accident]

# Make GDF: one point per incident built from the lon/lat columns, with the
# coordinates declared as EPSG:4326.
incid = gpd.GeoDataFrame(
    incid,
    geometry=gpd.points_from_xy(incid.lon, incid.lat),
).set_crs(4326)

# Only keep accidents within berlin_uc.
# Read the GHS layer and keep only the feature(s) named "Berlin".
berlin_uc = gpd.read_file("data/geodata/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")
berlin_uc = berlin_uc.loc[berlin_uc.UC_NM_MN == "Berlin", ["UC_NM_MN", "geometry"]]

# "intersects" is sjoin's default spatial predicate, so it is left implicit here:
# the keyword was renamed from `op` to `predicate` (GeoPandas 0.10) and `op` was
# later removed, whereas the default works with both the old and the new API.
incid = gpd.sjoin(incid, berlin_uc, how="left")

# The left join keeps every incident; rows without a Berlin match have a missing
# UC_NM_MN and are dropped here, together with the join helper columns.
incid = incid[incid.UC_NM_MN == "Berlin"].drop(columns=["UC_NM_MN", "index_right"])

incid.head()

Unnamed: 0,Incident.Id,severity,Road.Closure,Category.Ids,Description,Planned,Earliest.Start.Time.local.,Latest.End.Time.local.,Direction,Event.Text,...,highway,start,end,startdate,enddate,startbezirk,startbezkey,start_timestamp_CET,end_timestamp_CET,geometry
98,289aa381-259c-47a9-a03d-6dcc048100f8,3,True,4-Incident,Road closed due to accident on Mariendorfer We...,False,2017-01-02T13:56:30+01:00,2017-01-02T17:56:30+01:00,Both ways,"Road closed, accident",...,False,2017-01-02T13:56:30,2017-01-02T17:56:30,2017-01-02 13:56:30,2017-01-02 17:56:30,Neukölln,8,2017-01-02 13:56:30+01:00,2017-01-02 17:56:30+01:00,POINT (13.43291 52.46470)
104,8183292b-6296-4448-b5f4-fc1a915d2bc1,3,False,4-Incident,Roadway reduced from lanes to one lane due to...,False,2017-01-02T16:13:39+01:00,2017-01-02T20:13:39+01:00,Both ways,"Accident, roadway reduced from lanes to one lane",...,False,2017-01-02T16:13:39,2017-01-02T20:13:39,2017-01-02 16:13:39,2017-01-02 20:13:39,Spandau,5,2017-01-02 16:13:39+01:00,2017-01-02 20:13:39+01:00,POINT (13.18474 52.51547)
150,65e2e901-45e4-4b01-bfd5-48517de5c112,4,True,4-Incident,Road closed due to accident on B96 Mehringdamm...,False,2017-01-03T19:50:18+01:00,2017-01-03T23:50:18+01:00,Northbound,"Road closed, accident",...,False,2017-01-03T19:50:18,2017-01-03T23:50:18,2017-01-03 19:50:18,2017-01-03 23:50:18,Friedrichshain-Kreuzberg,2,2017-01-03 19:50:18+01:00,2017-01-03 23:50:18+01:00,POINT (13.38767 52.49295)
164,b2f49ded-01fc-45cb-a0b2-5b1ee4edf91d,4,False,4-Incident,Lane closed due to accident on A111 Northbound...,False,2017-01-04T06:35:45+01:00,2017-01-04T10:35:45+01:00,Northbound,"Lane (or Lanes) closed, accident",...,True,2017-01-04T06:35:45,2017-01-04T10:35:45,2017-01-04 06:35:45,2017-01-04 10:35:45,Reinickendorf,12,2017-01-04 06:35:45+01:00,2017-01-04 10:35:45+01:00,POINT (13.31069 52.56524)
166,694c5548-7f0f-4c6d-94d6-826470765409,4,False,4-Incident,Lane closed due to accident on A100 Eastbound ...,False,2017-01-04T06:53:15+01:00,2017-01-04T10:53:15+01:00,Eastbound,"Lane (or Lanes) closed, accident",...,True,2017-01-04T06:53:15,2017-01-04T10:53:15,2017-01-04 06:53:15,2017-01-04 10:53:15,Tempelhof-Schöneberg,7,2017-01-04 06:53:15+01:00,2017-01-04 10:53:15+01:00,POINT (13.36914 52.47136)
