In [2]:
import os
import folium
import matplotlib as plt
import matplotlib.colors as mcolors
import seaborn as sns
import polars as pl

In [3]:
# Load the accident data: one polars DataFrame per file in the processed
# accidents directory.
acc_dir = '../../data/processed/accidents/'
# os.listdir returns entries in arbitrary order and also lists hidden files and
# sub-directories (e.g. notebook checkpoints), which pl.read_csv cannot parse.
# Keep only regular, non-hidden files and sort them so the load order is stable.
acc_files = sorted(
    file for file in os.listdir(acc_dir)
    if not file.startswith('.') and os.path.isfile(os.path.join(acc_dir, file))
)
acc_frames = [pl.read_csv(os.path.join(acc_dir, file)) for file in acc_files]

In [4]:
# Peek at the first row of every loaded frame to see what the data looks like.
for acc_frame in acc_frames:
    first_row = acc_frame.head(1)
    display(first_row)

state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,57,25,2019,1,5,5,1,3,2,1,0,1,0,0,0,579075.960927,6023500.0,54.35346,10.216714


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,55,12,2017,1,6,5,1,2,2,2,0,1,0,0,0,605079.4229,6001800.0,54.15315,10.609031


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,2,0,2023,5,6,22,2,3,2,0,0,1,0,0,0,574751.5619,6018300.0,54.306951,10.148875


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,59,44,2024,5,1,23,5,3,2,0,0,1,0,0,0,525162.3763,6045500.0,54.55638,9.389076


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,3,0,2018,1,5,8,7,2,0,0,1,0,0,0,0,612054.342,5969600.0,53.863081,10.70395


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,54,165,2021,3,2,7,1,2,0,2,0,1,0,0,0,483995.394384,6069100.0,54.768787,8.751233


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,54,84,2022,2,6,19,3,3,2,1,1,1,0,0,0,506085.644,6035100.0,54.463396,9.093886


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
1,0,53,120,2016,1,5,9,1,2,0,2,0,1,0,0,0,606982.394,5954700.0,53.729615,10.621659


state,region,district,municipality,year,month,weekday,hour,accident_type,injury_severity,light_condition,road_condition,is_bicycle,is_car,is_pedestrian,is_motorcycle,is_other,LINREFX,LINREFY,latitude,longitude
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64
12,0,68,468,2020,1,5,11,7,2,0,0,0,0,0,0,1,735840.4367,5887200.0,53.082133,12.521519


In [5]:
# Stack all loaded frames and keep only the accidents with bicycle
# involvement (is_bicycle == 1).
bicycle_accidents = (
    pl.concat(acc_frames)
    .filter(pl.col('is_bicycle') == 1)
)

In [6]:
# Select Heidelberg via its administrative codes: state=8, region=2, district=21.
state, region, district = 8, 2, 21

# A row belongs to Heidelberg only if all three codes match.
is_heidelberg = (
    (pl.col('state') == state)
    & (pl.col('region') == region)
    & (pl.col('district') == district)
)
bicycle_accidents_hdb = bicycle_accidents.filter(is_heidelberg)

In [7]:

# Check the mean location of the accidents in Heidelberg; the cell output is a
# plain [latitude, longitude] list.
mean_location = bicycle_accidents_hdb.select(['latitude', 'longitude']).mean()
mean_location.to_numpy()[0].tolist()

[49.40615652129418, 8.683069257318808]

In [8]:
# Create a colour map for the years: every calendar year between the first and
# the last accident year gets its own hex colour.
min_acc_year = bicycle_accidents_hdb['year'].min()
max_acc_year = bicycle_accidents_hdb['year'].max()
if min_acc_year is None or max_acc_year is None:
    # min()/max() yield None when there is no (non-null) year to aggregate;
    # fail with a clear message instead of a TypeError in the arithmetic below.
    raise ValueError('bicycle_accidents_hdb contains no accidents to colour by year')
num_years = max_acc_year - min_acc_year + 1

# `plt` is the top-level matplotlib package here (see the import cell); it
# exposes the colormap registry as `colormaps`.
cmap = plt.colormaps['viridis']
# Spread the years evenly over the colormap range [0, 1]. With a single year
# `num_years - 1` would be 0, so clamp the denominator to avoid dividing by zero.
color_steps = max(num_years - 1, 1)
year_colors = {
    year: mcolors.rgb2hex(cmap(i / color_steps))
    for i, year in enumerate(range(min_acc_year, max_acc_year + 1))
}

# Base map, centred on a fixed coordinate pair.
m2 = folium.Map(location=[49.398752, 8.672434], zoom_start=13)

# Draw one circle marker per accident, coloured by the accident year.
for accident in bicycle_accidents_hdb.iter_rows(named=True):
    year = accident['year']
    marker_color = year_colors[year]
    marker = folium.CircleMarker(
        location=[accident['latitude'], accident['longitude']],
        radius=5,
        popup=f"Year: {year}, Month: {accident['month']}",
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7
    )
    marker.add_to(m2)

# Simple HTML legend: a fixed-position box with one coloured swatch per year.
legend_header = '''
     <div style="position: fixed; 
                 bottom: 50px; left: 50px; width: 150px; height: 220px; 
                 border:2px solid grey; z-index:9999; font-size:14px;
                 background-color:white; padding: 10px;">
     <b>Accident Years</b><br>
'''
# One swatch + label per year, in ascending year order.
legend_entries = ''.join(
    f'<i style="background:{year_colors[year]};width:10px;height:10px;float:left;margin-right:5px;"></i>{year}<br>'
    for year in range(min_acc_year, max_acc_year + 1)
)
legend_html = legend_header + legend_entries + '</div>'

# Attach the legend markup to the map's root HTML so it renders on top of the map.
m2.get_root().html.add_child(folium.Element(legend_html))

# Last expression of the cell: display the map.
m2