In [2]:
%pip install -r requirements.txt

Collecting geopandas>=0.14.0 (from -r requirements.txt (line 1))
  Downloading geopandas-1.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting folium (from -r requirements.txt (line 2))
  Downloading folium-0.19.7-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting pyogrio>=0.7.2 (from geopandas>=0.14.0->-r requirements.txt (line 1))
  Downloading pyogrio-0.11.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (5.3 kB)
Collecting pyproj>=3.5.0 (from geopandas>=0.14.0->-r requirements.txt (line 1))
  Downloading pyproj-3.7.1-cp311-cp311-macosx_14_0_arm64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas>=0.14.0->-r requirements.txt (line 1))
  Downloading shapely-2.1.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting branca>=0.6.0 (from folium->-r requirements.txt (line 2))
  Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Collecting xyzservices (from folium->-r requirements.txt (line 2))
  Downloading xyzservices-2025.4.0-py3-none-any.whl.metad

In [16]:
# Filter: category
import geopandas as gpd
import pandas as pd
import folium

# Load data
issues_df = pd.read_csv('../data/challenge_2/complete_issues_data.csv')
states = gpd.read_file(
    "../vg5000_12-31.gk3.shape.ebenen/vg5000_ebenen_1231/VG5000_LAN.shp"
).to_crs("EPSG:4326")

# Category
#issues_df = issues_df.loc[issues_df["category"] == "Wirtschaft"]

# Time
#issues_df["timestamp"] = pd.to_datetime(issues_df["timestamp"])
#issues_df = issues_df.loc[
#    (issues_df["timestamp"] >= "2025-03-03 08:46:00") &
#    (issues_df["timestamp"] < "2025-03-20 00:00:00")
#]

# Age Group
#issues_df = issues_df.loc[
#    (issues_df["age_group"] >= "45-54") &
#    (issues_df["age_group"] < "65+")
#]

# Gender
issues_df = issues_df.loc[issues_df["gender"] == "female"]

display(issues_df)

# Count issues per state
issues_per_state = (
    issues_df.groupby('state')
    .size()
    .reset_index(name='issue_count')
)

# Merge
states_with_data = states.merge(
    issues_per_state,
    left_on='GEN',
    right_on='state',
    how='left'
)

# **Drop any datetime columns** and keep only GEN, issue_count, geometry
datetime_cols = states_with_data.select_dtypes(['datetime64[ns]']).columns
states_for_map = states_with_data.drop(columns=datetime_cols)[['GEN','issue_count','geometry']]

# Fill NaNs so tooltip shows 0 instead of “nan”
states_for_map['issue_count'] = states_for_map['issue_count'].fillna(0).astype(int)

# Build map
m = folium.Map(location=[51.0, 10.0], zoom_start=6)

folium.Choropleth(
    geo_data=states_for_map.to_json(),
    name='Issues by State',
    data=states_for_map,
    columns=['GEN', 'issue_count'],
    key_on='feature.properties.GEN',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Issues'
).add_to(m)

folium.features.GeoJson(
    states_for_map,
    name='State Info',
    tooltip=folium.features.GeoJsonTooltip(
        fields=['GEN', 'issue_count'],
        aliases=['State:', 'Issues:'],
        localize=True
    )
).add_to(m)

m.save('germany_issues_choropleth-cat.html')


Unnamed: 0,issue_id,timestamp,category,description,latitude,longitude,municipality,district,state,age_group,...,day_of_month,week_of_year,month,quarter,is_weekend,is_business_hours,is_morning,is_afternoon,date,week
2,ISS_20250613033708_1610,2024-04-21 11:37:00,Wirtschaft,Die Baustelle an der Hauptgeschäftsstraße daue...,54.361981,10.317594,Passade,Gemeinde,Schleswig-Holstein,45-54,...,21,16,2024-04,2,True,True,True,False,2024-04-21,2024-04-15/2024-04-21
3,ISS_20250613033645_2139,2023-09-11 20:59:00,Migration,Die Bearbeitung meines Antrags auf Familienzus...,53.231994,9.797590,Welle,Gemeinde,Niedersachsen,35-44,...,11,37,2023-09,3,False,False,False,False,2023-09-11,2023-09-11/2023-09-17
5,ISS_20250613033646_6220,2023-11-12 07:51:00,Verkehr,Die Bushaltestelle an der Rosenbacher Straße i...,50.792947,7.573647,Windeck,Gemeinde,Nordrhein-Westfalen,55-64,...,12,45,2023-11,4,True,False,True,False,2023-11-12,2023-11-06/2023-11-12
10,ISS_20250613033645_4927,2024-03-05 16:24:00,Verkehr,Die Baustelle in der Erfurter Straße dauert sc...,51.100001,10.648215,Bad Langensalza,Stadt,Thüringen,35-44,...,5,10,2024-03,1,False,True,False,True,2024-03-05,2024-03-04/2024-03-10
11,ISS_20250613033645_9007,2024-06-18 13:20:00,Gesundheit,Die Wartezeit für einen Facharzttermin beim Or...,51.774097,6.420249,Rees,Stadt,Nordrhein-Westfalen,35-44,...,18,25,2024-06,2,False,True,False,True,2024-06-18,2024-06-17/2024-06-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
985,ISS_20250613034255_1250,2024-09-07 09:18:00,Umwelt,Die Mülltonne in unserem Wohngebiet wird seit ...,49.202464,10.186358,Schnelldorf,Gemeinde,Bayern,35-44,...,7,36,2024-09,3,True,True,True,False,2024-09-07,2024-09-02/2024-09-08
986,ISS_20250613034254_7903,2024-11-01 16:05:00,Digitalisierung,Das Online-Portal für den Studienausweis ist s...,48.380503,10.052970,Neu-Ulm,Stadt,Bayern,18-24,...,1,44,2024-11,4,False,True,False,True,2024-11-01,2024-10-28/2024-11-03
990,ISS_20250613034257_4685,2024-11-30 08:29:00,Bildung,Die Turnhalle der Grundschule ist seit drei Mo...,52.501304,12.838678,Ketzin/Havel,Stadt,Brandenburg,35-44,...,30,48,2024-11,4,True,True,True,False,2024-11-30,2024-11-25/2024-12-01
991,ISS_20250613034257_6904,2023-10-20 07:22:00,Wirtschaft,Die Baustelle in der Innenstadt dauert schon ü...,50.419337,8.632332,Butzbach,Stadt,Hessen,45-54,...,20,42,2023-10,4,False,False,True,False,2023-10-20,2023-10-16/2023-10-22


In [9]:
import geopandas as gpd
import pandas as pd
import folium

# Load data
issues_df = pd.read_csv('../data/challenge_2/complete_issues_data.csv')
states = gpd.read_file(
    "../vg5000_12-31.gk3.shape.ebenen/vg5000_ebenen_1231/VG5000_LAN.shp"
).to_crs("EPSG:4326")

# Count issues per state
issues_per_state = (
    issues_df.groupby('state')
    .size()
    .reset_index(name='issue_count')
)

# Merge
states_with_data = states.merge(
    issues_per_state,
    left_on='GEN',
    right_on='state',
    how='left'
)

# **Drop any datetime columns** and keep only GEN, issue_count, geometry
datetime_cols = states_with_data.select_dtypes(['datetime64[ns]']).columns
states_for_map = states_with_data.drop(columns=datetime_cols)[['GEN','issue_count','geometry']]

# Fill NaNs so tooltip shows 0 instead of “nan”
states_for_map['issue_count'] = states_for_map['issue_count'].fillna(0).astype(int)

# Build map
m = folium.Map(location=[51.0, 10.0], zoom_start=6)

folium.Choropleth(
    geo_data=states_for_map.to_json(),
    name='Issues by State',
    data=states_for_map,
    columns=['GEN', 'issue_count'],
    key_on='feature.properties.GEN',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Issues'
).add_to(m)

folium.features.GeoJson(
    states_for_map,
    name='State Info',
    tooltip=folium.features.GeoJsonTooltip(
        fields=['GEN', 'issue_count'],
        aliases=['State:', 'Issues:'],
        localize=True
    )
).add_to(m)

m.save('germany_issues_choropleth.html')
