In [1]:
import json
import warnings

import pandas as pd

### Urbanicity

We found the following dataset:
- https://www.waarstaatjegemeente.nl/viewer

It contains all the municipalities in the Netherlands, each with an urbanicity score between 1.0 and 5.0.

A score of 1.0 indicates an urban municipality and 5.0 a rural one.

In [17]:
"""
Load the CSV file using pandas.
Use utf-8 encoding to ensure that special characters are handled correctly.
Keep only the columns that we need: Gemeenten, Stedelijkheid|2025.
Rename/translate the columns to English.
Strip and lower-case the municipality names to improve merging.
"""
# Source column -> English column; the key order is also the output column order.
urbanicity_columns = {
    "Gemeenten": "municipality",
    "Stedelijkheid|2025": "urbanicity_level",
}

urban_rural_df = (
    pd.read_csv(
        "Demografie en gebied - 2025 - Gemeenten.csv",
        sep=";",
        decimal=",",
        encoding="utf-8",
    )
    .loc[:, list(urbanicity_columns)]
    .rename(columns=urbanicity_columns)
    # Normalise the merge key: no surrounding whitespace, all lower case.
    .assign(municipality=lambda frame: frame["municipality"].str.strip().str.lower())
)
urban_rural_df.head()


Unnamed: 0,municipality,urbanicity_level
0,aa en hunze,5.0
1,aalsmeer,4.0
2,aalten,4.0
3,achtkarspelen,5.0
4,alblasserdam,2.0


In [22]:
"""
Load the raw JSON data.
Flatten it into a pandas DataFrame (nested keys are joined with "_").
Select only the relevant columns: municipality name, population, largest party.
Rename/translate the columns to English.
Strip and lower-case the municipality names for merging.
"""
with open("uitslagen.json", encoding="utf-8") as f:
    election_raw = json.load(f)

gemeentes_data = election_raw["gemeentes"]

election_df = pd.json_normalize(gemeentes_data, sep="_")

# "gemeente_provincie_aantal_inwoners" is the flattened path
# gemeente -> provincie -> aantal_inwoners, i.e. the population of the
# municipality's *province*. Using it as "inhabitants" gave every municipality
# in a province the same value (Arnhem, Zutphen and Oldebroek all showed
# 2133751). Prefer the municipality-level count when the data provides it.
# NOTE(review): "gemeente_aantal_inwoners" is the assumed key for the
# municipality's own population - TODO confirm against uitslagen.json.
municipality_population_col = "gemeente_aantal_inwoners"
province_population_col = "gemeente_provincie_aantal_inwoners"

if municipality_population_col in election_df.columns:
    population_col = municipality_population_col
else:
    # Keep the notebook running with the previous behaviour, but make the
    # limitation visible instead of silently mislabelling the column.
    warnings.warn(
        f"Column '{municipality_population_col}' not found in uitslagen.json; "
        "'inhabitants' falls back to the province-level population "
        f"('{province_population_col}')."
    )
    population_col = province_population_col

election_df = election_df[["gemeente_naam", population_col, "eerste_partij_name"]]
election_df = election_df.rename(columns={
    "gemeente_naam": "municipality",
    population_col: "inhabitants",
    "eerste_partij_name": "largest_party",
})

# Normalise the merge key: no surrounding whitespace, all lower case.
election_df["municipality"] = election_df["municipality"].str.strip().str.lower()
election_df.head()

Unnamed: 0,municipality,inhabitants,largest_party
0,arnhem,2133751,GROENLINKS / Partij van de Arbeid (PvdA)
1,heerlen,1128334,PVV (Partij voor de Vrijheid)
2,gilze en rijen,2626368,PVV (Partij voor de Vrijheid)
3,zutphen,2133751,GROENLINKS / Partij van de Arbeid (PvdA)
4,oldebroek,2133751,PVV (Partij voor de Vrijheid)


In [23]:
# Merge the two data sets on the normalised municipality name.
merged_df = pd.merge(urban_rural_df, election_df, on="municipality", how="inner")

# An inner join silently drops every municipality whose name is not spelled
# identically in both sources, so list the names that found no partner.
# merged_df itself is unchanged by this check.
only_in_urbanicity = urban_rural_df.loc[
    ~urban_rural_df["municipality"].isin(election_df["municipality"]),
    "municipality",
]
only_in_election = election_df.loc[
    ~election_df["municipality"].isin(urban_rural_df["municipality"]),
    "municipality",
]
if len(only_in_urbanicity) or len(only_in_election):
    print(
        "Dropped by the inner merge: "
        f"{len(only_in_urbanicity)} urbanicity row(s) {only_in_urbanicity.tolist()}, "
        f"{len(only_in_election)} election row(s) {only_in_election.tolist()}"
    )

merged_df.head()

Unnamed: 0,municipality,urbanicity_level,inhabitants,largest_party
0,aa en hunze,5.0,502120,PVV (Partij voor de Vrijheid)
1,aalsmeer,4.0,2956223,PVV (Partij voor de Vrijheid)
2,aalten,4.0,2133751,PVV (Partij voor de Vrijheid)
3,achtkarspelen,5.0,659612,PVV (Partij voor de Vrijheid)
4,alblasserdam,2.0,3841910,PVV (Partij voor de Vrijheid)


In [25]:
# Persist the merged table as a UTF-8 encoded CSV, leaving out the row index.
merged_df.to_csv("merged_dataset.csv", encoding="utf-8", index=False)