Data Source: https://ergebnisse.zensus2022.de

In [20]:
import os
import pandas as pd

# Collect one frame per administrative level and concatenate them once at the
# end. Growing a DataFrame with pd.concat inside the loop re-copies all rows on
# every iteration, and seeding it with an empty DataFrame makes the resulting
# dtypes / index name depend on how pandas treats empty concat operands.
level_frames = []

for level in ["gemeinde", "kreis", "bundesland"]:
    # Read in the data from a dedicated csv file.
    # The frame is deliberately not named `input`: that would shadow the
    # Python builtin for the rest of the notebook session.
    level_frame = pd.read_csv(
        f"{os.getcwd()}/Quellen/bevoelkerung/2024-07-28_Zensus2022_bevoelkerung-{level}.csv",
        sep=";",
        # The file's own header is skipped (see skiprows), so column names are
        # supplied here; only the three listed in `usecols` are kept.
        names=[
            "regkey",
            "name",
            "PERSONEN",
            "ANZAHL",
            "population",
            "QUALITAET",
        ],
        index_col=0,  # "regkey" becomes the index
        usecols=["regkey", "name", "population"],
        engine="python",  # skipfooter is not supported by the C engine
        dtype={
            "regkey": "str",  # String because of leading zeros
            "name": "str",
            "population": "int64",
        },
        # Presumably header / footer metadata lines of the export - TODO confirm
        skiprows=4,
        skipfooter=4,
    )

    # Remember which file (administrative level) each row came from
    level_frame["level"] = level
    # Remove leading whitespace from the names
    level_frame["name"] = level_frame["name"].str.lstrip()

    level_frames.append(level_frame)

# Single concatenation; rows keep their regkey index (duplicates are possible)
data = pd.concat(level_frames)


# Reshape names from "X, Stadt" to "Stadt X" for better readability
# Example: "München, Landeshauptstadt" -> "Landeshauptstadt München"
def rearrange_name(s):
    """Move a trailing designation in front of the place name.

    The string is split at its rightmost comma; the part after the comma
    (the designation, e.g. "Stadt") is placed before the part in front of it
    (the place name), separated by a single space.

    The place name keeps its original casing. Only the first character of the
    designation is upper-cased, so lowercase words inside names
    ("Frankfurt am Main") and abbreviations ("GKSt") are not altered the way
    ``str.title()`` would alter them.

    Args:
        s: The name to rearrange. Values that are not strings (e.g. NaN) or
            that contain no comma are returned unchanged.

    Returns:
        The rearranged name, or ``s`` itself if nothing had to be rearranged.
    """
    # Non-string values (missing data) and comma-free names pass through as-is
    if not isinstance(s, str) or "," not in s:
        return s

    # Split at the rightmost comma so commas inside the name itself survive
    name, designation = (part.strip() for part in s.rsplit(",", 1))

    # Capitalise only the first letter, e.g. "kreisfreie Stadt" -> "Kreisfreie Stadt"
    designation = designation[:1].upper() + designation[1:]

    # Skip empty parts so no leading / trailing blank is produced
    return " ".join(part for part in (designation, name) if part)


data["name"] = data["name"].apply(rearrange_name)

# Handle "Deutschland Gesamt" (DG): Set special level. All rows labelled "DG"
# receive the same level here, so repeated "DG" rows with equal name and
# population become exact duplicates that are removed below.
data.loc["DG", "level"] = "bundesrepublik"

# Reshape all index values to 12-character regkeys by adding trailing zeros
# where necessary. NOTE: the "DG" label is padded too ("DG0000000000").
data.index = data.index.str.ljust(12, "0")

# Remove repeated rows. The regkey index is part of the comparison:
# DataFrame.drop_duplicates() alone only looks at the columns and would also
# discard a *different* region that happens to share name, population and level.
is_repeat = data.reset_index().duplicated().to_numpy()
data = data[~is_repeat]

data = data.sort_index()

In [21]:
# Persist the combined table as a semicolon-separated csv file;
# the regkey index is written as the first column.
data.to_csv(
    path_or_buf=f"{os.getcwd()}/Quellen/bevoelkerung/Zensus2022_Bevoelkerungszahl_regkey.csv",
    sep=";",
    index=True,
)

In [22]:
# Load the processed csv file back into a DataFrame, using its first column as index
test = pd.read_csv(
    filepath_or_buffer=f"{os.getcwd()}/Quellen/bevoelkerung/Zensus2022_Bevoelkerungszahl_regkey.csv",
    index_col=0,
    sep=";",
    dtype=dict(
        regkey="str",  # text, so that leading zeros are kept
        name="str",
        population="int64",
        level="str",
    ),
)

In [23]:
# Build one subset per administrative level ("gemeinde", "kreis", "bundesland")
gemeinden, kreise, bundeslaender = (
    test.loc[test["level"].eq(level_name)]
    for level_name in ("gemeinde", "kreis", "bundesland")
)

In [24]:
# Display the rows whose level is "gemeinde"
gemeinden

Unnamed: 0_level_0,name,population,level
regkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
010010000000,Stadt Flensburg,95015,gemeinde
010020000000,Landeshauptstadt Kiel,249132,gemeinde
010030000000,Hansestadt Lübeck,215958,gemeinde
010040000000,Stadt Neumünster,79625,gemeinde
010510011011,Stadt Brunsbüttel,12573,gemeinde
...,...,...,...
160775051011,Göpfersdorf,215,gemeinde
160775051023,Langenleuba-Niederhain,1722,gemeinde
160775051036,Nobitz,7023,gemeinde
160775052003,Dobitschen,418,gemeinde


In [25]:
# Display the rows whose level is "kreis"
kreise

Unnamed: 0_level_0,name,population,level
regkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
010010000000,Stadt Flensburg,95015,kreis
010020000000,Landeshauptstadt Kiel,249132,kreis
010030000000,Hansestadt Lübeck,215958,kreis
010040000000,Stadt Neumünster,79625,kreis
010510000000,Dithmarschen,132810,kreis
...,...,...,...
160730000000,Saalfeld-Rudolstadt,101106,kreis
160740000000,Saale-Holzland-Kreis,83347,kreis
160750000000,Saale-Orla-Kreis,78378,kreis
160760000000,Greiz,95217,kreis


In [26]:
# Display the rows whose level is "bundesland"
bundeslaender

Unnamed: 0_level_0,name,population,level
regkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10000000000,Schleswig-Holstein,2927542,bundesland
20000000000,Hamburg,1808846,bundesland
30000000000,Niedersachsen,7943265,bundesland
40000000000,Bremen,693204,bundesland
50000000000,Nordrhein-Westfalen,17890489,bundesland
60000000000,Hessen,6207278,bundesland
70000000000,Rheinland-Pfalz,4094169,bundesland
80000000000,Baden-Württemberg,11104040,bundesland
90000000000,Bayern,13038724,bundesland
100000000000,Saarland,1006864,bundesland
