# Bevölkerung im Jahr 2022
Quellen:
- **Population 2002:** https://www.data.gv.at/katalog/dataset/688b289e-ab72-3ef0-ab7e-b8dc0d2c21d1
- **Verwaltungsgrenzen:** https://www.bev.gv.at/Services/Downloads/Produktbezogene-Downloads/Unentgeltliche-Produkte/Kataster-Verzeichnisse.html
- **Bezirksgrenzen Wien:** https://www.data.gv.at/katalog/dataset/stat_gliederung-osterreichs-in-politische-bezirke131e2

In [None]:
import pandas as pd, datetime as dt, numpy as np
import matplotlib.pyplot as plt
# Allow up to 128 rows to be shown when a frame is displayed.
pd.options.display.max_rows = 128

In [None]:
# Gender lookup table: keep the code plus the German and English labels.
genders = (
    pd.read_csv("OGD_bevstandjbab2002_BevStand_2022_C-C11-0.csv", sep=";", encoding="utf_8")
    .convert_dtypes()[["code", "name", "en_name"]]
    .rename(columns={"name": "NameDe", "en_name": "NameEn"})
)
# The Id is the single digit that follows the last "-" of the code.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
genders["Id"] = genders["code"].str.extract(r"-(\d)$", expand=False)
# Indexed by code so that the population table can join on it.
genders = genders.set_index("code")

In [None]:
def _age_range(age: int, width: int) -> str:
    """Return the label of the `width`-year bucket that contains `age`.

    Ages of 100 and above all share the label "100 und älter"; below that the
    label is "<lower>-<upper>" with both bounds inclusive (e.g. "20-24").
    """
    if age >= 100:
        return "100 und älter"
    lower = width * (age // width)  # integer floor division, no float round trip
    return f"{lower}-{lower + width - 1}"


# Age lookup table, indexed by code.
ages = (
    pd.read_csv("OGD_bevstandjbab2002_BevStand_2022_C-GALTEJ112-0.csv", sep=";", encoding="utf_8")
    .convert_dtypes()[["code", "name"]]
    .set_index("code")
)
# The numeric age is the run of digits at the start of the label.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
ages["Value"] = ages["name"].str.extract(r"^(\d+)", expand=False).astype("int")
ages["Range5"] = ages["Value"].apply(_age_range, width=5).astype("string")
ages["Range10"] = ages["Value"].apply(_age_range, width=10).astype("string")
# The original label is no longer needed once Value and the ranges exist.
ages = ages.drop(columns="name")

Geoinformationen lesen. Wien hat ein eigenes File.

In [None]:
# Area per municipality outside Vienna (GKZ below 90000): sum the "area"
# values of all rows that share the same GKZ / PG / PB / BL combination.
cities_area = (
    pd.read_csv("cities_area.csv.bz2", sep=";", encoding="utf_8")
    .convert_dtypes()
    .query("GKZ < 90000")
    .groupby(["GKZ", "PG", "PB", "BL"], as_index=False)
    .agg(area=("area", "sum"))
)

# Vienna comes from its own file with one row per district. The GKZ is derived
# from the district id as g_id * 100 + 1; the district name is used for both
# the municipality (PG) and the district (PB) column.
wien_area = (
    pd.read_csv("wien_bezirke_area.csv", sep=";", encoding="utf_8")
    .convert_dtypes()
    .assign(
        GKZ=lambda districts: districts["g_id"] * 100 + 1,
        PG=lambda districts: districts["g_name"],
        PB=lambda districts: districts["g_name"],
        BL="Wien",
    )
)

# Stack both sources, switch to the final column names and index by GKZ.
# Area is divided by 1,000,000 (presumably m² -> km², confirm against the
# source files) and rounded to three decimals.
geo_columns = ["GKZ", "PG", "PB", "BL", "area"]
cities_area = (
    pd.concat([cities_area, wien_area[geo_columns]])
    .rename(columns={"area": "Area", "PG": "Name", "PB": "Bezirk", "BL": "Bundesland"})
    .set_index("GKZ")
    .assign(Area=lambda merged: (merged["Area"] / 1_000_000).round(3))
)

# X/Y point per municipality outside Vienna (GKZ below 90000).
cities_means = (
    pd.read_csv("cities_means.csv", sep=";", encoding="utf_8")[["GKZ", "X", "Y"]]
    .convert_dtypes()
    .query("GKZ < 90000")
)

# Vienna districts come from a separate file; the GKZ is derived from the
# district id as g_id * 100 + 1.
wien_means = pd.read_csv("wien_bezirke_means.csv", sep=";", encoding="utf_8")[["g_id", "X", "Y"]].convert_dtypes()
wien_means = wien_means.assign(GKZ=lambda districts: districts["g_id"] * 100 + 1)

# Stack both sources, expose X/Y as Longitude/Latitude rounded to six
# decimals, and index by GKZ.
cities_means = (
    pd.concat([cities_means, wien_means[["GKZ", "X", "Y"]]])
    .rename(columns={"X": "Longitude", "Y": "Latitude"})
    .set_index("GKZ")
    .assign(
        Longitude=lambda merged: merged["Longitude"].round(6),
        Latitude=lambda merged: merged["Latitude"].round(6),
    )
)

In [None]:
# Municipality lookup table: only the code column of the classification file is used.
cities = pd.read_csv("OGD_bevstandjbab2002_BevStand_2022_C-GRGEMAKT-0.csv", sep=";", encoding="utf_8") \
    [["code"]].convert_dtypes()
# The numeric Kennziffer is the digit run after the "GRGEMAKT-" prefix.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
cities["Kennziffer"] = cities["code"].str.extract(r"^GRGEMAKT-(\d+)$", expand=False).astype("int")
# Matrei am Brenner has a different Kennziffer: 70370 is remapped to 70327
# (presumably the code used in the geo files, confirm against the data).
cities["Kennziffer"] = cities["Kennziffer"].mask(cities["Kennziffer"] == 70370, 70327)
# Vienna as a whole (90001) is represented by its districts instead, and
# "unknown" (0) does not occur either, so both codes are dropped.
cities = cities[~cities["Kennziffer"].isin([0, 90001])]
# Attach area and coordinates via the Kennziffer, then index by the original code.
cities = (
    cities
    .join(cities_area, on="Kennziffer")
    .join(cities_means, on="Kennziffer")
    .set_index("code")
)

In [None]:
# Source column -> short working name. The first three are join keys into the
# gender, city and age lookup tables; the fourth is kept as "Population".
source_columns = {
    "C-C11-0": "gender",
    "C-GRGEMAKT-0": "city",
    "C-GALTEJ112-0": "age",
    "F-ISIS-1": "Population",
}

# Read the fact table, attach the three lookup tables (their columns prefixed
# with Gender/City/Age) and drop the raw key columns afterwards.
population = (
    pd.read_csv("OGD_bevstandjbab2002_BevStand_2022.csv.bz2", sep=";", encoding="utf_8")
    .convert_dtypes()[list(source_columns)]
    .rename(columns=source_columns)
    .join(genders.add_prefix("Gender"), on="gender")
    .join(cities.add_prefix("City"), on="city")
    .join(ages.add_prefix("Age"), on="age")
    .drop(columns=["gender", "city", "age"])
)

# Export the joined table twice: once as UTF-8 and once as UTF-16.
population.to_csv("../population.csv", sep=";", encoding="utf-8", index=False)
population.to_csv("../population_unicode.csv", sep=";", encoding="utf-16", index=False)

# Rows without a CityName found no match in the city lookup; report them.
missing_city = population["CityName"].isna()
if missing_city.any():
    print("Achtung: Fehlende Orte erkannt.")
    print(population[missing_city])

Maximale Länge der Werte (als String) je Spalte

In [None]:
# Length of every value after casting its column to string, then the maximum per column.
string_lengths = population.apply(lambda column: column.astype("string").str.len())
string_lengths.max()

In [None]:
# Summary statistics of the joined population table (pandas' default describe()).
population.describe()

In [None]:
# Total population per Bundesland, copied to the clipboard instead of displayed.
# NOTE(review): to_clipboard presumably needs a working system clipboard; verify on headless kernels.
population_by_state = population.groupby("CityBundesland")[["Population"]].sum()
population_by_state.to_clipboard()