In [1]:
import pandas as pd
import geopandas as gpd
import pyogrio
import h3pandas
import shapely.geometry as sgeo
import plotly.express as px

### Settings

In [2]:
# Directories, relative to the notebook location.
# NOTE(review): the cells visible in this notebook spell their paths out
# literally instead of using these settings — confirm whether that is intended.
output_path = "../../output"
data_path = "../../analysis/data"

# Prefix of the output files. NOTE(review): the files read below start with
# "mun_", not "mun_1pct_" — confirm which prefix is the right one.
output_prefix = "mun_1pct_"

# Sampling share; presumably the fraction of the population that was
# synthesized (1 %) — TODO confirm.
sampling_factor = 0.01

### Prepare spatial analysis

In [3]:
# Zoning system: each record carries a municipality_id, a kreis_id and a geometry.
zones_file = "../../output/mun_zones.gpkg"
df_zones = pyogrio.read_dataframe(zones_file)

# Kreis shapes: zone geometries merged per kreis_id, with the id restored as a column.
df_kreis = df_zones[["kreis_id", "geometry"]].dissolve(by = "kreis_id").reset_index()

# Municipality shapes, detached from the zone table.
df_municipalities = df_zones[["municipality_id", "geometry"]].copy()

# Same table as the municipalities, but with each shape replaced by its centroid.
municipality_centroids = df_municipalities["geometry"].centroid
df_centroids = df_municipalities.copy()
df_centroids["geometry"] = municipality_centroids

### Load data

In [4]:
# Activity table; the analysis cells rely on its "person_id", "purpose"
# and "geometry" columns.
activities_file = "../../output/mun_activities.gpkg"
df_activities = pyogrio.read_dataframe(activities_file)

### Spatial analysis

In [5]:
# Population density: every person contributes one "home" activity, which is
# counted per H3 cell (resolution 7) and written out as a GeoPackage.
df_population = df_activities[df_activities["purpose"] == "home"].drop_duplicates("person_id")

# The "size" aggregate arrives in a column labelled 0, so it is given a
# readable name before export. The name must not carry trailing whitespace,
# otherwise the attribute is awkward to address in downstream tools.
(
    df_population
    .to_crs("EPSG:4326")  # reproject to WGS84 before H3 indexing
    .h3.geo_to_h3_aggregate(7, "size")
    .rename(columns = { 0: "population" })
    .to_file("../../output/analysis/population.gpkg")
)

In [6]:
# Work density: every person contributes at most one "work" activity, which
# is counted per H3 cell (resolution 7) and written out as a GeoPackage.
df_employment = df_activities[df_activities["purpose"] == "work"].drop_duplicates("person_id")

# The "size" aggregate arrives in a column labelled 0, so it is given a
# readable name before export. The name must not carry trailing whitespace,
# otherwise the attribute is awkward to address in downstream tools.
(
    df_employment
    .to_crs("EPSG:4326")  # reproject to WGS84 before H3 indexing
    .h3.geo_to_h3_aggregate(7, "size")
    .rename(columns = { 0: "employment" })
    .to_file("../../output/analysis/employment.gpkg")
)

In [7]:
# Education density: every person contributes at most one "education"
# activity, which is counted per H3 cell (resolution 7) and written out as a
# GeoPackage.
df_education = df_activities[df_activities["purpose"] == "education"].drop_duplicates("person_id")

# The "size" aggregate arrives in a column labelled 0, so it is given a
# readable name before export. The name must not carry trailing whitespace,
# otherwise the attribute is awkward to address in downstream tools.
(
    df_education
    .to_crs("EPSG:4326")  # reproject to WGS84 before H3 indexing
    .h3.geo_to_h3_aggregate(7, "size")
    .rename(columns = { 0: "education" })
    .to_file("../../output/analysis/education.gpkg")
)

In [8]:
# Flow: number of persons living in one municipality and working in another,
# exported as straight lines between the municipality centroids.

# One home and one work location per person
df_home_work = df_activities[
    (df_activities["purpose"] == "home") | (df_activities["purpose"] == "work")
].drop_duplicates(["person_id", "purpose"])[["person_id", "purpose", "geometry"]]

df_origin = df_home_work[df_home_work["purpose"] == "home"]
df_destination = df_home_work[df_home_work["purpose"] == "work"]

# Locate every point in a municipality. The spatial join yields one row per
# matching polygon, so a point touching several municipalities (e.g. on a
# shared boundary) would appear multiple times and inflate the counts below;
# keep a single match per person.
df_origin = gpd.sjoin(df_origin, df_municipalities)[
    ["person_id", "municipality_id"]
].drop_duplicates("person_id")

df_destination = gpd.sjoin(df_destination, df_municipalities)[
    ["person_id", "municipality_id"]
].drop_duplicates("person_id")

df_origin = df_origin.rename(columns = { "municipality_id": "origin_id" })
df_destination = df_destination.rename(columns = { "municipality_id": "destination_id" })

# Persons per origin-destination pair (only persons with both a located home
# and a located work place survive the inner merge)
df_flow = pd.merge(df_origin, df_destination, on = "person_id")
df_flow = df_flow.groupby(["origin_id", "destination_id"]).size().reset_index(name = "flow")

# Intra-municipal flows would collapse to zero-length lines; discard them
# before any geometry is attached
df_flow = df_flow[df_flow["origin_id"] != df_flow["destination_id"]]

# Attach the centroid of the origin and of the destination municipality
df_flow = pd.merge(df_flow, df_centroids.rename(columns = {
    "municipality_id": "origin_id", "geometry": "origin_geometry"
}), on = "origin_id")

df_flow = pd.merge(df_flow, df_centroids.rename(columns = {
    "municipality_id": "destination_id", "geometry": "destination_geometry"
}), on = "destination_id")

# Straight line from origin centroid to destination centroid
df_flow["geometry"] = [
    sgeo.LineString([origin, destination])
    for origin, destination in zip(df_flow["origin_geometry"], df_flow["destination_geometry"])
]

# The lines inherit the coordinate reference system of the centroids
df_flow = gpd.GeoDataFrame(df_flow, crs = df_centroids.crs)[[
    "origin_id", "destination_id", "flow", "geometry"
]]

df_flow.to_file("../../output/analysis/flow.gpkg")