In [1]:
import itertools
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import scipy.stats as stats
from tqdm.notebook import tqdm

### Generate parcels
This notebook loads the calibrated parcel model and generates parcels for the households of the *study area* (as opposed to the Lyon Metropolis, on which the model was calibrated).

In [2]:
# Inputs and outputs: defaults used when the notebook is run on its own
persons_path = "../../resources/external/population_2022/lead_2022_100pct_persons.csv"
homes_path = "../../resources/external/population_2022/lead_2022_100pct_homes.gpkg"
study_area_path = "../../resources/spatial/study_area.gpkg"
model_path = "../../results/parcel_model.parquet"
output_path = "../../results/parcels/parcels.gpkg"

# Tunable parameters
random_seed = 0       # seed for numpy's global random state
delivery_days = 260   # divisor applied to the model weight
scaling = 1.0         # multiplier applied to the model weight
crs = "EPSG:2154"     # CRS into which homes and study area are projected

# When a `snakemake` object is available, its inputs, output and parameters
# take precedence over the defaults above.
if "snakemake" in locals():
    persons_path = snakemake.input["persons"]
    homes_path = snakemake.input["homes"]
    study_area_path = snakemake.input["study_area"]
    model_path = snakemake.input["model"]
    output_path = snakemake.output[0]

    # A single unnamed params entry is itself used as the parameter container;
    # otherwise the params object is used directly.
    # NOTE(review): the `in` checks below rely on the membership semantics of
    # that container — confirm they test parameter names when params are named.
    if len(snakemake.params) == 1 and len(snakemake.params.keys()) == 0:
        params = snakemake.params[0]
    else:
        params = snakemake.params

    # Each parameter keeps its default unless the container provides it
    random_seed = params["random_seed"] if "random_seed" in params else random_seed
    delivery_days = params["delivery_days"] if "delivery_days" in params else delivery_days
    scaling = params["scaling"] if "scaling" in params else scaling
    crs = params["crs"] if "crs" in params else crs

In [3]:
# Study area: all features dissolved into one, projected to the working CRS
df_area = gpd.read_file(study_area_path).dissolve().to_crs(crs)

# Home locations: only the household identifier and the geometry are kept
df_homes = gpd.read_file(homes_path)
df_homes = df_homes[["household_id", "geometry"]].to_crs(crs)

# Keep only the homes that lie within the study area
df_homes = df_homes.sjoin(df_area, predicate = "within")

In [4]:
# Candidate reference persons: adults (age >= 18) whose household has a home
# among the homes retained for the study area
df_persons = (
    pd.read_csv(persons_path, sep = ";", usecols = [
        "household_id", "age", "socioprofessional_class"])
    .loc[lambda df: df["age"] >= 18]
    .loc[lambda df: df["household_id"].isin(df_homes["household_id"])]
)

In [5]:
# Assign age class
# Ages matching none of the ranges stay NaN, so the integer conversion below
# fails loudly instead of silently producing a class.
df_persons["ac"] = np.select(
    [
        df_persons["age"].between(18, 34), # 18 - 34
        df_persons["age"].between(35, 49), # 35 - 49
        df_persons["age"].between(50, 64), # 50 - 64
        df_persons["age"] >= 65,           # 65+
    ],
    [0, 1, 2, 3],
    default = np.nan,
)
df_persons["ac"] = df_persons["ac"].astype(int)

In [6]:
# Assign socioprofessional class
# Codes missing from the lookup become NaN, so the integer conversion below
# fails loudly for unexpected codes.
df_persons["sc"] = df_persons["socioprofessional_class"].map({
    1: -1, # Agriculture (presumably has no model class — TODO confirm)
    2: 0,  # CE,Artis,Com
    3: 1,  # Cadre
    4: 2,  # Prof Int
    5: 3,  # Employe
    6: 4,  # Ouvrier
    7: 5,  # Retraite
    8: 6,  # Sans Act
})
df_persons["sc"] = df_persons["sc"].astype(int)

In [7]:
# Assign household size class
# The household size is the number of rows of df_persons per household.
# NOTE(review): df_persons has already been restricted to age >= 18 at this
# point, so the size counts adults only — confirm that this matches the
# household size definition used when the model was calibrated.
df_household_size = (
    df_persons.groupby("household_id").size()
    .rename("household_size")
    .reset_index()
)
df_persons = df_persons.merge(df_household_size, on = "household_id")

# Sizes are at least 1 (every household has at least one row), so capping
# at 4 and shifting by one yields: 1 -> 0, 2 -> 1, 3 -> 2, 4+ -> 3
df_persons["hc"] = (df_persons["household_size"].clip(upper = 4) - 1).astype(int)

In [8]:
# Sample reference persons
# Seed numpy's global random state so that the selection is reproducible
np.random.seed(random_seed)

# Bring the persons into a random order, then keep the first person
# encountered for every household as its reference person
sorter = np.random.permutation(len(df_persons))
df_persons = df_persons.iloc[sorter].drop_duplicates("household_id")

In [9]:
# Load model
# Parcel model read from parquet. It is joined to the reference persons on the
# class columns "ac", "hc" and "sc", and its "weight" and "home_probability"
# columns are read when the parcels are generated.
df_model = pd.read_parquet(model_path)

In [10]:
# Generate parcels
# Attach the model columns to every reference person through its class
# (age class, household size class, socioprofessional class). The inner join
# drops persons whose class is not part of the model.
df_parcels = df_persons[["household_id", "ac", "hc", "sc"]].copy()
df_parcels = pd.merge(df_parcels, df_model, on = ["ac", "hc", "sc"])

# Create the count columns up front so that they exist even when the merged
# table is empty; otherwise later accesses to "home_parcels" would fail.
df_parcels["parcels"] = 0
df_parcels["home_parcels"] = 0

# Draw the counts class by class for every class present in the merged table.
# groupby yields the classes sorted by (sc, ac, hc), which is the order of
# nested loops over sc, ac and hc, so the sequence of random draws for a
# given seed is kept.
for _, df_class in df_parcels.groupby(["sc", "ac", "hc"]):
    count = len(df_class)

    # Poisson mean per household: the class weight, scaled and divided by
    # the number of delivery days. All rows of a class carry the same model
    # values, so the first one is used.
    weight = scaling * df_class["weight"].values[0] / delivery_days
    home_weight = weight * df_class["home_probability"].values[0]

    # Total parcels and home-delivered parcels are drawn independently
    df_parcels.loc[df_class.index, "parcels"] = stats.poisson(weight).rvs(count)
    df_parcels.loc[df_class.index, "home_parcels"] = stats.poisson(home_weight).rvs(count)

In [13]:
# Extract non-zero households
# Take an explicit copy of the filtered rows: the column conversion below then
# acts on an independent frame instead of a slice of the unfiltered table,
# which avoids pandas' SettingWithCopyWarning.
df_parcels = df_parcels[df_parcels["home_parcels"] > 0].copy()
df_parcels["home_parcels"] = df_parcels["home_parcels"].astype(int)

# One row per home parcel: each household row is repeated as many times as
# it has home parcels
df_parcels = df_parcels.loc[df_parcels.index.repeat(df_parcels["home_parcels"])]
df_parcels = df_parcels[["household_id", "sc", "ac", "hc"]]

In [14]:
# Attach geometry
# Join every parcel row to the home location of its household; the homes
# table is on the left side of the join
df_parcels = df_homes.merge(df_parcels, on = "household_id")

# Keep the geometry together with the parcel attributes
df_parcels = df_parcels[["geometry", "sc", "ac", "hc", "household_id"]]

In [15]:
# Assign unique identifiers
# Parcels are numbered consecutively from zero in their current row order
df_parcels = df_parcels.assign(parcel_id = np.arange(len(df_parcels)))

In [16]:
# Output
# Make sure the destination directory exists; otherwise an interactive run
# with a missing output folder would fail only at this very last step.
Path(output_path).parent.mkdir(parents = True, exist_ok = True)

# Write the parcels to the output file
df_parcels.to_file(output_path)