In [4]:
import geopandas as gpd
import pandas as pd
import yaml
import numpy as np

### Assign operators
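This notebook assigns an operator to each parcel by random sampling, where the probability of drawing an operator is proportional to its market share. The sampling is seeded so that results are reproducible.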

In [2]:
# Manage inputs and outputs
# Defaults below apply when the notebook is run by hand; they are replaced
# when a `snakemake` object has been injected into the namespace.
parcels_path = "../../results/parcels/located_parcels.baseline_2022.gpkg"
operators_path = "../../resources/operators.yml"
output_path = "../../results/parcels/assigned_parcels.gpkg"
random_seed = 0

if "snakemake" in locals():
    parcels_path, operators_path = snakemake.input["parcels"], snakemake.input["operators"]
    output_path = snakemake.output[0]

    # A single unnamed parameter is unwrapped and used as the parameter
    # container; otherwise the params object itself is used.
    has_single_unnamed_param = len(snakemake.params) == 1 and len(snakemake.params.keys()) == 0

    if has_single_unnamed_param:
        params = snakemake.params[0]
    else:
        params = snakemake.params

    # NOTE(review): if `params` is the snakemake params object rather than a
    # dict, confirm that `in` tests parameter names and not values.
    if "random_seed" in params:
        random_seed = params["random_seed"]

In [3]:
# Read parcels
# Load the parcel table from the file at parcels_path using GeoPandas
df_parcels = gpd.read_file(parcels_path)

In [8]:
# Read operator data
# The YAML file is expected to hold an "operators" mapping from operator id
# to a mapping that contains at least a "market_share" entry.
with open(operators_path) as f:
    operator_data = yaml.load(f, yaml.SafeLoader)["operators"]

# Collect one record per operator, then build the frame in a single step
operator_records = [
    {"operator": operator_id, "weight": operator["market_share"]}
    for operator_id, operator in operator_data.items()
]

# Fixing the columns keeps the frame well-formed even without any operators
df_operators = pd.DataFrame.from_records(operator_records, columns=["operator", "weight"])

# Fail loudly on unusable market shares: a zero total would turn every weight
# into NaN, which silently assigns all parcels to the first operator later on.
total_weight = df_operators["weight"].sum()

if (df_operators["weight"] < 0).any() or not total_weight > 0:
    raise ValueError("Operator market shares must be non-negative and sum to a positive value")

# Normalize the market shares so that the weights sum to one
df_operators["weight"] /= total_weight

In [9]:
# Sample operators
# Build the cumulative distribution over operators; dividing by the last
# entry makes it end at exactly one.
cdf = np.cumsum(df_operators["weight"].values)
cdf /= cdf[-1]

# Draw one uniform number in [0, 1) per parcel from the seeded global generator
np.random.seed(random_seed)
draws = np.random.random(len(df_parcels))

# For a non-decreasing cdf, side="left" yields the number of entries strictly
# below each draw, i.e. the position of the operator whose interval contains
# it. This is done in one vectorized call instead of one scan per parcel.
indices = np.searchsorted(cdf, draws, side="left")
df_parcels["operator"] = df_operators["operator"].values[indices]

In [10]:
# Output
# Write the parcels, now including the sampled "operator" column, to output_path
df_parcels.to_file(output_path)