In [None]:
import os
import warnings
from glob import glob
from pathlib import Path

import geopandas
import pandas
from tqdm.notebook import tqdm

In [None]:
# Root directory of the open-gira results used by the cells below.
# Path() does not expand "~" on its own, so resolve it here instead of relying
# on every downstream reader to do the expansion.
og_results_path = Path("~/projects/open-gira/results").expanduser()

In [None]:
# Load `baci_total_adm1_trade_THA.csv` from the working directory (by its name,
# presumably total trade per ADM1 region of Thailand - confirm against the file).
# NOTE(review): `trade_adm1_total` is not referenced by any later cell visible here.
trade_adm1_total = pandas.read_csv("baci_total_adm1_trade_THA.csv")

In [None]:
# Read the composite road network nodes and keep only the rows whose
# `iso_a3` column equals "THA".
nodes_path = og_results_path.joinpath(
    "composite_network", "south-east-asia-road", "nodes.gpq"
)
nodes = geopandas.read_parquet(nodes_path).loc[
    lambda frame: frame["iso_a3"] == "THA"
]

In [None]:
# Cache the level-1 boundaries with GID_0 == "THA" in `adm1_tha.gpq` so the
# full GADM GeoPackage only has to be read when the cache file is missing.
if not os.path.exists("adm1_tha.gpq"):
    adm1_path = og_results_path / "input" / "admin-boundaries" / "gadm36_levels.gpkg"
    # "pyogrio" is an I/O backend, selected with `engine`; it is not an OGR
    # driver name, so it must not be passed as `driver`.
    adm1 = geopandas.read_file(adm1_path, engine="pyogrio", layer="level1")
    adm1_tha = adm1.query('GID_0 == "THA"')
    adm1_tha.to_parquet("adm1_tha.gpq")
else:
    adm1_tha = geopandas.read_parquet("adm1_tha.gpq")

# Only the region identifier and its geometry are used by the later cells.
adm1_tha = adm1_tha[["GID_1", "geometry"]].copy()

In [None]:
def group_to_nodes(basename, nodes):
    """Sum the ``data`` column of a partitioned dataset onto the nearest nodes.

    Every file matching ``{basename}.parquet/*`` is read as a GeoDataFrame,
    each of its rows is assigned to the nearest geometry in ``nodes`` and the
    ``data`` values are summed per node ``id``. The totals are joined onto
    ``nodes`` (indexed by ``id``) and written to ``{out_base}.parquet`` and
    ``{out_base}.gpkg``, where ``out_base`` is ``basename`` with ``"3ss"``
    replaced by ``"nodes"``.

    Parameters
    ----------
    basename : str
        Dataset name without the ``.parquet`` suffix.
    nodes : geopandas.GeoDataFrame
        Nodes with at least ``id`` and ``geometry`` columns.

    Raises
    ------
    ValueError
        If no non-empty input file is found under ``{basename}.parquet/``.
    """
    out_base = basename.replace("3ss", "nodes")
    node_geometries = nodes[["id", "geometry"]]
    dfs = []
    # Silence the geographic-CRS warning only for the duration of this call
    # instead of permanently changing the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            action="ignore", message="Geometry is in a geographic CRS"
        )
        # Sorted so the files (and therefore the floating point sums) are
        # processed in a reproducible order.
        for fname in tqdm(sorted(glob(f"{basename}.parquet/*"))):
            df = geopandas.read_parquet(fname)
            if df.empty:
                continue
            df_nodes = (
                df.sjoin_nearest(node_geometries, how="left")[["id", "data"]]
                .groupby("id")
                .sum()
                .reset_index()
            )
            dfs.append(df_nodes)

    if not dfs:
        # pandas.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(f"no non-empty input files found in {basename}.parquet/")

    # A node can receive values from several files, so aggregate once more.
    data_nodes = pandas.concat(dfs).groupby("id").sum()
    data_nodes = nodes.set_index("id").join(data_nodes)
    data_nodes.to_parquet(f"{out_base}.parquet")
    data_nodes.to_file(f"{out_base}.gpkg", engine="pyogrio", driver="GPKG")

In [None]:
# Build the per-node GVA files only when the cached result is missing.
# The guard has to test the output written by group_to_nodes
# ("gva_<sector>_nodes.parquet"), not the "gva_<sector>_3ss.parquet" input it
# reads: testing the input skips the work whenever the input is present and
# attempts it only when there is nothing to read.
for sector in ("ag", "man", "serv"):
    if not os.path.exists(f"gva_{sector}_nodes.parquet"):
        group_to_nodes(f"gva_{sector}_3ss", nodes)

In [None]:
# Load the per-node GVA tables for the three sectors.
gva_man_nodes, gva_ag_nodes, gva_serv_nodes = map(
    geopandas.read_parquet,
    ("gva_man_nodes.parquet", "gva_ag_nodes.parquet", "gva_serv_nodes.parquet"),
)

In [None]:
# Attach an ADM1 region (GID_1) to every node: first by spatial join, then by
# nearest region for the nodes the spatial join left unmatched.
nodes_joined = nodes.sjoin(adm1_tha, how="left")

# Split on GID_1 only. A blanket dropna() would also discard matched nodes that
# merely have a missing value in some other column, and those nodes would not
# be picked up by the "unmatched" branch either.
has_region = nodes_joined["GID_1"].notna()
nodes_matched = nodes_joined[has_region]
nodes_unmatched = (
    nodes_joined[~has_region]
    # Remove the empty join columns so sjoin_nearest can add them again.
    .drop(columns=["index_right", "GID_1"])
    .sjoin_nearest(adm1_tha, how="left")
)

nodes_adm1 = pandas.concat([nodes_matched, nodes_unmatched]).fillna(0)
nodes_adm1.shape

In [None]:
# Join the per-node GVA of each sector onto the nodes (indexed by `id`), giving
# every `data` column its sector-specific name before it is joined.
gva_by_column = {
    "gva_ag_usd": gva_ag_nodes,
    "gva_man_usd": gva_man_nodes,
    "gva_ser_usd": gva_serv_nodes,
}

nodes_with_gva = nodes_adm1.set_index("id")
for gva_column, gva_nodes in gva_by_column.items():
    nodes_with_gva = nodes_with_gva.join(
        gva_nodes[["data"]].rename(columns={"data": gva_column})
    )

In [None]:
# Sum the node-level values per GID_1 and rename the GVA columns so they can
# sit next to the node-level columns after a merge.
columns_to_discard = ["geometry", "iso_a3", "component_id", "index_right"]
adm1_total_names = {
    "gva_ag_usd": "gva_ag_adm1_usd",
    "gva_man_usd": "gva_man_adm1_usd",
    "gva_ser_usd": "gva_ser_adm1_usd",
}

# Missing values are treated as zero before summing.
node_values = nodes_with_gva.fillna(0).drop(columns=columns_to_discard)
gva_from_nodes_adm1 = (
    node_values.groupby("GID_1").sum().rename(columns=adm1_total_names)
)

In [None]:
# Put each node's GVA next to the total of its GID_1 group.
nodes_with_totals = nodes_with_gva.reset_index().merge(
    gva_from_nodes_adm1.reset_index(), on="GID_1", how="left"
)

# Per sector: node GVA divided by the GVA total of the node's GID_1 group.
proportions = {
    f"proportion_of_adm1_{sector}": nodes_with_totals[f"gva_{sector}_usd"].div(
        nodes_with_totals[f"gva_{sector}_adm1_usd"]
    )
    for sector in ("ag", "man", "ser")
}

output_columns = ["id", "GID_1", *proportions, "geometry"]

# Missing values (e.g. from a 0 / 0 division) are replaced by 0.
nodes_weighted = (
    nodes_with_totals.assign(**proportions)[output_columns].copy().fillna(0)
)

In [None]:
# Persist the node proportions in the working directory.
nodes_weighted.to_parquet("tha_road_nodes.parquet")

In [None]:
# Reload the saved node proportions from `tha_road_nodes.parquet`.
# NOTE(review): presumably an entry point for resuming without re-running the
# cells above - confirm.
nodes_weighted = geopandas.read_parquet("tha_road_nodes.parquet")

In [None]:
# Preview the first rows of the node proportions.
nodes_weighted.head()

In [None]:
# Read the trade table and give the value/quantity columns the `*_adm1` names
# that disaggregate_to_nodes expects.
adm1_trade_column_names = {
    "trade_value_thousandUSD": "value_kusd_adm1",
    "trade_quantity_tons": "volume_tons_adm1",
}
trade_adm1 = pandas.read_csv("baci_sector_adm1_trade_THA.csv").rename(
    columns=adm1_trade_column_names
)
trade_adm1.head(2)

In [None]:
# Reshape the proportions to long format: one row per (node, sector) with the
# sector label in `sector` and the share in `proportion_of_adm1`.
sector_labels = {
    "proportion_of_adm1_ag": "ag",
    "proportion_of_adm1_man": "man",
    "proportion_of_adm1_ser": "ser",
}
key_columns = ["id", "GID_1"]

nodes_with_prop = (
    nodes_weighted[[*key_columns, *sector_labels]]
    .rename(columns=sector_labels)
    .melt(id_vars=key_columns, var_name="sector", value_name="proportion_of_adm1")
)

In [None]:
def disaggregate_to_nodes(nodes_with_prop, trade_adm1, adm1):
    """Split the trade flows of one region across that region's nodes.

    The rows of ``nodes_with_prop`` and ``trade_adm1`` belonging to ``adm1``
    are matched on region and sector, and each flow's value and volume are
    multiplied by the node's ``proportion_of_adm1``.

    Parameters
    ----------
    nodes_with_prop : pandas.DataFrame
        Columns ``id``, ``GID_1``, ``sector`` and ``proportion_of_adm1``.
    trade_adm1 : pandas.DataFrame
        Columns ``THA_GID_1``, ``sector``, ``partner_GID_0``,
        ``export_country_code``, ``export_zone``, ``import_country_code``,
        ``import_zone``, ``value_kusd_adm1`` and ``volume_tons_adm1``.
    adm1 : str
        Region identifier, compared against ``GID_1`` and ``THA_GID_1``.

    Returns
    -------
    pandas.DataFrame
        One row per matched (node, flow) pair with columns ``id``,
        ``THA_GID_1``, ``partner_GID_0``, ``sector``, the export/import
        code and zone columns, ``value_kusd`` and ``volume_tons``.
    """
    # Boolean masks instead of string-built query expressions, so identifiers
    # containing quote characters cannot break the filter.
    region_nodes = nodes_with_prop[nodes_with_prop["GID_1"] == adm1]
    region_trade = trade_adm1[trade_adm1["THA_GID_1"] == adm1]

    trade_nodes = region_nodes.merge(
        region_trade,
        left_on=["GID_1", "sector"],
        right_on=["THA_GID_1", "sector"],
    )
    trade_nodes["value_kusd"] = (
        trade_nodes["proportion_of_adm1"] * trade_nodes["value_kusd_adm1"]
    )
    trade_nodes["volume_tons"] = (
        trade_nodes["proportion_of_adm1"] * trade_nodes["volume_tons_adm1"]
    )

    output_columns = [
        "id",
        "THA_GID_1",
        "partner_GID_0",
        "sector",
        "export_country_code",
        "export_zone",
        "import_country_code",
        "import_zone",
        "value_kusd",
        "volume_tons",
    ]
    return trade_nodes[output_columns]

In [None]:
! rm -r trade_nodes.parquet
! rm -r trade_nodes_total.parquet
! mkdir -p trade_nodes.parquet
! mkdir -p trade_nodes_total.parquet

In [None]:
# For every region in the trade table, write the node-level flows and their
# per-partner totals into a `GID_1=<region>` sub-directory of each output.
adm1s = trade_adm1.THA_GID_1.unique()

for adm1 in tqdm(adm1s):
    trade_nodes_adm1 = disaggregate_to_nodes(nodes_with_prop, trade_adm1, adm1)

    # makedirs(..., exist_ok=True) lets this cell be re-run (e.g. after an
    # interrupted run) without first deleting the output directories.
    os.makedirs(f"trade_nodes.parquet/GID_1={adm1}", exist_ok=True)
    trade_nodes_adm1.to_parquet(f"trade_nodes.parquet/GID_1={adm1}/data.parquet")

    # Collapse the sector dimension: one row per (node, region, partner).
    trade_nodes_total = (
        trade_nodes_adm1[
            ["id", "THA_GID_1", "partner_GID_0", "value_kusd", "volume_tons"]
        ]
        .groupby(["id", "THA_GID_1", "partner_GID_0"])
        .sum()
        .reset_index()
    )

    os.makedirs(f"trade_nodes_total.parquet/GID_1={adm1}", exist_ok=True)
    trade_nodes_total.to_parquet(f"trade_nodes_total.parquet/GID_1={adm1}/data.parquet")

    # Drop the references so the frames can be freed before the next region.
    trade_nodes_adm1 = None
    trade_nodes_total = None