In [None]:
import os
from pathlib import Path

import numpy
import pandas
from tqdm.auto import tqdm

In [None]:
# Local directories used by the rest of the notebook:
#   working_dir - the per-country / per-region CSV exports are written under here
#   results_dir - the GeoParquet inputs are read from here
# The defaults are the original absolute paths; set NBS_WORKING_DIR / NBS_RESULTS_DIR
# in the environment to run on another machine without editing this cell.
working_dir = Path(os.environ.get("NBS_WORKING_DIR", "/home/mert2014/projects/bhutan-demo"))
results_dir = Path(
    os.environ.get(
        "NBS_RESULTS_DIR",
        "/home/mert2014/projects/infra-risk-vis/global/etl/raw_data/nbs-adaptation/",
    )
)

In [None]:
# Load the three opportunity-area tables used below (landslide / slope vegetation,
# mangrove, river-basin afforestation) from results_dir.
#
# input data is available as GPKG files
# created by the ETL processing script at
# https://github.com/nismod/infra-risk-vis/blob/2cf1dcf6713ba1d71944dd9efb6a68d9bf4d6a29/etl/pipelines/nbs-adaptation/extract-nbs-adaptation-opportunities.ipynb
# dataset published at
# Harwood, T., & Russell, T. (2025). Global opportunity areas for nature-based solutions to reduce risks to infrastructure [Data set].
# Zenodo. https://doi.org/10.5281/zenodo.15001764
#
# NOTE(review): the comment above mentions GPKG, but the files read here are
# *.geoparquet - confirm which format/version of the dataset this notebook expects.
# NOTE(review): only rf_all goes through reset_index(); presumably its identifier
# column is stored as the index in that file - verify against the inspection cells.

ls_all = pandas.read_parquet(results_dir / "landslide_slope_vegetation_with_EAD_grouped.geoparquet")
mg_all = pandas.read_parquet(results_dir / "mangrove_with_EAD_grouped.geoparquet")
rf_all = pandas.read_parquet(results_dir / "river_basin_afforestation_with_EAD_grouped.geoparquet").reset_index()

In [None]:
# Inspect the columns and index name of the river-basin afforestation table.
rf_all.columns, rf_all.index.name

In [None]:
# Inspect the columns and index name of the landslide / slope-vegetation table.
ls_all.columns, ls_all.index.name

In [None]:
# Inspect the columns and index name of the mangrove table.
mg_all.columns, mg_all.index.name

In [None]:
def extract_options(iso, country_name, mg_all, rf_all, ls_all):
    """Build the per-country table of nature-based-solution options.

    Each of the three input tables is filtered to rows whose ``GID_0`` equals
    ``iso``, tagged with the hazard it targets and the planting option, and the
    results are stacked in the order mangroves, river basins, slopes.

    Parameters
    ----------
    iso : str
        Value matched against the ``GID_0`` column of every input table.
    country_name : str
        Written verbatim into the ``country`` column of every row.
    mg_all, rf_all, ls_all : pandas.DataFrame
        Mangrove, river-basin and landslide tables. Each must contain ``GID_0``,
        ``geometry`` and ``avoided_ead_hist_2020_mean`` alongside the columns
        selected for output.

    Returns
    -------
    pandas.DataFrame
        Rows with ``baseline_transport_ead_usd`` above 0.99, numbered from 1 in
        ``opportunity_area_id``, restricted to the columns the Excel tool expects.
    """
    sources = (
        (mg_all, "coastal-flooding", "mangroves"),
        (rf_all, "river-flooding", "basin_scale_tree_planting"),
        (ls_all, "landslide", "slope_vegetation"),
    )
    # landuse (landslide/slope veg) and shoreline (coastal flooding/mangrove)
    # categories share a single output column
    category_renames = {
        "option_landuse": "classes",
        "option_shoreline": "classes",
    }

    per_hazard = []
    for source, hazard, planting_type in sources:
        subset = source.query(f"GID_0 == '{iso}'").drop(columns="geometry").copy()
        subset["country"] = country_name
        subset["hazard_targeted"] = hazard
        subset["planting_option"] = planting_type

        ead_renames = {}
        for colname in subset.columns:
            if "avoided_ead" in colname:
                ead_renames[colname] = colname.replace("avoided_ead", "baseline_transport_ead_usd")
        subset = subset.rename(columns=ead_renames).rename(columns=category_renames)

        # the single headline estimate is the historical (2020) mean
        subset["baseline_transport_ead_usd"] = subset["baseline_transport_ead_usd_hist_2020_mean"].copy()
        per_hazard.append(subset)

    combined = pandas.concat(per_hazard).fillna({"classes": ""})

    # keep only rows with non-zero expected annual damage
    options = combined.query("baseline_transport_ead_usd > 0.99").copy()

    # local, 1-based identifier
    options["opportunity_area_id"] = numpy.arange(1, len(options) + 1)

    # Column order expected by the Excel tool. Only the baseline/historical/mean EAD
    # value is exported - more scenarios exist in the data, but the tool currently
    # expects a single estimate.
    excel_columns = [
        "opportunity_area_id",
        "HYBAS_ID",
        "GID_0",
        "country",
        "GID_1",
        "area_m2",
        "area_ha",
        "planting_cost_usd_per_ha",
        "regen_cost_usd_per_ha",
        "carbon_benefit_t_per_ha",
        "baseline_transport_ead_usd",
        "biodiversity_benefit",
        "hazard_targeted",
        "planting_option",
        "classes",
    ]
    return options[excel_columns]

In [None]:
# Country metadata table; later cells read its ISO_A3, GID_0, NAME_LONG, CONTINENT,
# REGION_WB and SUBREGION columns.
# NOTE(review): Path().parent evaluates to the current directory, so this reads
# ./boundaries-meta/gadm36_ne.csv relative to the kernel's working directory - confirm
# that is the intended location (the hydrobasins cell reads from ../results instead).
adm_meta = pandas.read_csv(Path().parent / "boundaries-meta" / "gadm36_ne.csv")

In [None]:
# Country metadata keyed by ISO_A3, used to look up NAME_LONG and CONTINENT per country.
# NOTE(review): the per-country export loop queries this with codes taken from the
# GID_0 columns of the data tables - presumably ISO_A3 and GID_0 agree for every
# country present; verify, otherwise .loc raises KeyError.
iso_name_lookup = adm_meta.set_index("ISO_A3")

In [None]:
# Spot-check the lookup: display the metadata row for the code "HKG".
iso_name_lookup.loc["HKG"]

In [None]:
# Every non-null GID_0 code that appears in at least one of the three input tables.
iso_codes = set().union(
    ls_all.GID_0.dropna().unique(),
    mg_all.GID_0.dropna().unique(),
    rf_all.GID_0.dropna().unique(),
)
len(iso_codes)

In [None]:
# Hydrobasin metadata table, indexed by GID_0; the per-country export loop filters it
# by HYBAS_ID to write the population CSVs.
hybas_meta = pandas.read_csv("../results/input/hydrobasins/hybas_lev12_v1c_with_gadm_codes_pop.csv").set_index("GID_0")
# show a single row to check the available columns
hybas_meta.head(1)

In [None]:
# Write two CSVs per country, grouped into one sub-directory per continent:
#   nbs_hydrobasin_options__<iso>.csv     - the options table from extract_options
#   nbs_hydrobasin_population__<iso>.csv  - hybas_meta rows for the basins in that table
for iso in tqdm(iso_codes):
    iso_name = iso_name_lookup.loc[iso, "NAME_LONG"]
    dirname = iso_name_lookup.loc[iso, "CONTINENT"]
    df = extract_options(iso, iso_name, mg_all, rf_all, ls_all)

    # parents=True so a fresh working_dir (no "for_excel" folder yet) does not fail
    out_dir = working_dir / "for_excel" / dirname
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f"nbs_hydrobasin_options__{iso}.csv", index=False, float_format="%.6g")

    basin_ids = df.HYBAS_ID.unique()
    hybas_pop = hybas_meta[hybas_meta.HYBAS_ID.isin(basin_ids)]
    # expect exactly one metadata row per basin referenced by the options table
    assert len(hybas_pop) == len(basin_ids), (
        f"{iso}: {len(hybas_pop)} hydrobasin metadata rows for {len(basin_ids)} basin ids"
    )
    # index=False: the GID_0 index of hybas_meta is not written to the file
    hybas_pop.to_csv(out_dir / f"nbs_hydrobasin_population__{iso}.csv", index=False, float_format="%.6g")

## Regional versions

In [None]:
# Write one combined options CSV per World Bank region (REGION_WB in adm_meta).
(working_dir / "for_excel").mkdir(parents=True, exist_ok=True)
for region_wb, region_meta in adm_meta[adm_meta.GID_0.isin(iso_codes)].groupby("REGION_WB"):
    region_slug = region_wb.lower().replace(" ", "-").replace("&", "and")
    # extract_options needs the country name as well as the code; take it from the
    # same metadata rows the codes come from (first row wins if a code repeats)
    country_names = (
        region_meta.dropna(subset=["GID_0"])
        .drop_duplicates(subset="GID_0")
        .set_index("GID_0")["NAME_LONG"]
    )
    region_isos = list(country_names.index)
    dfs = []
    for iso in tqdm(region_isos, desc=region_slug):
        dfs.append(extract_options(iso, country_names[iso], mg_all, rf_all, ls_all))
    # NOTE(review): opportunity_area_id is numbered per country by extract_options, so
    # it repeats within a regional file - confirm whether a region-wide id is needed.
    df = pandas.concat(dfs)
    df.to_csv(working_dir / "for_excel" / f"wb_region_nbs_hydrobasin_options__{region_slug}.csv", index=False)


In [None]:
# Read each World Bank region file back and print its row count in thousands (rounded down).
for region_wb, region_meta in adm_meta[adm_meta.GID_0.isin(iso_codes)].groupby("REGION_WB"):
    region_slug = region_wb.lower().replace(" ", "-").replace("&", "and")
    df = pandas.read_csv(
        working_dir / "for_excel" / f"wb_region_nbs_hydrobasin_options__{region_slug}.csv"
    )
    print(f"{len(df) // 1000}k {region_slug}")

In [None]:
# Write one combined options CSV per UN subregion (SUBREGION in adm_meta).
(working_dir / "for_excel").mkdir(parents=True, exist_ok=True)
for subregion, region_meta in adm_meta[adm_meta.GID_0.isin(iso_codes)].groupby("SUBREGION"):
    region_slug = subregion.lower().replace(" ", "-").replace("&", "and")
    # extract_options needs the country name as well as the code; take it from the
    # same metadata rows the codes come from (first row wins if a code repeats)
    country_names = (
        region_meta.dropna(subset=["GID_0"])
        .drop_duplicates(subset="GID_0")
        .set_index("GID_0")["NAME_LONG"]
    )
    region_isos = list(country_names.index)
    dfs = []
    for iso in tqdm(region_isos, desc=region_slug):
        dfs.append(extract_options(iso, country_names[iso], mg_all, rf_all, ls_all))
    # NOTE(review): opportunity_area_id is numbered per country by extract_options, so
    # it repeats within a subregion file - confirm whether a subregion-wide id is needed.
    df = pandas.concat(dfs)
    df.to_csv(working_dir / "for_excel" / f"un_subregion_nbs_hydrobasin_options__{region_slug}.csv", index=False)


In [None]:
# Read each UN subregion file back and print its row count in thousands (rounded down).
for subregion, region_meta in adm_meta[adm_meta.GID_0.isin(iso_codes)].groupby("SUBREGION"):
    region_slug = subregion.lower().replace(" ", "-").replace("&", "and")
    df = pandas.read_csv(
        working_dir / "for_excel" / f"un_subregion_nbs_hydrobasin_options__{region_slug}.csv"
    )
    print(f"{len(df) // 1000}k {region_slug}")