In [None]:
from pathlib import Path

import geopandas
import numpy
import pandas
from tqdm.auto import tqdm

In [None]:
# Root folder for the files this notebook writes (the "for_excel" CSVs and the GeoPackage exports).
# NOTE(review): both paths are absolute and point into one user's home directory - adjust before running elsewhere.
working_dir = Path("/home/mert2014/projects/bhutan-demo")
# Folder holding the input GeoPackages that are loaded further down.
results_dir = Path("/home/mert2014/projects/infra-risk-vis/global/etl/raw_data/nbs-adaptation/")

In [None]:
# Administrative metadata table; later cells use its ISO_A3, NAME_LONG, CONTINENT, REGION_WB, SUBREGION and COUNTRY columns.
# NOTE(review): Path().parent is still Path("."), so this path is relative to the current working directory,
# unlike the Path().absolute().parent / ... paths used in other cells - confirm which location is intended.
adm_meta = pandas.read_csv(Path().parent / "boundaries-meta" / "gadm36_ne.csv")

In [None]:
# List the candidate files in results/input/hydrobasins under the parent of the current working directory.
# NOTE(review): the pattern "*csv" has no dot, so it matches any name ending in "csv" - presumably "*.csv" was meant.
list((Path().absolute().parent / "results" / "input" / "hydrobasins").glob("*csv"))

In [None]:
# Basin metadata table, indexed by GID_0; its HYBAS_ID column is used later to pick the basins of each country.
# (The file name suggests level-12 HydroBASINS with GADM codes and population - not verified in this notebook.)
hybas_meta = pandas.read_csv(Path().absolute().parent / "results" / "input" / "hydrobasins" / "hybas_lev12_v1c_with_gadm_codes_pop.csv").set_index("GID_0")
# Peek at a single row to check the available columns.
hybas_meta.head(1)

In [None]:
# Re-index the admin metadata by ISO_A3 so per-country attributes can be read with .loc[iso, column].
iso_name_lookup = adm_meta.set_index("ISO_A3")

In [None]:
# Spot-check the lookup with a single ISO code.
iso_name_lookup.loc["HKG"]

In [None]:
# Input data is available as GPKG files created by the ETL processing script at
# https://github.com/nismod/infra-risk-vis/blob/2cf1dcf6713ba1d71944dd9efb6a68d9bf4d6a29/etl/pipelines/nbs-adaptation/extract-nbs-adaptation-opportunities.ipynb
# The dataset is published as:
# Harwood, T., & Russell, T. (2025). Global opportunity areas for nature-based solutions to reduce risks to infrastructure [Data set].
# Zenodo. https://doi.org/10.5281/zenodo.15001764

# One layer per option type (landslide slope vegetation, mangrove, river-basin afforestation, per the file names).
# NOTE(review): only rf_all gets reset_index(), which adds the previous index as an extra column - confirm why it is needed.
ls_all = geopandas.read_file(results_dir / "landslide_slope_vegetation_with_EAD_grouped_gt0.gpkg", engine="pyogrio")
mg_all = geopandas.read_file(results_dir / "mangrove_with_EAD_grouped_gt0.gpkg", engine="pyogrio")
rf_all = geopandas.read_file(results_dir / "river_basin_afforestation_with_EAD_grouped_gt0.gpkg", engine="pyogrio").reset_index()

In [None]:
# Every country code (GID_0) that occurs in at least one of the three layers; missing codes are skipped.
iso_codes = set().union(
    *(layer.GID_0.dropna().unique() for layer in (ls_all, mg_all, rf_all))
)
len(iso_codes)

In [None]:
# Inspect the column names and index name of the river-basin layer.
rf_all.columns, rf_all.index.name

In [None]:
# Inspect the column names and index name of the landslide layer.
ls_all.columns, ls_all.index.name

In [None]:
# Inspect the column names and index name of the mangrove layer.
mg_all.columns, mg_all.index.name

In [None]:
def extract_options(iso, country_name, mg_all, rf_all, ls_all, include_geometry=False, ead_min=0.99):
    """Combine the three opportunity layers into one table laid out for the Excel tool.

    Parameters
    ----------
    iso : str or None
        ``GID_0`` code to select. ``None`` keeps every row of every layer.
    country_name : str
        Value written to the ``country`` column of every returned row.
    mg_all, rf_all, ls_all : DataFrame or GeoDataFrame
        Mangrove, river-basin and landslide layers. They are only read: the
        caller's frames are never modified, whatever the other arguments are.
    include_geometry : bool, default False
        Keep the ``geometry`` column (returned as the last column).
    ead_min : float, default 0.99
        Rows are kept only where ``baseline_transport_ead_usd`` is strictly
        greater than this value.

    Returns
    -------
    DataFrame or GeoDataFrame
        The selected columns in the order expected by the Excel tool.
        ``opportunity_area_id`` numbers the returned rows 1..n; the row index
        is whatever the concatenated inputs carried (it is not reset).
    """
    layers = [
        (mg_all, "coastal-flooding", "mangroves"),
        (rf_all, "river-flooding", "basin_scale_tree_planting"),
        (ls_all, "landslide", "slope_vegetation"),
    ]

    prepared = []
    for layer, hazard, planting_type in layers:
        # Boolean mask instead of a string-built query: the code is compared as data.
        subset = layer if iso is None else layer[layer["GID_0"] == iso]

        if not include_geometry:
            subset = subset.drop(columns="geometry")

        ead_renames = {
            colname: colname.replace("avoided_ead", "baseline_transport_ead_usd")
            for colname in subset.columns
            if "avoided_ead" in colname
        }
        # rename/assign return new frames, so the caller's layer is left untouched
        # even when `subset` is still the very object that was passed in.
        subset = (
            subset
            .rename(columns=ead_renames)
            # combine landuse (for landslide/slope veg) and shoreline (for coastal flooding/mangrove) categories
            .rename(columns={"option_landuse": "classes", "option_shoreline": "classes"})
            .assign(
                country=country_name,
                hazard_targeted=hazard,
                planting_option=planting_type,
                # single headline estimate: the historical/2020/mean EAD column
                baseline_transport_ead_usd=lambda frame: frame["baseline_transport_ead_usd_hist_2020_mean"],
            )
        )
        prepared.append(subset)

    combined = pandas.concat(prepared)
    # layers without a class column contribute NaN here; export them as empty strings
    combined = combined.fillna({"classes": ""})

    # filter on expected annual damage strictly above the threshold
    combined = combined[combined["baseline_transport_ead_usd"] > ead_min].copy()

    # add local ID
    combined["opportunity_area_id"] = numpy.arange(len(combined)) + 1

    # select columns in order for Excel
    # note that we only select the baseline/historical/mean EAD value - there are more scenarios available in the
    # data, but the Excel tool expects only a single estimate in its current state
    select_columns = [
        "opportunity_area_id",
        "HYBAS_ID",
        "GID_0",
        "country",
        "GID_1",
        "area_m2",
        "area_ha",
        "planting_cost_usd_per_ha",
        "regen_cost_usd_per_ha",
        "carbon_benefit_t_per_ha",
        "baseline_transport_ead_usd",
        "biodiversity_benefit",
        "hazard_targeted",
        "planting_option",
        "classes"
    ]
    if include_geometry:
        select_columns.append("geometry")
    return combined[select_columns]

In [None]:
# Write one options CSV and one basin-metadata CSV per country, grouped into a folder per continent.
for iso in tqdm(iso_codes):
    iso_name = iso_name_lookup.loc[iso, "NAME_LONG"]
    dirname = iso_name_lookup.loc[iso, "CONTINENT"]
    df = extract_options(iso, iso_name, mg_all, rf_all, ls_all)

    # parents=True: on a first run "for_excel" itself does not exist yet, and
    # mkdir without it raises FileNotFoundError for the continent sub-folder.
    out_dir = working_dir / "for_excel" / dirname
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f"nbs_hydrobasin_options__{iso}.csv", index=False, float_format="%.6g")

    basin_ids = df.HYBAS_ID.unique()
    hybas_pop = hybas_meta[hybas_meta.HYBAS_ID.isin(basin_ids)]
    # every basin referenced by the options must have exactly one metadata row
    assert len(hybas_pop) == len(basin_ids)
    # NOTE(review): hybas_meta is indexed by GID_0, so index=False leaves GID_0 out of this file - confirm intended.
    hybas_pop.to_csv(out_dir / f"nbs_hydrobasin_population__{iso}.csv", index=False, float_format="%.6g")

In [None]:
# Read back one of the per-country exports as a sanity check.
# (Plain string: the file name has no placeholders, so the f-prefix was redundant.)
kenya = pandas.read_csv(working_dir / "for_excel" / "Africa" / "nbs_hydrobasin_options__KEN.csv")

In [None]:
# Total area (ha) and number of opportunity areas per planting option, rounded.
kenya.groupby('planting_option')[['area_ha']].agg(['sum', 'count']).round()

## Regional versions

In [None]:
# Display the admin metadata; its REGION_WB and SUBREGION columns drive the groupings in the following cells.
adm_meta

In [None]:
# One combined options CSV per REGION_WB group, limited to countries present in the NbS layers.
for region_wb, region_meta in adm_meta[adm_meta.ISO_A3.isin(iso_codes)].groupby("REGION_WB"):
    region_slug = region_wb.lower().replace(" ", "-").replace("&", "and")
    region_isos = list(region_meta.ISO_A3.dropna().unique())
    # NOTE(review): the country column is written as "" and opportunity_area_id restarts
    # at 1 for every country, so IDs repeat within a regional file - confirm intended.
    dfs = [
        extract_options(iso, "", mg_all, rf_all, ls_all)
        for iso in tqdm(region_isos, desc=region_slug)
    ]
    df = pandas.concat(dfs)
    region_csv = working_dir / "for_excel" / f"wb_region_nbs_hydrobasin_options__{region_slug}.csv"
    df.to_csv(region_csv, index=False)


In [None]:
# Report the size of each regional file in thousands of rows (rounded down).
for region_wb, region_meta in adm_meta[adm_meta.ISO_A3.isin(iso_codes)].groupby("REGION_WB"):
    region_slug = region_wb.lower().replace(" ", "-").replace("&", "and")
    region_csv = working_dir / "for_excel" / f"wb_region_nbs_hydrobasin_options__{region_slug}.csv"
    df = pandas.read_csv(region_csv)
    print(f"{len(df) // 1000}k {region_slug}")

In [None]:
# One combined options CSV per SUBREGION group, limited to countries present in the NbS layers.
for subregion, region_meta in adm_meta[adm_meta.ISO_A3.isin(iso_codes)].groupby("SUBREGION"):
    region_slug = subregion.lower().replace(" ", "-").replace("&", "and")
    region_isos = list(region_meta.ISO_A3.dropna().unique())
    # NOTE(review): the country column is written as "" and opportunity_area_id restarts
    # at 1 for every country, so IDs repeat within a subregion file - confirm intended.
    dfs = [
        extract_options(iso, "", mg_all, rf_all, ls_all)
        for iso in tqdm(region_isos, desc=region_slug)
    ]
    df = pandas.concat(dfs)
    subregion_csv = working_dir / "for_excel" / f"un_subregion_nbs_hydrobasin_options__{region_slug}.csv"
    df.to_csv(subregion_csv, index=False)


In [None]:
# Report the size of each subregion file in thousands of rows (rounded down).
for subregion, region_meta in adm_meta[adm_meta.ISO_A3.isin(iso_codes)].groupby("SUBREGION"):
    region_slug = subregion.lower().replace(" ", "-").replace("&", "and")
    subregion_csv = working_dir / "for_excel" / f"un_subregion_nbs_hydrobasin_options__{region_slug}.csv"
    df = pandas.read_csv(subregion_csv)
    print(f"{len(df) // 1000}k {region_slug}")

In [None]:
# Countries whose SUBREGION mentions "Africa".
# na=False: a row with a missing SUBREGION counts as a non-match; without it the
# mask can contain NaN, which .loc refuses to use as a boolean indexer.
africa_adm_meta = adm_meta.loc[adm_meta.SUBREGION.str.contains("Africa", na=False), ["ISO_A3", "COUNTRY"]]
africa_adm_meta.head()

In [None]:
# Options for every country at once, keeping geometry; ead_min=-0.1 keeps every row
# with a non-negative EAD, then only areas larger than 6.25 ha are retained.
# NOTE(review): the literal "country_name" ends up in the country column - presumably a placeholder.
all_options = extract_options(
    None,
    "country_name",
    mg_all,
    rf_all,
    ls_all,
    include_geometry=True,
    ead_min=-0.1,
)
all_options = all_options.query('area_ha > 6.25')

In [None]:
# Keep only the options located in one of the African countries selected above.
africa_options = all_options.loc[
    all_options.GID_0.isin(set(africa_adm_meta.ISO_A3.unique()))
].copy()
# Make sure no class value is missing, so blank classes can be matched on ''.
africa_options["classes"] = africa_options["classes"].fillna('')

In [None]:
# Show the first Africa row to check the available fields.
africa_options.iloc[0]

In [None]:
# Check the container type, the columns and one sample geometry.
# Positional access: the index labels are inherited from the concatenated layers
# (repeated, and filtered since), so label 0 may be missing or match several rows.
type(africa_options), africa_options.columns, africa_options["geometry"].iloc[0]

In [None]:
# Summary statistics of the baseline EAD values in the Africa subset.
africa_options.baseline_transport_ead_usd.describe()

In [None]:
# Reduction factors, joined to the options on planting_option and classes in the next step.
# Missing values are read as '' so that blank classes line up with the '' that
# extract_options writes for rows without a class.
reduction_factors = pandas.read_csv(Path().absolute().parent /"config" / "nbs_reduction_factors.csv").fillna('')
reduction_factors

In [None]:
# Attach the reduction factor of each (planting_option, classes) pair, then derive
# avoided damages, planting cost and their ratio (BCR).
africa_options = (
    africa_options
    .merge(reduction_factors, on=['planting_option', 'classes'], how='left', validate='many_to_one')
    .assign(
        avoided_damages_usd=lambda frame: frame.baseline_transport_ead_usd * frame.factor,
        cost_usd=lambda frame: frame.planting_cost_usd_per_ha * frame.area_ha,
        bcr=lambda frame: frame.avoided_damages_usd / frame.cost_usd,
    )
)

In [None]:
import matplotlib.pyplot as plt

# Log-log scatter of benefit-cost ratio against avoided damages, one dot per option.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(africa_options.avoided_damages_usd, africa_options.bcr, alpha=0.3, s=2)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Avoided Damages (USD, log scale)')
ax.set_ylabel('Benefit-Cost Ratio (BCR, log scale)')
ax.set_title('BCR vs Avoided Damages (log-log scale)')
ax.grid(True, which="both", ls="--", linewidth=0.5)
plt.show()

In [None]:
# Options with a benefit-cost ratio above 100, reduced to their centroids for inspection.
outliers = africa_options.query('bcr > 100').copy()
# NOTE(review): if the layer uses a geographic CRS, these centroids are computed in degrees - confirm acceptable.
outliers.geometry = outliers.geometry.centroid
outliers.head()

In [None]:
# For every (basin, planting option) pair keep the single row with the highest BCR.
by_hb = [
    basin_rows.sort_values(by='bcr', ascending=False).head(1)
    for _basin_key, basin_rows in africa_options.groupby(['HYBAS_ID', 'planting_option'])
]
africa_options_maxhb = pandas.concat(by_hb)
africa_options_maxhb

In [None]:
# Per country and planting option, keep the 20 best basins by BCR.
tops = [
    country_rows.sort_values(by='bcr', ascending=False).head(20)
    for _country_key, country_rows in africa_options_maxhb.groupby(['GID_0', 'planting_option'])
]
selection = pandas.concat(tops)
selection

In [None]:
# Write the top-20 selection with its full geometries to a GeoPackage.
selection.to_file(working_dir / "selection_6.25ha_top20-per-class.gpkg")

In [None]:
# Same selection with each geometry replaced by its centroid, written as a point layer.
selection_points = selection.assign(geometry=selection.geometry.centroid)
selection_points.to_file(working_dir / "selection_6.25ha_top20-per-class_points.gpkg")

In [None]:
# Write every Africa option with its full geometry to a GeoPackage.
africa_options.to_file(working_dir / "africa_options.gpkg")

In [None]:
# Same Africa options with each geometry replaced by its centroid, written as a point layer.
africa_options_points = africa_options.assign(geometry=africa_options.geometry.centroid)
africa_options_points.to_file(working_dir / "africa_options_points.gpkg")