In [190]:
import subprocess
from pathlib import Path

import geopandas
import numpy
import orjson as json
import pandas
import pyarrow.parquet as pq
from tqdm.auto import tqdm

In [12]:
etl_path = Path('.').absolute().parent.parent
raw_dir = etl_path / 'raw_data'
opp_dir = etl_path / 'raw_data' / 'nbs-adaptation'
cluster_path = Path('/mnt/linux-filestore/mert2014/projects/open-gira/')

# Extract

In [None]:
def extract_opp(pds_dir):
    pds = pq.ParquetDataset(pds_dir)
    nonempty = []
    schemas = []
    for pf_name in pds.files:
        pf = pq.ParquetFile(pf_name)
        if "HYBAS_ID" in pf.schema.names:
            nonempty.append(pf_name)

        for s in schemas:
            if s.equals(pf.schema):
                break
        else:
            schemas.append(pf.schema)
    print(schemas)
    return nonempty
    # pt = pq.read_table(
    #     nonempty,
    # )
    # df = pt.to_pandas()
    # df.geometry = geopandas.GeoSeries.from_wkb(df.geometry)
    # gdf = geopandas.GeoDataFrame(df).set_crs(epsg=4326)
    # return gdf

In [83]:
read = [
    ("river_basin_afforestation_with_EAD", [
        'hazard-floodMapGL_EAD__rail',
        'hazard-floodMapGL_EAD__road',
        'hazard-inunriver_historical_MAX_1980_EAD__rail',
        'hazard-inunriver_historical_MAX_1980_EAD__road',
        'hazard-inunriver_historical_MEAN_1980_EAD__rail',
        'hazard-inunriver_historical_MEAN_1980_EAD__road',
        'hazard-inunriver_historical_MIN_1980_EAD__rail',
        'hazard-inunriver_historical_MIN_1980_EAD__road',
        'hazard-inunriver_rcp4p5_MAX_2030_EAD__rail',
        'hazard-inunriver_rcp4p5_MAX_2030_EAD__road',
        'hazard-inunriver_rcp4p5_MAX_2050_EAD__rail',
        'hazard-inunriver_rcp4p5_MAX_2050_EAD__road',
        'hazard-inunriver_rcp4p5_MAX_2080_EAD__rail',
        'hazard-inunriver_rcp4p5_MAX_2080_EAD__road',
        'hazard-inunriver_rcp4p5_MEAN_2030_EAD__rail',
        'hazard-inunriver_rcp4p5_MEAN_2030_EAD__road',
        'hazard-inunriver_rcp4p5_MEAN_2050_EAD__rail',
        'hazard-inunriver_rcp4p5_MEAN_2050_EAD__road',
        'hazard-inunriver_rcp4p5_MEAN_2080_EAD__rail',
        'hazard-inunriver_rcp4p5_MEAN_2080_EAD__road',
        'hazard-inunriver_rcp4p5_MIN_2030_EAD__rail',
        'hazard-inunriver_rcp4p5_MIN_2030_EAD__road',
        'hazard-inunriver_rcp4p5_MIN_2050_EAD__rail',
        'hazard-inunriver_rcp4p5_MIN_2050_EAD__road',
        'hazard-inunriver_rcp4p5_MIN_2080_EAD__rail',
        'hazard-inunriver_rcp4p5_MIN_2080_EAD__road',
        'hazard-inunriver_rcp8p5_MAX_2030_EAD__rail',
        'hazard-inunriver_rcp8p5_MAX_2030_EAD__road',
        'hazard-inunriver_rcp8p5_MAX_2050_EAD__rail',
        'hazard-inunriver_rcp8p5_MAX_2050_EAD__road',
        'hazard-inunriver_rcp8p5_MAX_2080_EAD__rail',
        'hazard-inunriver_rcp8p5_MAX_2080_EAD__road',
        'hazard-inunriver_rcp8p5_MEAN_2030_EAD__rail',
        'hazard-inunriver_rcp8p5_MEAN_2030_EAD__road',
        'hazard-inunriver_rcp8p5_MEAN_2050_EAD__rail',
        'hazard-inunriver_rcp8p5_MEAN_2050_EAD__road',
        'hazard-inunriver_rcp8p5_MEAN_2080_EAD__rail',
        'hazard-inunriver_rcp8p5_MEAN_2080_EAD__road',
        'hazard-inunriver_rcp8p5_MIN_2030_EAD__rail',
        'hazard-inunriver_rcp8p5_MIN_2030_EAD__road',
        'hazard-inunriver_rcp8p5_MIN_2050_EAD__rail',
        'hazard-inunriver_rcp8p5_MIN_2050_EAD__road',
        'hazard-inunriver_rcp8p5_MIN_2080_EAD__rail',
        'hazard-inunriver_rcp8p5_MIN_2080_EAD__road',
    ]),
    ("mangrove_with_EAD", [
        'hazard-GFM_MERITDEM1km_2018_EAD__rail',
        'hazard-GFM_MERITDEM1km_2018_EAD__road',
        'hazard-GFM_MERITDEM1km_2050_EAD__rail',
        'hazard-GFM_MERITDEM1km_2050_EAD__road',
        'hazard-GFM_NASADEM1km_2018_EAD__rail',
        'hazard-GFM_NASADEM1km_2018_EAD__road',
        'hazard-GFM_NASADEM1km_2050_EAD__rail',
        'hazard-GFM_NASADEM1km_2050_EAD__road',
        'hazard-inuncoast_historical_MAX_hist_0_EAD__rail',
        'hazard-inuncoast_historical_MAX_hist_0_EAD__road',
        'hazard-inuncoast_historical_MAX_hist_5_EAD__rail',
        'hazard-inuncoast_historical_MAX_hist_5_EAD__road',
        'hazard-inuncoast_historical_MEAN_hist_0_EAD__rail',
        'hazard-inuncoast_historical_MEAN_hist_0_EAD__road',
        'hazard-inuncoast_historical_MEAN_hist_5_EAD__rail',
        'hazard-inuncoast_historical_MEAN_hist_5_EAD__road',
        'hazard-inuncoast_historical_MIN_hist_0_EAD__rail',
        'hazard-inuncoast_historical_MIN_hist_0_EAD__road',
        'hazard-inuncoast_historical_MIN_hist_5_EAD__rail',
        'hazard-inuncoast_historical_MIN_hist_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2030_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2030_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2030_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2030_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2050_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2050_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2050_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2050_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2080_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2080_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MAX_2080_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MAX_2080_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2030_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2030_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2030_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2030_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2050_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2050_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2050_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2050_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2080_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2080_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MEAN_2080_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MEAN_2080_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2030_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2030_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2030_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2030_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2050_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2050_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2050_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2050_5_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2080_0_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2080_0_EAD__road',
        'hazard-inuncoast_rcp4p5_MIN_2080_5_EAD__rail',
        'hazard-inuncoast_rcp4p5_MIN_2080_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2030_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2030_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2030_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2030_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2050_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2050_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2050_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2050_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2080_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2080_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MAX_2080_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MAX_2080_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2030_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2030_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2030_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2030_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2050_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2050_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2050_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2050_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2080_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2080_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MEAN_2080_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MEAN_2080_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2030_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2030_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2030_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2030_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2050_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2050_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2050_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2050_5_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2080_0_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2080_0_EAD__road',
        'hazard-inuncoast_rcp8p5_MIN_2080_5_EAD__rail',
        'hazard-inuncoast_rcp8p5_MIN_2080_5_EAD__road',
    ]),
    ("landslide_slope_vegetation_with_EAD", [
        'hazard-ls_eq_tiled__rail_damage_fraction_EAD__rail',
        'hazard-ls_eq_tiled__rail_lower_EAD__rail',
        'hazard-ls_eq_tiled__rail_upper_EAD__rail',
        'hazard-LS_RF_Median_1980-2018__rail_damage_fraction_EAD__rail',
        'hazard-LS_RF_Median_1980-2018__rail_lower_EAD__rail',
        'hazard-LS_RF_Median_1980-2018__rail_upper_EAD__rail',
        'hazard-LS_TH__rail_damage_fraction_EAD__rail',
        'hazard-LS_TH__rail_lower_EAD__rail',
        'hazard-LS_TH__rail_upper_EAD__rail',
        'hazard-_landslide_sum__rail_damage_fraction_EAD__rail',
        'hazard-_landslide_sum__rail_lower_EAD__rail',
        'hazard-_landslide_sum__rail_upper_EAD__rail',
        'hazard-ls_eq_tiled__road_damage_fraction_EAD__road',
        'hazard-ls_eq_tiled__road_lower_EAD__road',
        'hazard-ls_eq_tiled__road_upper_EAD__road',
        'hazard-LS_RF_Median_1980-2018__road_damage_fraction_EAD__road',
        'hazard-LS_RF_Median_1980-2018__road_lower_EAD__road',
        'hazard-LS_RF_Median_1980-2018__road_upper_EAD__road',
        'hazard-_landslide_sum__road_damage_fraction_EAD__road',
        'hazard-_landslide_sum__road_lower_EAD__road',
        'hazard-_landslide_sum__road_upper_EAD__road',
    ])
]
for layer, ensure_cols in read:
    print("Processing", layer)
    files = list((cluster_path / 'results/slices/planet-latest_nbs').glob(f"slice-*/{layer}.parquet"))
    dfs = []
    for pf_name in tqdm(files, desc=layer):
        pf = pq.ParquetFile(pf_name)
        if "HYBAS_ID" in pf.schema.names:
            df = geopandas.read_parquet(pf_name)
            missing = set(ensure_cols) - set(df.columns)
            for col in missing:
                df[col] = 0
            dfs.append(df)
    gdf = pandas.concat(dfs)
    gdf.to_parquet(opp_dir / f"{layer}.geoparquet")

Processing river_basin_afforestation_with_EAD


river_basin_afforestation_with_EAD:   0%|          | 0/2025 [00:00<?, ?it/s]

Processing mangrove_with_EAD


mangrove_with_EAD:   0%|          | 0/2024 [00:00<?, ?it/s]

Processing landslide_slope_vegetation_with_EAD


landslide_slope_vegetation_with_EAD:   0%|          | 0/2025 [00:00<?, ?it/s]

# Mangrove

In [84]:
mangrove = geopandas.read_parquet(opp_dir / "mangrove_with_EAD.geoparquet")

In [85]:
list(mangrove.columns)

['geometry',
 'option_shoreline',
 'HYBAS_ID',
 'GID_0',
 'GID_1',
 'GID_2',
 'area_m2',
 'area_ha',
 'biodiversity_benefit',
 'carbon_benefit_t_per_ha',
 'planting_cost_usd_per_ha',
 'regen_cost_usd_per_ha',
 'hazard-inuncoast_historical_MAX_hist_5_EAD__rail',
 'hazard-inuncoast_historical_MAX_hist_0_EAD__rail',
 'hazard-inuncoast_historical_MEAN_hist_5_EAD__rail',
 'hazard-inuncoast_historical_MEAN_hist_0_EAD__rail',
 'hazard-inuncoast_historical_MIN_hist_5_EAD__rail',
 'hazard-inuncoast_historical_MIN_hist_0_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2030_5_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2030_0_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2050_5_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2050_0_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2080_5_EAD__rail',
 'hazard-inuncoast_rcp4p5_MAX_2080_0_EAD__rail',
 'hazard-inuncoast_rcp4p5_MEAN_2030_5_EAD__rail',
 'hazard-inuncoast_rcp4p5_MEAN_2030_0_EAD__rail',
 'hazard-inuncoast_rcp4p5_MEAN_2050_5_EAD__rail',
 'hazard-inuncoast_rcp4p5_M

In [86]:
mangrove.head(2)

Unnamed: 0_level_0,geometry,option_shoreline,HYBAS_ID,GID_0,GID_1,GID_2,area_m2,area_ha,biodiversity_benefit,carbon_benefit_t_per_ha,...,hazard-inuncoast_rcp8p5_MIN_2080_5_EAD__road,hazard-inuncoast_rcp8p5_MIN_2080_0_EAD__road,hazard-GFM_MERITDEM1km_2018_EAD__rail,hazard-GFM_MERITDEM1km_2050_EAD__rail,hazard-GFM_NASADEM1km_2018_EAD__rail,hazard-GFM_NASADEM1km_2050_EAD__rail,hazard-GFM_MERITDEM1km_2018_EAD__road,hazard-GFM_MERITDEM1km_2050_EAD__road,hazard-GFM_NASADEM1km_2018_EAD__road,hazard-GFM_NASADEM1km_2050_EAD__road
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1466_0,"POLYGON ((82.31454 17.05543, 82.31454 17.04918...",retreating,4120027730,IND,IND.2_1,IND.2.3_1,154260.940093,15.426094,0.636911,325.576965,...,0.0,54945.353448,0.0,0.0,0.0,0.0,0.0,0.0,319262.89908,438391.93253
1466_1,"POLYGON ((82.31037 17.04918, 82.31037 17.04293...",retreating,4120027730,IND,IND.2_1,IND.2.3_1,308532.201171,30.85322,0.610973,327.314117,...,0.0,54945.353448,0.0,0.0,0.0,0.0,0.0,0.0,320314.930661,439798.355333


In [87]:
core = [c for c in mangrove.columns if "hazard-" not in c]
m_agg_sector = mangrove[core].copy()
m_to_sum = {
    'hazard-GFM_MERITDEM1km_2018_EAD': ['hazard-GFM_MERITDEM1km_2018_EAD__rail', 'hazard-GFM_MERITDEM1km_2018_EAD__road',],
    'hazard-GFM_MERITDEM1km_2050_EAD': ['hazard-GFM_MERITDEM1km_2050_EAD__rail', 'hazard-GFM_MERITDEM1km_2050_EAD__road',],
    'hazard-GFM_NASADEM1km_2018_EAD': ['hazard-GFM_NASADEM1km_2018_EAD__rail', 'hazard-GFM_NASADEM1km_2018_EAD__road',],
    'hazard-GFM_NASADEM1km_2050_EAD': ['hazard-GFM_NASADEM1km_2050_EAD__rail', 'hazard-GFM_NASADEM1km_2050_EAD__road',],
}
for agg in ("MIN", "MEAN", "MAX"):
    m_to_sum[f'hazard-inuncoast_historical_{agg}_hist_EAD'] = [
        f'hazard-inuncoast_historical_{agg}_hist_0_EAD__rail',
        f'hazard-inuncoast_historical_{agg}_hist_0_EAD__road',
        f'hazard-inuncoast_historical_{agg}_hist_5_EAD__rail',
        f'hazard-inuncoast_historical_{agg}_hist_5_EAD__road',
    ]
for rcp in ("rcp4p5", "rcp8p5"):
    for epoch in ("2030", "2050", "2080"):
        for agg in ("MIN", "MEAN", "MAX"):
            m_to_sum[f'hazard-inuncoast_{rcp}_{agg}_{epoch}_EAD'] = [
                f'hazard-inuncoast_{rcp}_{agg}_{epoch}_0_EAD__rail',
                f'hazard-inuncoast_{rcp}_{agg}_{epoch}_0_EAD__road',
                f'hazard-inuncoast_{rcp}_{agg}_{epoch}_5_EAD__rail',
                f'hazard-inuncoast_{rcp}_{agg}_{epoch}_5_EAD__road',
            ]

for key, keys in m_to_sum.items():
    m_agg_sector[key] = mangrove[keys].sum(axis=1)
m_agg_sector.head()

Unnamed: 0_level_0,geometry,option_shoreline,HYBAS_ID,GID_0,GID_1,GID_2,area_m2,area_ha,biodiversity_benefit,carbon_benefit_t_per_ha,...,hazard-inuncoast_rcp4p5_MAX_2080_EAD,hazard-inuncoast_rcp8p5_MIN_2030_EAD,hazard-inuncoast_rcp8p5_MEAN_2030_EAD,hazard-inuncoast_rcp8p5_MAX_2030_EAD,hazard-inuncoast_rcp8p5_MIN_2050_EAD,hazard-inuncoast_rcp8p5_MEAN_2050_EAD,hazard-inuncoast_rcp8p5_MAX_2050_EAD,hazard-inuncoast_rcp8p5_MIN_2080_EAD,hazard-inuncoast_rcp8p5_MEAN_2080_EAD,hazard-inuncoast_rcp8p5_MAX_2080_EAD
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1466_0,"POLYGON ((82.31454 17.05543, 82.31454 17.04918...",retreating,4120027730,IND,IND.2_1,IND.2.3_1,154260.940093,15.426094,0.636911,325.576965,...,46393.028945,10732.753654,10732.753654,10732.753654,25016.806093,25016.806093,25016.806093,54945.353448,54945.353448,54945.353448
1466_1,"POLYGON ((82.31037 17.04918, 82.31037 17.04293...",retreating,4120027730,IND,IND.2_1,IND.2.3_1,308532.201171,30.85322,0.610973,327.314117,...,46393.028945,10732.753654,10732.753654,10732.753654,25016.806093,25016.806093,25016.806093,54945.353448,54945.353448,54945.353448
1466_2,"POLYGON ((82.30829 17.04709, 82.30829 17.04293...",accreting,4120027730,IND,IND.2_1,IND.2.3_1,205688.317155,20.568832,0.594886,325.576965,...,46393.028945,10732.753654,10732.753654,10732.753654,25016.806093,25016.806093,25016.806093,54945.353448,54945.353448,54945.353448
1466_3,"POLYGON ((82.30204 17.04084, 82.30204 17.03251...",accreting,4120027730,IND,IND.2_1,IND.2.3_1,411396.900648,41.13969,0.610162,325.576965,...,46393.028945,10732.753654,10732.753654,10732.753654,25016.806093,25016.806093,25016.806093,54945.353448,54945.353448,54945.353448
1466_4,"POLYGON ((82.29579 17.03251, 82.29579 17.02626...",accreting,4120027730,IND,IND.2_1,IND.2.3_1,308559.705845,30.855971,0.654705,187.788498,...,47189.31089,10736.183127,10736.183127,10736.183127,25088.36713,25088.36713,25088.36713,56087.037571,56087.037571,56087.037571


In [88]:
# [c for c in agg_sector.columns if "hazard-" in c]
m_to_agg = {
    'avoided_ead_hist_2020': [
        'hazard-GFM_MERITDEM1km_2018_EAD',
        'hazard-GFM_NASADEM1km_2018_EAD',
        'hazard-inuncoast_historical_MIN_hist_EAD',
        'hazard-inuncoast_historical_MEAN_hist_EAD',
        'hazard-inuncoast_historical_MAX_hist_EAD',
    ],
    'avoided_ead_rcp4p5_2050': [
        'hazard-GFM_MERITDEM1km_2050_EAD',
        'hazard-GFM_NASADEM1km_2050_EAD',
        'hazard-inuncoast_rcp4p5_MIN_2050_EAD',
        'hazard-inuncoast_rcp4p5_MEAN_2050_EAD',
        'hazard-inuncoast_rcp4p5_MAX_2050_EAD',
    ],
    'avoided_ead_rcp8p5_2050': [
        'hazard-GFM_MERITDEM1km_2050_EAD',
        'hazard-GFM_NASADEM1km_2050_EAD',
        'hazard-inuncoast_rcp8p5_MIN_2050_EAD',
        'hazard-inuncoast_rcp8p5_MEAN_2050_EAD',
        'hazard-inuncoast_rcp8p5_MAX_2050_EAD',
    ]
}
m_agg_model = mangrove[core].copy()

for key, keys in m_to_agg.items():
    for op in ("min", "mean", "max"):
        m_agg_model[f"{key}_{op}"] = m_agg_sector[keys].agg(op, axis=1)

m_agg_model.columns

Index(['geometry', 'option_shoreline', 'HYBAS_ID', 'GID_0', 'GID_1', 'GID_2',
       'area_m2', 'area_ha', 'biodiversity_benefit', 'carbon_benefit_t_per_ha',
       'planting_cost_usd_per_ha', 'regen_cost_usd_per_ha',
       'avoided_ead_hist_2020_min', 'avoided_ead_hist_2020_mean',
       'avoided_ead_hist_2020_max', 'avoided_ead_rcp4p5_2050_min',
       'avoided_ead_rcp4p5_2050_mean', 'avoided_ead_rcp4p5_2050_max',
       'avoided_ead_rcp8p5_2050_min', 'avoided_ead_rcp8p5_2050_mean',
       'avoided_ead_rcp8p5_2050_max'],
      dtype='object')

In [89]:
mangrove_agg = m_agg_model.dissolve(
    by=["HYBAS_ID", "option_shoreline"],
    aggfunc={
       'GID_0': 'first',
       'GID_1': 'first',
       'GID_2': 'first',
       'area_m2': 'sum',
       'area_ha': 'sum',
       'biodiversity_benefit': 'mean',
       'carbon_benefit_t_per_ha': 'mean',
       'planting_cost_usd_per_ha': 'mean',
       'regen_cost_usd_per_ha': 'mean',
       'avoided_ead_hist_2020_min': 'mean',
       'avoided_ead_hist_2020_mean': 'mean',
       'avoided_ead_hist_2020_max': 'mean',
       'avoided_ead_rcp4p5_2050_min': 'mean',
       'avoided_ead_rcp4p5_2050_mean': 'mean',
       'avoided_ead_rcp4p5_2050_max': 'mean',
       'avoided_ead_rcp8p5_2050_min': 'mean',
       'avoided_ead_rcp8p5_2050_mean': 'mean',
       'avoided_ead_rcp8p5_2050_max': 'mean',
    }
)

In [90]:
mangrove_agg.reset_index().to_parquet(opp_dir / "mangrove_with_EAD_grouped.geoparquet")

In [91]:
# TODO still needs properties sorting?
# mangrove_agg.to_file(opp_dir / "mangrove_with_EAD_grouped.geojson")

# Landslide

In [92]:
landslide = geopandas.read_parquet(opp_dir / "landslide_slope_vegetation_with_EAD.geoparquet")

In [93]:
list(landslide.columns)

['option_landuse',
 'HYBAS_ID',
 'GID_0',
 'GID_1',
 'GID_2',
 'geometry',
 'area_m2',
 'area_ha',
 'biodiversity_benefit',
 'carbon_benefit_t_per_ha',
 'planting_cost_usd_per_ha',
 'regen_cost_usd_per_ha',
 'hazard-ls_eq_tiled__road_damage_fraction_EAD__road',
 'hazard-ls_eq_tiled__road_lower_EAD__road',
 'hazard-ls_eq_tiled__road_upper_EAD__road',
 'hazard-LS_RF_Median_1980-2018__road_damage_fraction_EAD__road',
 'hazard-LS_RF_Median_1980-2018__road_lower_EAD__road',
 'hazard-LS_RF_Median_1980-2018__road_upper_EAD__road',
 'hazard-_landslide_sum__road_damage_fraction_EAD__road',
 'hazard-_landslide_sum__road_lower_EAD__road',
 'hazard-_landslide_sum__road_upper_EAD__road',
 'hazard-LS_RF_Median_1980-2018__rail_damage_fraction_EAD__rail',
 'hazard-_landslide_sum__rail_upper_EAD__rail',
 'hazard-ls_eq_tiled__rail_damage_fraction_EAD__rail',
 'hazard-ls_eq_tiled__rail_upper_EAD__rail',
 'hazard-LS_RF_Median_1980-2018__rail_lower_EAD__rail',
 'hazard-_landslide_sum__rail_damage_fraction_

In [94]:
l_core = [c for c in landslide.columns if "hazard-" not in c]
l_agg_model = landslide[l_core].copy()
l_to_sum = {
    'avoided_ead_hist_2020_min': [
        'hazard-_landslide_sum__rail_damage_fraction_EAD__rail',
        'hazard-_landslide_sum__road_damage_fraction_EAD__road',
    ],
    'avoided_ead_hist_2020_mean': [
        'hazard-_landslide_sum__rail_lower_EAD__rail',
        'hazard-_landslide_sum__road_lower_EAD__road',
    ],
    'avoided_ead_hist_2020_max': [
        'hazard-_landslide_sum__rail_upper_EAD__rail',
        'hazard-_landslide_sum__road_upper_EAD__road',
    ]
}
for key, keys in l_to_sum.items():
    l_agg_model[key] = landslide[keys].sum(axis=1)

l_agg_model.columns

Index(['option_landuse', 'HYBAS_ID', 'GID_0', 'GID_1', 'GID_2', 'geometry',
       'area_m2', 'area_ha', 'biodiversity_benefit', 'carbon_benefit_t_per_ha',
       'planting_cost_usd_per_ha', 'regen_cost_usd_per_ha',
       'avoided_ead_hist_2020_min', 'avoided_ead_hist_2020_mean',
       'avoided_ead_hist_2020_max'],
      dtype='object')

In [None]:
l_agg = l_agg_model.dissolve(
    by=["HYBAS_ID", "option_landuse"],
    aggfunc={
        'GID_0': 'first',
        'GID_1': 'first',
        'GID_2': 'first',
        'area_m2': 'sum',
        'area_ha': 'sum',
        'biodiversity_benefit': 'mean',
        'carbon_benefit_t_per_ha': 'mean',
        'planting_cost_usd_per_ha': 'mean',
        'regen_cost_usd_per_ha': 'mean',
        'avoided_ead_hist_2020_min': 'mean',
        'avoided_ead_hist_2020_mean': 'mean',
        'avoided_ead_hist_2020_max': 'mean',
    }
)

In [96]:
l_agg.reset_index().to_parquet(opp_dir / "landslide_slope_vegetation_with_EAD_grouped.geoparquet")

In [None]:
l_share = l_agg.copy()
l_share = l_share[l_share.avoided_ead_hist_2020_max > 0.99]
l_share.reset_index().to_file(opp_dir / "landslide_slope_vegetation_with_EAD_grouped_gt0.gpkg", driver="GPKG", engine="pyogrio")
l_share.head()

In [151]:
def columns_to_json(df):
    records = df.to_dict(orient='records')
    return [json.dumps(d).decode() for d in records]

In [None]:
l_csv = l_share.reset_index()[['geometry']].copy()
l_props = columns_to_json(l_share.reset_index()[[
    'HYBAS_ID',
    'GID_0',
    'GID_1',
    'GID_2',
    'option_landuse',
    'area_m2',
    'area_ha',
    'biodiversity_benefit',
    'carbon_benefit_t_per_ha',
    'planting_cost_usd_per_ha',
    'regen_cost_usd_per_ha',
]])

# id,string_id,layer,properties,geom
l_csv['properties'] = l_props
l_csv['layer'] = 'nbs_ls'
l_csv['geom'] = l_csv.geometry.to_wkt()
l_csv.drop(columns='geometry')
l_csv['id'] = numpy.arange(len(l_csv)) + 70_000_000
l_csv['string_id'] = "nbs_ls_" + l_share.reset_index().HYBAS_ID.astype(str)
l_csv = l_csv[["id","string_id","layer","properties","geom"]]
l_csv.head()

Unnamed: 0,id,string_id,layer,properties,geom
0,70000000,nbs_ls_1120009000,nbs_ls,"{""HYBAS_ID"":1120009000,""GID_0"":""TZA"",""GID_1"":""...","POLYGON ((38.989548 -5.411235, 38.989548 -5.41..."
1,70000001,nbs_ls_1120009010,nbs_ls,"{""HYBAS_ID"":1120009010,""GID_0"":""TZA"",""GID_1"":""...","MULTIPOLYGON (((38.960382 -5.440402, 38.958298..."
2,70000002,nbs_ls_1120009020,nbs_ls,"{""HYBAS_ID"":1120009020,""GID_0"":""TZA"",""GID_1"":""...","POLYGON ((38.981215 -5.434152, 38.981215 -5.43..."
3,70000003,nbs_ls_1120010690,nbs_ls,"{""HYBAS_ID"":1120010690,""GID_0"":""MOZ"",""GID_1"":""...","POLYGON ((40.349965 -13.99665, 40.348496 -13.9..."
4,70000004,nbs_ls_1120010730,nbs_ls,"{""HYBAS_ID"":1120010730,""GID_0"":""MOZ"",""GID_1"":""...","MULTIPOLYGON (((40.447881 -14.232067, 40.44371..."


In [182]:
l_csv.to_csv(opp_dir / "ls_features.csv", index=False)

In [203]:
# feature_id,hazard,properties,rcp,adaptation_name,adaptation_protection_level
l_abc_csv = l_csv[[
    'id',
]].copy().rename(columns={'id': 'feature_id'})

l_abc_csv['avoided_ead_hist_2020_min'] = l_share.reset_index().avoided_ead_hist_2020_min
l_abc_csv['avoided_ead_hist_2020_mean'] = l_share.reset_index().avoided_ead_hist_2020_mean
l_abc_csv['avoided_ead_hist_2020_max'] = l_share.reset_index().avoided_ead_hist_2020_max

l_abc_csv['slope_vegetation:native_planting'] = l_share.planting_cost_usd_per_ha.array * l_share.area_ha.array
l_abc_csv['slope_vegetation:natural_regeneration'] = l_share.regen_cost_usd_per_ha.array * l_share.area_ha.array

l_abc_csv = l_abc_csv.melt(
    id_vars=[
        'feature_id',
        'avoided_ead_hist_2020_min',
        'avoided_ead_hist_2020_mean',
        'avoided_ead_hist_2020_max',
    ],
    var_name='adaptation_name',
    value_name='adaptation_cost'
)

l_abc_csv['hazard'] = 'ls'
l_abc_csv['properties'] = columns_to_json(l_abc_csv[[
    'avoided_ead_hist_2020_min',
    'avoided_ead_hist_2020_mean',
    'avoided_ead_hist_2020_max',
    'adaptation_cost',
]].rename(columns={
    'avoided_ead_hist_2020_min': 'avoided_ead_amin',
    'avoided_ead_hist_2020_mean': 'avoided_ead_mean',
    'avoided_ead_hist_2020_max': 'avoided_ead_amax',
}))
l_abc_csv.drop(columns=[
    'avoided_ead_hist_2020_min',
    'avoided_ead_hist_2020_mean',
    'avoided_ead_hist_2020_max',
    'adaptation_cost',
], inplace=True)
l_abc_csv['rcp'] = 'baseline'
l_abc_csv['adaptation_protection_level'] = 1
l_abc_csv = l_abc_csv[['feature_id','hazard','rcp','adaptation_name','adaptation_protection_level','properties']].sort_values(by='feature_id')
l_abc_csv.head()

Unnamed: 0,feature_id,hazard,rcp,adaptation_name,adaptation_protection_level,properties
0,70000000,ls,baseline,slope_vegetation:native_planting,1,"{""avoided_ead_amin"":580.2592330599725,""avoided..."
211890,70000000,ls,baseline,slope_vegetation:natural_regeneration,1,"{""avoided_ead_amin"":580.2592330599725,""avoided..."
211891,70000001,ls,baseline,slope_vegetation:natural_regeneration,1,"{""avoided_ead_amin"":1160.518466119945,""avoided..."
1,70000001,ls,baseline,slope_vegetation:native_planting,1,"{""avoided_ead_amin"":1160.518466119945,""avoided..."
211892,70000002,ls,baseline,slope_vegetation:natural_regeneration,1,"{""avoided_ead_amin"":1160.518466119945,""avoided..."


In [204]:
l_abc_csv.to_csv(opp_dir / "ls_adaptation_cost_benefit.csv", index=False)

In [205]:
l_gj = l_share.reset_index()[[
    'HYBAS_ID',
    'option_landuse',
    'avoided_ead_hist_2020_mean',
    'regen_cost_usd_per_ha',
    'geometry'
]].copy().rename(columns={'avoided_ead_hist_2020_mean': 'avoided_ead_mean', 'regen_cost_usd_per_ha': 'adaptation_cost'})
l_gj['id'] = l_csv.id
l_gj.head()

Unnamed: 0,HYBAS_ID,option_landuse,avoided_ead_mean,adaptation_cost,geometry,id
0,1120009000,other,290.129617,81.0,"POLYGON ((38.98955 -5.41124, 38.98955 -5.41332...",70000000
1,1120009010,other,580.259233,81.0,"MULTIPOLYGON (((38.96038 -5.44040, 38.95830 -5...",70000001
2,1120009020,other,580.259233,81.0,"POLYGON ((38.98121 -5.43415, 38.98121 -5.43832...",70000002
3,1120010690,other,1.476538,57.0,"POLYGON ((40.34996 -13.99665, 40.34850 -13.996...",70000003
4,1120010730,crops,1.476538,57.0,"MULTIPOLYGON (((40.44788 -14.23207, 40.44371 -...",70000004


In [206]:
l_gj.to_file(opp_dir / "nbs_ls.json", driver="GeoJSONSeq", engine="pyogrio")

In [207]:
l_gjp = l_gj.copy()
l_gjp.geometry = l_gj.geometry.centroid
l_gjp.to_file(opp_dir / "nbs_ls_points.json", driver="GeoJSONSeq", engine="pyogrio")


  l_gjp.geometry = l_gj.geometry.centroid


In [209]:
for suffix in ["", "_points"]:
    subprocess.run([
        "tippecanoe",
        "--read-parallel",
        "--force",
        "--drop-densest-as-needed",
        "--use-attribute-for-id=id",
        "-zg",
        "-o",
        f"{etl_path}/../tileserver/vector/data/nbs_ls{suffix}.mbtiles",
        f"{opp_dir}/nbs_ls{suffix}.json"
    ])

For layer 0, using name "nbs_ls"
211890 features, 231373002 bytes of geometry and attributes, 5935298 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes
Choosing a maxzoom of -z4 for features typically 28849 feet (8793 meters) apart, and at least 5960 feet (1817 meters) apart
Choosing a maxzoom of -z8 for resolution of about 1440 feet (438 meters) within features
tile 1/1/0 size is 506157 (probably really 506157) with detail 12, >500000    
Going to try keeping the sparsest 79.03% of the features to make it fit
tile 2/2/1 size is 992467 (probably really 992467) with detail 12, >500000    
Going to try keeping the sparsest 40.30% of the features to make it fit
tile 2/2/1 size is 535691 (probably really 535691) with detail 12, >500000    
Going to try keeping the sparsest 30.09% of the features to make it fit
tile 3/5/3 size is 515813 (probably really 515813) with detail 12, >500000    
Going to try keeping the sparsest 77.55% of the features to make it fit
tile 3/6/3 size is 55

# River

In [98]:
river = geopandas.read_parquet(opp_dir / "river_basin_afforestation_with_EAD.geoparquet")

In [99]:
river.columns

Index(['GID_0', 'GID_1', 'GID_2', 'geometry', 'area_m2', 'area_ha',
       'biodiversity_benefit', 'carbon_benefit_t_per_ha',
       'planting_cost_usd_per_ha', 'regen_cost_usd_per_ha',
       'hazard-inunriver_historical_MAX_1980_EAD__road',
       'hazard-inunriver_historical_MEAN_1980_EAD__road',
       'hazard-inunriver_historical_MIN_1980_EAD__road',
       'hazard-inunriver_rcp4p5_MAX_2030_EAD__road',
       'hazard-inunriver_rcp4p5_MAX_2050_EAD__road',
       'hazard-inunriver_rcp4p5_MAX_2080_EAD__road',
       'hazard-inunriver_rcp4p5_MEAN_2030_EAD__road',
       'hazard-inunriver_rcp4p5_MEAN_2050_EAD__road',
       'hazard-inunriver_rcp4p5_MEAN_2080_EAD__road',
       'hazard-inunriver_rcp4p5_MIN_2030_EAD__road',
       'hazard-inunriver_rcp4p5_MIN_2050_EAD__road',
       'hazard-inunriver_rcp4p5_MIN_2080_EAD__road',
       'hazard-inunriver_rcp8p5_MAX_2030_EAD__road',
       'hazard-inunriver_rcp8p5_MAX_2050_EAD__road',
       'hazard-inunriver_rcp8p5_MAX_2080_EAD__road',
   

In [None]:
r_core = [c for c in river.columns if "hazard-" not in c]
r_agg_sector = river[core].copy()
r_to_sum = {
    'hazard-floodMapGL_EAD': ['hazard-floodMapGL_EAD__road', 'hazard-floodMapGL_EAD__rail',],
}
for agg in ("MIN", "MEAN", "MAX"):
    r_to_sum[f'hazard-inunriver_historical_{agg}_hist_EAD'] = [
        f'hazard-inunriver_historical_{agg}_hist_EAD__road',
        f'hazard-inunriver_historical_{agg}_hist_EAD__rail',
    ]
for rcp in ("rcp4p5", "rcp8p5"):
    for epoch in ("2030", "2050", "2080"):
        for agg in ("MIN", "MEAN", "MAX"):
            r_to_sum[f'hazard-inunriver_{rcp}_{agg}_{epoch}_EAD'] = [
                f'hazard-inunriver_{rcp}_{agg}_{epoch}_EAD__rail',
                f'hazard-inunriver_{rcp}_{agg}_{epoch}_EAD__road',
            ]

for key, keys in r_to_sum.items():
    r_agg_sector[key] = mangrove[keys].sum(axis=1)
r_agg_sector.head()

In [None]:
r_to_agg = {
    'avoided_ead_hist_2020': [
       'hazard-inunriver_historical_MAX_1980_EAD',
       'hazard-inunriver_historical_MEAN_1980_EAD',
       'hazard-inunriver_historical_MIN_1980_EAD',
       'hazard-floodMapGL_EAD',
    ],
    'avoided_ead_rcp4p5_2050': [
        'hazard-inunriver_rcp4p5_MIN_2050_EAD',
        'hazard-inunriver_rcp4p5_MEAN_2050_EAD',
        'hazard-inunriver_rcp4p5_MAX_2050_EAD',
    ],
    'avoided_ead_rcp8p5_2050': [
        'hazard-inunriver_rcp8p5_MIN_2050_EAD',
        'hazard-inunriver_rcp8p5_MEAN_2050_EAD',
        'hazard-inunriver_rcp8p5_MAX_2050_EAD',
    ]
}
r_agg_model = river[r_core].copy()

for key, keys in r_to_agg.items():
    for op in ("min", "mean", "max"):
        r_agg_model[f"{key}_{op}"] = r_agg_sector[keys].agg(op, axis=1)

r_agg_model.columns

Index(['GID_0', 'GID_1', 'GID_2', 'geometry', 'area_m2', 'area_ha',
       'biodiversity_benefit', 'carbon_benefit_t_per_ha',
       'planting_cost_usd_per_ha', 'regen_cost_usd_per_ha',
       'avoided_ead_hist_2020_min', 'avoided_ead_hist_2020_mean',
       'avoided_ead_hist_2020_max', 'avoided_ead_rcp4p5_2050_min',
       'avoided_ead_rcp4p5_2050_mean', 'avoided_ead_rcp4p5_2050_max',
       'avoided_ead_rcp8p5_2050_min', 'avoided_ead_rcp8p5_2050_mean',
       'avoided_ead_rcp8p5_2050_max'],
      dtype='object')

In [78]:
r_agg_model.shape

(516261, 19)

In [79]:
r_agg_model.to_parquet(opp_dir / "river_basin_afforestation_with_EAD_grouped.geoparquet")