In [None]:
import json
from collections import defaultdict
from glob import glob
from pathlib import Path

import geopandas
import pandas

In [None]:
# Read the project configuration (one directory above the notebook) and
# resolve the root directory that all data paths below are built from.
config = json.loads(Path("../config.json").read_text())
base_path = Path(config["base_path"])
base_path

In [None]:
def list_paths(paths, prefix):
    """Return ``paths`` sorted, each with a leading ``prefix`` removed.

    Only a prefix at the very start of a path is stripped. Later occurrences
    of the same text inside the path are kept, and paths that do not start
    with ``prefix`` are returned unchanged.

    Parameters
    ----------
    paths : iterable of str
        Path strings to shorten for display.
    prefix : str or path-like
        Leading part to remove; converted with ``str()`` before comparison.

    Returns
    -------
    list of str
        The shortened paths in sorted order.
    """
    prefix = str(prefix)
    # Slice instead of str.replace so that only the leading match is dropped.
    return sorted(p[len(prefix) :] if p.startswith(prefix) else p for p in paths)

In [None]:
# Collect every CSV found recursively under the Output2 directory whose full
# path contains "Feature", then show the matches relative to that directory.
output_dir = base_path / "Bangladesh GCA-UNOPS data" / "Data" / "Output2"
output_csv_paths = glob(str(output_dir / "**" / "*.csv"), recursive=True)
feature_damage_paths = [
    csv_path for csv_path in output_csv_paths if "Feature" in csv_path
]
list_paths(feature_damage_paths, output_dir)

In [None]:
# Recursively list everything under the infrastructure-layers directory and
# show the entries relative to it.
infrastructure_dir = base_path / "Bangladesh_GIS_database" / "infrastructure-layers"
feature_paths = glob(str(infrastructure_dir / "**" / "*"), recursive=True)
list_paths(feature_paths, infrastructure_dir)

In [None]:
# Take the first listed infrastructure path that contains "health" anywhere.
# NOTE(review): raises StopIteration when nothing matches, and glob results
# are not sorted, so which entry comes first may vary - confirm this is the
# intended file.
health_sites = next(filter(lambda path: "health" in path, feature_paths))
health_sites

In [None]:
# Load the selected health layer and list the columns it carries
features = geopandas.read_file(health_sites, engine="pyogrio")
[*features.columns]

In [None]:
# Number of geometries, number of distinct geometries, and the geometry of
# the row labelled 0
geometries = features.geometry
len(geometries), len(geometries.unique()), features.loc[0, "geometry"]

In [None]:
# Take the first "Feature" damage CSV whose path contains "health" anywhere.
# NOTE(review): raises StopIteration when nothing matches - confirm the
# expected file is always present.
health_sites_damages = next(
    filter(lambda path: "health" in path, feature_damage_paths)
)
health_sites_damages

In [None]:
# Load the CSV selected as health_sites_damages and inspect its column names
damage = pandas.read_csv(health_sites_damages)
damage.columns

## Polders, restoration sites

In [None]:
# distance for selection of polder-to-restoration-site relations (in metres)
DISTANCE_WITHIN_M = 5_000

In [None]:
# Load the selected protected polders and reproject them to EPSG:9678
polders_path = (
    base_path / "infrastructure" / "protectedPolders" / "Polders_selected.shp"
)
protected_polders = geopandas.read_file(polders_path, engine="pyogrio").to_crs(
    epsg=9678
)

In [None]:
# Number of distinct polder IDs next to the number of polder rows
unique_polder_ids = protected_polders["polder_ID"].unique()
len(unique_polder_ids), len(protected_polders)

In [None]:
# Load the potential mangrove restoration sites and explode multi-part
# geometries into one row per part. index_parts=False keeps the original row
# label on every part, so index labels can repeat afterwards.
# NOTE(review): the file name suggests the layer is already in EPSG:9678;
# no reprojection is applied here - confirm.
sites_path = (
    base_path
    / "nature-ecosystems"
    / "Potential Mangrove Restoration Sites"
    / "potential_sites_epsg9678.gpkg"
)
restoration_sites = geopandas.read_file(sites_path, engine="pyogrio").explode(
    index_parts=False
)

In [None]:
# Give every exploded site row its own sequential integer id (0 .. n-1)
restoration_sites = restoration_sites.assign(
    potential_site_id=range(len(restoration_sites))
)

In [None]:
# Show the last two rows of the restoration sites table
restoration_sites.tail(2)

In [None]:
# Copy of the restoration sites whose geometries are grown outwards by
# DISTANCE_WITHIN_M, so that an intersection test against the buffered shape
# finds everything within that distance of the original site.
buffered_geometry = restoration_sites.geometry.buffer(DISTANCE_WITHIN_M)
restoration_sites_buf = restoration_sites.copy()
restoration_sites_buf.geometry = buffered_geometry

In [None]:
# Pair every polder with each buffered restoration site it intersects.
# how="left" keeps all polder rows, so a polder with no site in range still
# appears once, with a missing potential_site_id.
polder_site_join = geopandas.sjoin(
    protected_polders, restoration_sites_buf, how="left", predicate="intersects"
)
# Keep only the two id columns, ordered by polder, with a fresh 0..n-1 index
site_polder_intersection = (
    polder_site_join[["polder_ID", "potential_site_id"]]
    .sort_values(by="polder_ID")
    .reset_index(drop=True)
)

In [None]:
# Display the polder/site pairs; a missing potential_site_id marks a polder
# that no buffered site intersects (the join above uses how="left")
site_polder_intersection

In [None]:
# each site is within DISTANCE_WITHIN_M of a set of one or more polders
# that may be protected if the site is restored
# (site 1 may protect polders A and B, site 2 => polder C)
site_to_polders = defaultdict(set)
# this is all the different sets of polders that are within DISTANCE_WITHIN_M
# of any potential site
# (polders A and B or polder C)
polder_sets = set()
# each set of polders is within DISTANCE_WITHIN_M of a set of sites that
# may protect them
# (polders A and B may be protected by site 1, polder C => site 2)
polder_set_to_sites = defaultdict(set)

# The polder/site join is a left join, so polders with no site in range have a
# missing potential_site_id. Drop those rows: a missing id is not a site and
# must not end up as a key in the mappings.
lookup = site_polder_intersection.dropna(subset=["potential_site_id"]).set_index(
    "potential_site_id"
)
# Grouping always yields a Series of polder IDs per site, so a site with a
# single polder needs no special case and polder_ID may be of any hashable
# type. sort=False keeps sites in order of first appearance.
for site, site_polders in lookup.groupby(level="potential_site_id", sort=False)[
    "polder_ID"
]:
    # ids come back as floats when the join introduced missing values;
    # normalise to plain ints (equal and hash-equal to the float form)
    site = int(site)
    polder_set = frozenset(site_polders)
    site_to_polders[site] = polder_set
    polder_sets.add(polder_set)
    polder_set_to_sites[polder_set].add(site)