In [None]:
import geopandas
import pandas
from pyproj import Geod

In [None]:
# Hazard layer to process; interpolated into the input and output file names below.
hazard = "heat"
# Pattern applied to the melted column names, which look like
# "epoch:<e>|hazard:<h>|quantile:<q>|ssp:<s>". Written as a raw string so that
# `\|` reaches the regex engine as an escaped pipe instead of relying on Python
# passing an invalid string escape through unchanged (which emits a warning).
extract_regex = r"epoch:(?P<epoch>[^|]*)\|hazard:(?P<hazard>[^|]*)\|quantile:(?P<quantile>[^|]*)\|ssp:(?P<ssp>[^|]*)"
# Extracted fields used as index / grouping columns; "hazard" is deliberately
# absent because it is used as the pivot column instead.
extract_colnames = ["epoch", "quantile", "ssp"]
# Rows are kept for the summary only when their value is strictly above this.
threshold = 1

In [None]:
# Geodesic calculator on the WGS84 ellipsoid; used below to measure link lengths.
geod = Geod(ellps="WGS84")

In [None]:
# Load the link table for the selected hazard; columns whose name contains ":"
# are treated as hazard data columns further down.
links_path = f"exposure/Supply network_link.{hazard}.parquet"
wide_df = geopandas.read_parquet(links_path)

# Geodesic length of every link geometry, stored as an integer
# (astype("int") truncates the fractional part).
link_lengths = wide_df.geometry.apply(geod.geometry_length)
wide_df["length_m"] = link_lengths.astype("int")

In [None]:
# Hazard data columns are recognised by the ":" in their name.
datacols = [col for col in wide_df.columns if ":" in col]
# Link attributes carried through unchanged into the long table.
keepcols = ["NO", "FROMNODENO", "TONODENO", "split", "length_m"]

# Wide -> long: one row per (link row, data column).
melted = wide_df.melt(id_vars=keepcols, value_vars=datacols)
# Split each former column name into its epoch / hazard / quantile / ssp parts.
hazard_params = melted["variable"].str.extract(extract_regex)

# Attach the parsed parameters, drop the raw column name, and keep only rows
# with a strictly positive value.
long_df = pandas.concat([melted, hazard_params], axis=1).drop(columns="variable")
long_df = long_df[long_df["value"] > 0]

In [None]:
# Index by link attributes plus scenario parameters; spread the hazard names
# across columns, with the hazard value as the cell content.
indexcols = [*keepcols, *extract_colnames]
long_neat = long_df.pivot(index=indexcols, columns="hazard", values="value")
# Combinations missing for a hazard become 0 rather than NaN.
long_neat = long_neat.fillna(0)

In [None]:
# Save the pivoted per-link table as CSV in the exposure/ directory.
long_neat.to_csv(f"exposure/Supply network_link.{hazard}.csv")

# Summarise

In [None]:
# One geometry per link id: keep the first row seen for each NO.
# NOTE(review): if a link appears in several rows (e.g. one per `split`), only the
# first row's geometry is used for the admin lookup below — confirm this is intended.
lookup = wide_df[['NO', 'geometry']].drop_duplicates(subset='NO')

In [None]:
# Administrative boundary layer; the cells below use its GID_0, NAME_0, GID_1
# and NAME_1 columns. Presumably GADM level-1 regions — verify against the file.
adm1 = geopandas.read_file("gadm.gpkg")

In [None]:
# Spatially match each link geometry to the admin regions it intersects, then
# keep only the admin identifiers and the link id. With how="left", links
# without a match are kept with missing admin fields.
# NOTE(review): a link intersecting several regions presumably yields one row
# per region — confirm that counting it in each region is intended.
adm1_lookup_cols = ["GID_0", "NAME_0", "GID_1", "NAME_1", "NO"]
links_with_adm1 = geopandas.sjoin(lookup, adm1, how="left", predicate="intersects")
lookup_adm1 = links_with_adm1[adm1_lookup_cols]
lookup_adm1.head()

In [None]:
# Index the admin lookup by link id so it can be joined onto tables indexed by NO.
# Rebind rather than mutate in place, and skip the step when NO is already the
# index, so re-running this cell does not raise KeyError.
if lookup_adm1.index.name != "NO":
    lookup_adm1 = lookup_adm1.set_index("NO")

In [None]:
# Keep only the rows whose value is strictly greater than the configured threshold.
above_threshold_df = long_df[long_df["value"] > threshold]

In [None]:
# Attach the admin attributes to every above-threshold row, matching on link id.
above_threshold_by_no = above_threshold_df.set_index("NO")
above_threshold_df = above_threshold_by_no.join(lookup_adm1, how="left")

In [None]:
# Preview the above-threshold rows together with their joined admin attributes.
above_threshold_df.head()

In [None]:
# Index for the per-link summary: link attributes, admin attributes, then the
# scenario parameters parsed from the column names.
indexcols_summary = [
    "NO", "FROMNODENO", "TONODENO", "split",
    "GID_0", "NAME_0", "GID_1", "NAME_1",
    *extract_colnames,
]

# Bring NO back as a column, then spread the hazard names across columns with
# the link length (not the hazard value) as the cell content.
above_threshold_flat = above_threshold_df.reset_index()
above_threshold_df_neat = above_threshold_flat.pivot(
    index=indexcols_summary, columns="hazard", values="length_m"
)
# Combinations absent for a hazard count as zero length.
above_threshold_df_neat = above_threshold_df_neat.fillna(0)
above_threshold_df_neat.head()

In [None]:
# Aggregate link lengths per admin region and scenario.
groupby_cols = ["GID_0", "NAME_0", "GID_1", "NAME_1", *extract_colnames]

# Remove the link-level identifiers first so that only the hazard length
# columns are summed.
link_level = above_threshold_df_neat.reset_index()
region_level = link_level.drop(columns=["NO", "FROMNODENO", "TONODENO", "split"])
summary = region_level.groupby(groupby_cols).sum()
summary

In [None]:
# Save the per-region, per-scenario summary as CSV in the exposure/ directory.
summary.to_csv(f"exposure/admin1.{hazard}.csv")

In [None]:
# One row per admin region: spread every epoch / quantile / ssp combination
# across columns. No `values` is given, so all remaining columns are pivoted.
admin_index_cols = ["GID_0", "NAME_0", "GID_1", "NAME_1"]
summary_long = summary.reset_index()
summary_wide = summary_long.pivot(index=admin_index_cols, columns=extract_colnames)
# Move the admin fields back to columns and key the table by GID_1 only.
summary_wide = summary_wide.reset_index().set_index("GID_1")
summary_wide

In [None]:
# Flatten the multi-level column labels into single strings joined by "_".
# Only tuple labels are flattened: after a first run the labels are plain
# strings, and iterating those would split them into characters and mangle
# the names, so they are passed through untouched on a re-run.
newcols = []
for c in summary_wide.columns:
    if isinstance(c, tuple):
        # Skip empty levels (present on the labels of the admin columns);
        # str() keeps the join working if a level value is not a string.
        parts = [str(p) for p in c if p != ""]
        newcols.append("_".join(parts))
    else:
        newcols.append(c)
summary_wide.columns = newcols

In [None]:
# Attach the wide exposure summary to the admin boundaries, matching on GID_1.
# The admin name/id columns are dropped from the summary side because the
# boundary layer already carries them.
exposure_wide = summary_wide.drop(columns=['GID_0', 'NAME_0', 'NAME_1'])
summary_joined = adm1.set_index("GID_1").join(exposure_wide)

# Regions without any above-threshold link get 0 in the exposure columns.
# Only those columns are filled: a blanket fillna(0) would also overwrite
# missing values in the boundary layer's own attribute columns with 0.
exposure_fill = {col: 0 for col in exposure_wide.columns}
summary_gdf = summary_joined.fillna(exposure_fill)
summary_gdf.head()

In [None]:
# Write the admin boundaries with their exposure columns to a GeoPackage file.
summary_gdf.to_file(f"exposure/admin1.{hazard}.gpkg")