# Workflow #6

Comparative assessment: Network simplification

**Please note:**
For this comparison, you need to re-compute centrality with network simplification deactivated. Store the resulting files in a subdirectory named `origNetNoSimplify` inside the `data` directory.

In [None]:
import algo.helper as h
import os.path
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

# settings
# Names of the areas of interest (AOIs) to process; each name replaces the
# "<aoi_name>" placeholder in the file name templates defined below.
aoi_names = ["at_wien", "at_zs", "at_ib", "at_no", "at_zw", "at_graz_15"]

# recalc=False re-uses the stored statistics CSV (if it exists) instead of
# running the comparison again.
recalc = True
generate_plots = True
# NOTE(review): the directory name refers to "centr_subsampling" although this
# workflow compares network simplification - possibly a leftover; confirm.
plot_dir = os.path.join("plots", "centr_subsampling")

file_suffix = ""

dir_data = "data"

mode = "bike_incwalk"
# tolerable access is determined by input network: all segments that have an index value assigned
# (other than NULL, > 0) but have mode access set to False
access = "bicycle"

# plot settings
P_SIZE_S = (4, 3)
# colors: distance variants
P_C_D2 = "#2C8DBC"
P_C_D4 = "#056390"
P_C_D7 = "#003650"
# colors: min, mean, max
P_C_MIN = "#00A246"
P_C_MEAN = "#270D9D"
P_C_MAX = "#C60049"

# generated params
# File name templates: "<aoi_name>" is a literal placeholder that is substituted
# per AOI via str.replace, so these are plain strings (not f-strings).
f_network_cent_simpnet = os.path.join(dir_data, "r_<aoi_name>_edges.gpkg")
f_network_cent_fullnet = os.path.join(dir_data, "origNetNoSimplify", "r_<aoi_name>_edges.gpkg")
f_compare_stats_out = os.path.join(dir_data, "r_c_simp_stats.csv")

In [None]:
def _load_centrality_edges(path, bbox):
    """Read a centrality result file and index it by ``edge_id``.

    Args:
        path: Path of the edge file to read.
        bbox: Bounding box handed to ``gpd.read_file`` to limit the features
            that are read.

    Returns:
        The loaded GeoDataFrame, indexed by ``edge_id`` (the index is named
        ``"index"`` and the ``edge_id`` column is kept), with a ``length``
        column holding the geometry length of every row.
    """
    edges = gpd.read_file(path, bbox=bbox, engine='pyogrio')  # pot. speedup: use arrow
    # presumably an "index" column is a leftover of an earlier export; it is
    # removed before the index itself is named "index"
    if "index" in edges.columns:
        edges.drop(columns=["index"], inplace=True)
    edges.set_index("edge_id", inplace=True, drop=False)
    edges.index.rename("index", inplace=True)
    edges["length"] = edges.length
    return edges


def _move_refnet_marker(name):
    """Turn a ``..._<x>_refnet`` column name into ``..._refnet_<x>``.

    Names that do not end in ``_refnet`` are returned unchanged.
    """
    parts = name.split("_")
    # a name without any "_" cannot carry a trailing "_refnet" suffix
    if len(parts) < 2 or parts[-1] != "refnet":
        return name
    parts[-2], parts[-1] = "refnet", parts[-2]
    return "_".join(parts)


def run_comparison(aoi, centr_summary, centr_cdf, plot_types=("sbc",), plot_nws=(300, 600, 1500)):
    """Compare centrality results of the simplified and the full network for one AOI.

    Loads both result files of ``aoi``, joins the centrality columns of the
    simplified network onto the full-network edges (marked with ``refnet`` in
    the column name), clips the result to the core AOI extent and runs
    ``h.centr_comparison`` for every matching ``*_sum`` centrality column.

    Args:
        aoi: AOI name, substituted for ``<aoi_name>`` in the file templates.
        centr_summary: List that receives every non-``None`` result of
            ``h.centr_comparison`` (modified in place).
        centr_cdf: Currently unused.
        plot_types: Currently unused.
        plot_nws: Currently unused.

    Returns:
        None. If one of the two input files is missing, an error message is
        printed and nothing is appended.
    """
    fn_full = f_network_cent_fullnet.replace("<aoi_name>", aoi)
    fn_simp = f_network_cent_simpnet.replace("<aoi_name>", aoi)
    if not (os.path.exists(fn_full) and os.path.exists(fn_simp)):
        print(f"ERR: required centrality result files for aoi '{aoi}' not found.")
        return
    dir_detail_plot = os.path.join("plots", aoi)
    aoi_core_extent = h.get_aoi_extent(aoi)
    # files are read clipped to the bounding box of the core extent only -
    # the exact clip to the core extent happens after the join (see below)
    bbox = aoi_core_extent.iloc[0].geometry.bounds

    centr_df_fullnet = _load_centrality_edges(fn_full, bbox)
    print("loaded input gdf. for full network", len(centr_df_fullnet), "edges")
    centr_df_simpnet = _load_centrality_edges(fn_simp, bbox)

    # append the centrality columns of the simplified network to the fullnet df;
    # columns present in both get the "_refnet" suffix
    centr_df_simpnet = centr_df_simpnet.filter(regex=r"(centr)\w+", axis=1)
    centr_df = centr_df_fullnet.join(centr_df_simpnet, rsuffix="_refnet")
    # rename columns (for compatibility with helper function)
    centr_df.rename(columns=_move_refnet_marker, inplace=True)

    print("clipping to core AOI extent...")
    centr_df = centr_df.clip(aoi_core_extent)
    print("done.", len(centr_df), "edges")

    # now run comparisons
    for cn in centr_df.columns:
        is_candidate = (
            cn.startswith("centr")
            and cn.endswith("_sum")
            and "refnet" in cn
            and "_nws_" not in cn
        )
        if not is_candidate:
            continue
        # c: simplified
        c = h.CentralityDef.from_str(cn)
        # corig: non-simplified, full network
        corig = c.clone()
        corig.refnet = False
        print("comparing", c, "to", corig)
        result = h.centr_comparison(
            aoi,
            centr_df,
            c,
            corig,
            c_label=f"{c.cut}_simplified",
            dir_detail_plot=dir_detail_plot,
            generate_plots=generate_plots,
        )
        if result is not None:
            centr_summary.append(result)

In [None]:
# Re-use the stored comparison statistics unless a recalculation is requested
# or no stored file exists yet.
if not recalc and os.path.exists(f_compare_stats_out):
    # to_csv below writes the DataFrame index as first column; read it back as
    # the index so that the cached table has the same columns as a fresh one
    # (otherwise an extra "Unnamed: 0" column appears)
    diffstats = pd.read_csv(f_compare_stats_out, index_col=0)
    display(diffstats.head())
else:
    centr_summary = []
    centr_cdf = []
    for aoi in aoi_names:
        run_comparison(aoi, centr_summary, centr_cdf)

    diffstats = pd.DataFrame.from_dict(centr_summary, orient="columns")
    display(diffstats.head())
    diffstats.to_csv(f_compare_stats_out)

In [None]:
# display the table of comparison statistics
diffstats

In [None]:
# descriptive statistics of the share of changed HC segments,
# one row per comparison pair ("label_compare")
hc_ch_grp = diffstats.groupby(["label_compare"])["hc_seg_share_changed"].describe()
hc_ch_grp

In [None]:
# share of HC segments changed per comparison pair: min, mean and max over the AOIs
# (one value per AOI), ordered by mean; only pairs with more than 4 values are shown
hc_ch_grp.loc[hc_ch_grp["count"] > 4, ["min", "mean", "max"]].sort_values(by="mean").plot(figsize=(20, 4))

In [None]:
# comparison pairs with more than 4 values, listed by descending spread (max - min)
tmp = hc_ch_grp.loc[hc_ch_grp["count"] > 4]
trange = tmp["max"].sub(tmp["min"]).sort_values(ascending=False)
tmp.loc[trange.index]

In [None]:
# summary statistics of the "hcq_dn_min" column
diffstats["hcq_dn_min"].describe()

In [None]:
# summary statistics of the "hcp_dn_min" column
diffstats["hcp_dn_min"].describe()

In [None]:
# all values of the "hcp_dn_mean" column
diffstats["hcp_dn_mean"]

In [None]:
# box plots of "hcp_dn_mean" and "hcq_dn_mean", grouped by "label_c"
diffstats.loc[:, ["label_c", "hcp_dn_mean", "hcq_dn_mean"]].boxplot(by="label_c", figsize=(10, 5))

## High-Centrality segments

In [None]:
# HC segments (p-based): share changed, relative to all segments that were
# classified as HC in cref as well as in c;
# min / mean / max per "label_c", computed across all AOIs and centrality variants
hc_share_by_label = diffstats.groupby("label_c")["hc_seg_share_changed"]
for stat in ("min", "mean", "max"):
    hc_share_by_label.agg(stat).plot(legend=True, label=stat)
# NOTE(review): axis label mentions "subsampling distance" although this
# notebook compares network simplification - confirm the intended label
plt.xlabel("subsampling distance")

In [None]:
# HC segments: mean share changed,
# grouped by "label_cref" (route distance cutoff)
diffstats.groupby(["label_cref"])["hc_seg_share_changed"].mean()

In [None]:
# HC segments: mean share changed per "label_c", as line plot
diffstats.groupby(["label_c"])["hc_seg_share_changed"].mean().plot(legend=True)


In [None]:
# summary statistics of the share of changed HC segments over all rows
diffstats["hc_seg_share_changed"].describe()

In [None]:
# mean absolute normalized delta: min, mean and max per "label_c"
# (each curve carries its own legend label and color)
diffstats.groupby("label_c").hcp_dn_abs_mean.min().plot(legend=True, label="min", color=P_C_MIN, figsize=P_SIZE_S)
diffstats.groupby("label_c").hcp_dn_abs_mean.mean().plot(legend=True, label="mean", color=P_C_MEAN)
diffstats.groupby("label_c").hcp_dn_abs_mean.max().plot(legend=True, label="max", color=P_C_MAX)


In [None]:
# summary statistics of the "hcp_dn_abs_mean" column
diffstats["hcp_dn_abs_mean"].describe()

In [None]:
# summary statistics of the "dn_incr_gt10" column
diffstats["dn_incr_gt10"].describe()

In [None]:
# summary statistics of the "dn_decr_gt10" column
diffstats["dn_decr_gt10"].describe()

## All segments

In [None]:
### All segments: normalized change
# min / mean / max of "dn_mean" per "label_c"
dn_mean_by_label = diffstats.groupby("label_c")["dn_mean"]
for stat in ("min", "mean", "max"):
    dn_mean_by_label.agg(stat).plot(legend=True, label=stat)
plt.xlabel("subsampling distance")
# TODO: same for mean absolute change -> dn_abs_mean