# 06. POI experiments, Denmark-wide
## Project: Bicycle node network loop analysis

This notebook experiments with adding more POIs, creating new loop censuses, for scenario 2.  
Please select `denmark` as the `study_area`, and `scenarioid: 2` in the `config.yml`.

Contact: Michael Szell (michael.szell@gmail.com)

Created: 2025-08-11  
Last modified: 2025-08-14

### Experimental setups
1. Add POIs to random nodes
2. Add POIs to nodes in H3 grid cells with lowest water densities (q0.15)
3. Add POIs to nodes in H3 grid cells with lowest loop density (q0.30 to get more nodes)
4. Add POIs to nodes in H3 grid cells with highest node density (q0.10)
5. Add POIs to nodes in H3 grid cells with lowest water densities (q0.5), and of those lowest loops (q0.5), and of those highest node density (q0.5).

Setups are limited to different numbers of cells due to varying node densities. A POI added to a node will snap to all adjacent links.

Performance metrics:  
Compare results in the range baseline (no node gets a POI) --- best possible (all nodes get a POI)
1. Decrease the percentage of zero-loop nodes (improvement over the 29% baseline)
2. Decrease the number of cells with zero loop bits?

## To do

- [ ] Systematically add water to water deserts and measure the loop increase (for the family e-bike scenario only). Identify "low-hanging fruit", i.e. the "biggest bang for the buck".

## Parameters

In [None]:
# Execute the shared setup script in this notebook's interactive namespace (-i),
# so the script can see variables that already exist here.
%run -i setup_parameters.py
load_data = True  # Set to False if data are huge and have already been loaded
debug = True  # Set to True for extra plots and verbosity
# Apply the project plot style sheet. `plt` and `PATH` are not defined in this
# cell — presumably they come from setup_parameters.py (TODO confirm).
plt.style.use(PATH["parameters"] + "plotstyle.mplstyle")

In [None]:
# Create the scenario containers only on a fresh kernel; if they already exist
# (e.g. from a previous scenario run) they are kept. This allows running
# multiple scenarios, although this notebook only runs scenario 2.
if "allloops_dict" not in globals():
    allloops_dict = {}
    dfunified_scenarios = {}

# (Re)start the current scenario with an empty entry
allloops_dict[SCENARIOID] = {}

In [None]:
# Report which scenario and study area are active, followed by every
# key/value pair of the scenario configuration.
print(" ".join(("Running scenario", str(SCENARIOID), "in", STUDY_AREA)))
for param_name, param_value in SCENARIO[SCENARIOID].items():
    print(": ".join((param_name, str(param_value))))

In [None]:
# Identifiers of the five experimental setups, in the order listed at the top
# of the notebook.
EXPERIMENT_IDS = ("random", "waterprovision", "loopbits", "nodedensity", "wplbnd")

# Each setup gets its own (initially empty) dict for inputs and for results.
experiment_setups = {experiment_id: {} for experiment_id in EXPERIMENT_IDS}
experiment_results = {experiment_id: {} for experiment_id in EXPERIMENT_IDS}

# Number of POIs to add in each experiment
addpoisnums = [1000]
# Number of runs per experiment
runsperexperiment = 10

## Functions

In [None]:
# Execute the shared functions script in this notebook's interactive namespace
# (-i), so it can see the parameters defined in the cells above.
%run -i functions.py

## Set up baseline data and targeted cells/nodes for experiments

### Load data

In [None]:
if load_data:
    # The file name carries an optional suffix for the maximum loop length
    llb_string = ("_maxlength" + str(LOOP_LENGTH_BOUND)) if LOOP_LENGTH_BOUND else ""
    loopcensus_file = "loopcensus_" + str(LOOP_NUMNODE_BOUND) + llb_string + ".pkl"

    # NOTE: unpickling can execute arbitrary code; only load census files that
    # were produced by this project.
    # The file holds several objects that are read back one after another, so
    # the order of the load calls below must not be changed.
    with open(PATH["data_out"] + loopcensus_file, "rb") as f:
        allloops = pickle.load(f)
        alllooplengths = pickle.load(f)
        allloopnumnodes = pickle.load(f)
        allloopmaxslopes = pickle.load(f)
        Gnx = pickle.load(f)
        # The bound stored in the file replaces the configured value
        LOOP_NUMNODE_BOUND = pickle.load(f)
        nodes_id = pickle.load(f)
        nodes_coords = pickle.load(f)
        numloops = pickle.load(f)
        faceloops = pickle.load(f)

In [None]:
# Create gdf and igraph versions
nodes, edges = momepy.nx_to_gdf(net=Gnx, points=True, lines=True)
nodes.set_crs(epsg=25832, inplace=True)
edges.set_crs(epsg=25832, inplace=True)
G = ig.Graph.from_networkx(Gnx)
G.summary()

In [None]:
# Original POIs; the experiments add new POIs on top of these
pois_orig = load_pois()

# Node ids and node coordinates taken from the nodes GeoDataFrame
nodes_id = list(nodes["nodeID"])
node_points = nodes.geometry
nodes_x = list(node_points.x)
nodes_y = list(node_points.y)
normalized_x = normalize_data(nodes_x)
normalized_y = normalize_data(nodes_y)
nodes_coords = list(zip(normalized_x, normalized_y))

# Snapping POIs and updating edge attributes happens later, inside each experiment

### Loops

Restrictions:

In [None]:
# Apply restriction levels 0, 1 and 2 in sequence: level 0 starts from all
# loops, and every later level starts from the result of the level before it.
loops_previous_level = allloops
for restriction_level in range(3):
    loops_previous_level = restrict_scenario(
        allloops, loops_previous_level, level=restriction_level
    )
    allloops_dict[SCENARIOID][restriction_level] = loops_previous_level

Get loop bits for each node:

In [None]:
# Strip the attribute columns from the nodes and reproject to EPSG:4326 for H3
nodes_loopnum3 = nodes.drop(
    columns=["name", "_igraph_index", "x", "y", "nodeID"]
).to_crs(epsg=4326)

# Per-node loop numbers for restriction level 2, requested with the "log2" option
loopnum3_values = get_vertex_loopnums(allloops_dict[SCENARIOID][2], "log2")
nodes_loopnum3["loopnum3"] = loopnum3_values.tolist()

### H3 grids

In [None]:
# Strip the attribute columns from the nodes and reproject to EPSG:4326 for H3
nodes_nodata = nodes.drop(
    columns=["name", "_igraph_index", "x", "y", "nodeID"]
).to_crs(epsg=4326)
if debug:
    print(nodes_nodata.head())

# Give every node a count of 1 and aggregate the nodes into H3 cells (resolution 6)
nodes_with_count = nodes_nodata.assign(count=1)
nodesh3 = nodes_with_count.h3.geo_to_h3_aggregate(6)

nodesh3.plot(column="count", figsize=(5, 5), legend=True)
plt.title("Node density")
plt.axis("off");

In [None]:
# Represent every edge by its centroid. The centroid is computed in the
# projected CRS (EPSG:25832) and then reprojected to EPSG:4326 for H3.
edgesh3 = edges.to_crs(epsg=25832)
edgesh3["geometry"] = edgesh3.geometry.centroid
edgesh3 = edgesh3.to_crs(epsg=4326)

# Work on an explicit copy: assigning a column to a column-subset of edgesh3
# would otherwise raise pandas' SettingWithCopyWarning.
edges_has_water = edgesh3[["weight", "has_water", "geometry"]].copy()
edges_has_water["has_water"] = edges_has_water["has_water"].astype(
    int
)  # Turn True/False into 1/0

# Per-cell aggregation: average of has_water, weighted by each edge's "weight".
# The weights are looked up through the index of the group being aggregated.
wm = {
    "has_water": lambda x: np.average(x, weights=edges_has_water.loc[x.index, "weight"])
}
edges_has_water_wmh3 = edges_has_water.h3.geo_to_h3_aggregate(6, wm)

edges_has_water_wmh3.plot(column="has_water", figsize=(5, 5), legend=True)
plt.title("Has water (weighted)")
plt.gca().axis("off");

In [None]:
# Aggregate the per-node loop values into H3 cells (resolution 6) using the mean
H3_AGGREGATION = "mean"
nodes_loopnum3h3 = nodes_loopnum3.h3.geo_to_h3_aggregate(6, H3_AGGREGATION)

nodes_loopnum3h3.plot(column="loopnum3", figsize=(5, 5), legend=True)
plt.title("Average loop bits+1 (water restriction)")
plt.axis("off");

#### Join into one dataframe

In [None]:
# Combine the three per-cell layers into one frame. The geometry is kept only
# from the node layer; the other two layers contribute their value column.
water_by_cell = edges_has_water_wmh3.drop(columns="geometry")
loopbits_by_cell = nodes_loopnum3h3.drop(columns="geometry")
readable_column_names = {
    "count": "Node density",
    "has_water": "Water provision",
    "loopnum3": "Loop bits",
}

gdfjoined = nodesh3.join(water_by_cell).join(loopbits_by_cell)
gdfjoined = gdfjoined.rename(columns=readable_column_names)
if debug:
    print(gdfjoined.head())

#### Create quantiles

In [None]:
# --- Single-criterion cell populations (experimental setups 2-4) ---
q = gdfjoined["Water provision"].quantile([0.15, 0.5])
q_wp15 = gdfjoined[gdfjoined["Water provision"].le(q[0.15])]  # Bottom 15% water provision
q_wp50 = gdfjoined[gdfjoined["Water provision"].le(q[0.5])]  # Bottom 50% water provision

q = gdfjoined["Loop bits"].quantile([0.30])  # Bottom 30% loop bits
q_lb30 = gdfjoined[gdfjoined["Loop bits"].le(q[0.30])]

q = gdfjoined["Node density"].quantile([0.9])  # Top 10% node density
q_nd10 = gdfjoined[gdfjoined["Node density"].ge(q[0.9])]

# --- Combined cell population (experimental setup 5) ---
# Bottom 50% water provision, of those the bottom 50% loop bits, and of those
# the top 50% node density.
q = q_wp50["Loop bits"].quantile([0.5])  # Bottom 50% loop bits
# Fix: filter on "Loop bits". The threshold was previously compared against
# "Water provision", so the loop-bits step never filtered on loop bits.
q_wplb50 = q_wp50[q_wp50["Loop bits"].le(q[0.5])]
q = q_wplb50["Node density"].quantile([0.5])  # Top 50% node density
q_wplbnd = q_wplb50[q_wplb50["Node density"].ge(q[0.5])]

# Clip to lengths to get similar node populations, keeping the cells with the
# smallest water provision. The factors differ per setup; q_lb30 is left unclipped.
numcells = min(len(q_nd10), len(q_wp15), len(q_lb30), len(q_wplbnd))
q_nd10 = q_nd10.nsmallest(n=int(numcells * 0.5), columns=["Water provision"])
q_wp15 = q_wp15.nsmallest(n=int(numcells * 1.5), columns=["Water provision"])
q_wplbnd = q_wplbnd.nsmallest(n=int(numcells * 0.75), columns=["Water provision"])

Plot the targeted cells in each experimental setup (except random):

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 4))

# (panel index, targeted cells, panel title) for each setup except "random".
# All panels are colored by loop bits and share the same map extent.
targeted_panels = [
    (1, q_wp15, "Loop bits+1, water provision q15"),
    (2, q_lb30, "Loop bits+1, loop bits q30"),
    (0, q_nd10, "Loop bits+1, node density q10"),
    (3, q_wplbnd, "Loop bits+1, wplbnd q50"),
]
for panel_index, targeted_cells, panel_title in targeted_panels:
    panel = axes[panel_index]
    targeted_cells.plot(column="Loop bits", figsize=(5, 5), legend=True, ax=panel)
    panel.set_title(panel_title)
    panel.set_ylim([54.5, 57.8])
    panel.set_xlim([8, 12.8])

In [None]:
# Targeted H3 cells per setup; "random" draws from every cell that has nodes
targeted_cells_by_setup = {
    "random": nodesh3,
    "waterprovision": q_wp15,
    "loopbits": q_lb30,
    "nodedensity": q_nd10,
    "wplbnd": q_wplbnd,
}

for setup_name, setup_cells in targeted_cells_by_setup.items():
    setup = experiment_setups[setup_name]
    setup["cellpopulation"] = setup_cells
    # Node population: boolean Series marking the nodes that lie within the
    # union of the setup's cells
    setup["nodepopulation"] = nodes_nodata.within(setup_cells.geometry.unary_union)

if debug:
    # The True counts show how many nodes are in each node population
    for setup_name, setup in experiment_setups.items():
        print(setup_name)
        print(setup["nodepopulation"].value_counts())
        print("")

## Sample nodes for experiments

Sampling:
- Assume number of POIs to add is not larger than the number of nodes available in the targeted cells
- Sample nodes randomly from the targeted grid cells (without replacement)

In [None]:
# Seeded random generator so that "Restart & Run All" reproduces the same node
# samples. The generator is shared by all draws, so runs still differ from
# each other.
SAMPLING_SEED = 42
sampling_rng = np.random.default_rng(SAMPLING_SEED)

num_samples = 0
for experimentid in experiment_results.keys():
    # Keep only the nodes flagged True, i.e. those inside the targeted cells
    in_population = experiment_setups[experimentid]["nodepopulation"]
    nodepop = in_population.loc[in_population]
    for poistoadd in addpoisnums:
        experiment_results[experimentid][poistoadd] = {
            "nodesamples": [],  # This is a list of lists of the node indices that are sampled in each run
            "nodeloopnums": [],  # This is a list of lists of the loop numbers found in each run
        }
        for run in range(runsperexperiment):
            # Sampling is without replacement; pandas raises a ValueError if
            # poistoadd exceeds the size of the node population.
            sampled_nodes = nodepop.sample(
                poistoadd, axis="index", random_state=sampling_rng
            )
            experiment_results[experimentid][poistoadd]["nodesamples"].append(
                list(sampled_nodes.index)
            )
            num_samples += 1
print(
    "Sampled "
    + str(num_samples)
    + " node populations for "
    + str(len(experiment_results.keys()))
    + " experiments."
)

## Run experiments: Add POIs, snap POIs, generate loops

Can take hours to days:

==== All code below not finished yet ====

In [None]:
# Run the experiments: for each (setup, number of POIs, run) add the sampled
# POIs, snap all POIs to the edges, regenerate the loops, and store the
# per-node loop numbers.
# Currently limited to the "wplbnd" setup and run 0; the full iterables are
# kept in the trailing comments.
# NOTE(review): this loop overwrites the notebook-level names edges, G, Gnx,
# allloops, alllooplengths, allloopnumnodes, allloopmaxslopes and
# allloops_dict[SCENARIOID], so the baseline objects loaded above are replaced.
# NOTE(review): experimentid and poistoadd stay defined after the loop and are
# read by the analysis cells below.
for experimentid in ["wplbnd"]:  # experiment_results.keys():
    for poistoadd in addpoisnums:
        for run in [0]:  # range(runsperexperiment):
            # Step 1: Add additional POIs from node samples
            # Each sampled node's geometry becomes a new POI of category
            # "facility", appended to the original POIs.
            nodesample = experiment_results[experimentid][poistoadd]["nodesamples"][run]
            pois_new = gpd.GeoDataFrame(
                geometry=gpd.GeoSeries(nodes.loc[nodesample]["geometry"])
            )
            pois_new["category"] = "facility"
            pois_merged = gpd.GeoDataFrame(
                pd.concat([pois_orig, pois_new], ignore_index=True)
            )

            # Step 2: Snap POIs
            # Reset all POI attributes on the edges before snapping the merged
            # POI set, then rebuild the igraph and networkx graphs from the edges.
            edges["has_water"] = False
            edges["has_facility"] = False
            edges["has_service"] = False
            edges["has_attraction"] = False
            edges["poi_diversity"] = 0
            pois_snapped = snap_pois(pois_merged, edges)
            edges = update_poi_attributes(edges, pois_snapped)
            G = gdf_to_igraph(edges)
            Gnx = nx.empty_graph()
            Gnx = nx.disjoint_union(Gnx, G.to_networkx())

            # Step 3: Generate loops
            (
                allloops,
                alllooplengths,
                allloopnumnodes,
                allloopmaxslopes,
            ) = get_allloops_nx(Gnx)

            # Step 4: Restrict to water
            # NOTE(review): allloops_dict is already created in the setup cell
            # near the top of the notebook, so this guard only matters if that
            # cell was skipped.
            try:  # See if allloops_dict exists. If not, initialize. This allows running multiple scenarios.
                allloops_dict
            except NameError:
                allloops_dict = {}
            allloops_dict[SCENARIOID] = {}

            # Restriction levels 0-2 are applied in sequence; each level
            # starts from the result of the previous one.
            allloops_dict[SCENARIOID][0] = restrict_scenario(
                allloops, allloops, level=0
            )
            allloops_dict[SCENARIOID][1] = restrict_scenario(
                allloops, allloops_dict[SCENARIOID][0], level=1
            )
            allloops_dict[SCENARIOID][2] = restrict_scenario(
                allloops, allloops_dict[SCENARIOID][1], level=2
            )

            # Step 5: Save relevant results
            # NOTE(review): the baseline cell calls get_vertex_loopnums(..., "log2"),
            # here no second argument is passed — confirm which scale the
            # analysis below expects.
            # NOTE(review): results are appended, so re-running this cell
            # without re-running the sampling cell adds duplicate entries.
            experiment_results[experimentid][poistoadd]["nodeloopnums"].append(
                get_vertex_loopnums(allloops_dict[SCENARIOID][2])
            )

In [None]:
# Same file name suffix convention as for the loop census file
llb_string = ("_maxlength" + str(LOOP_LENGTH_BOUND)) if LOOP_LENGTH_BOUND else ""
experiments_file = "experiments_" + str(LOOP_NUMNODE_BOUND) + llb_string + ".pkl"

# Both objects go into one file, setups first and results second; they have to
# be read back in the same order.
with open(PATH["data_out"] + experiments_file, "wb") as f:
    for experiment_object in (experiment_setups, experiment_results):
        pickle.dump(experiment_object, f)

## Analyze experiment results

In [None]:
# Loop numbers of all stored runs for one experiment, as an array.
# NOTE(review): experimentid and poistoadd are not set in this cell; they hold
# whatever values the last loop that used them left behind.
loopnums = np.array(experiment_results[experimentid][poistoadd]["nodeloopnums"])
loopnums

In [None]:
# Histogram of loop numbers per node for one experiment.
# The first bar ("No") is for nodes without loops and the last bar collects
# everything from bit_threshold upwards.
bit_threshold = 8  # Highest bit value that gets its own label; the last bar is "<bit_threshold>+"
ymaxconst = 7800  # Fixed upper y limit; set to 0/None to scale to the data

fig = plt.figure(
    figsize=(240 / PLOTPARAM["dpi"], 240 / PLOTPARAM["dpi"]),
    dpi=PLOTPARAM["dpi"],
)
xmax = bit_threshold + 2  # One bar for "No", one per bit below the threshold, one for the top bar
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

# Bar colors: the "no loop" color first, then the colors from get_cmap
cmaparr = get_cmap(bit_threshold=bit_threshold)
cmaparr = np.vstack((PLOTPARAM["color"]["noloop"], cmaparr))

# NOTE(review): experimentid and poistoadd are not set in this cell; they hold
# whatever values the last loop that used them left behind.
loopnums = np.array(
    experiment_results[experimentid][poistoadd]["nodeloopnums"]
).flatten()
if loopnums.size == 0:
    # Without this check the percentage labels below fail with a bare ZeroDivisionError
    raise ValueError(
        "No loop numbers stored for this experiment; run the experiments first."
    )

N, bins, patches = axes.hist(
    loopnums,
    bins=list(np.linspace(0, xmax, xmax + 1)),
    density=False,
    linewidth=0.5,
)
print(N)

patches[0].set_edgecolor(PLOTPARAM["color"]["neutral"])
for i in range(xmax):
    patches[i].set_facecolor(cmaparr[i, :])

# Add the values above the threshold to the last bar.
# NOTE(review): values in (bit_threshold, xmax] are already counted by the
# histogram — confirm that this does not double count them.
n_upper_outliers = (loopnums > bit_threshold).sum()
patches[-1].set_height(patches[-1].get_height() + n_upper_outliers)

# Percentage labels: share of nodes without loops (left) and share of nodes
# at or above the threshold (right). Counts are vectorized; int() keeps the
# arithmetic in plain Python numbers so the label text is unchanged.
num_values = len(loopnums)
percent_noloop = round(int(np.count_nonzero(loopnums == 0)) / num_values * 100)
percent_top = round(
    int(np.count_nonzero(loopnums >= bit_threshold)) / num_values * 100
)
label_y = ymaxconst * 0.985 if ymaxconst else (max(N) + n_upper_outliers) * 1.1

axes.text(
    0.1,
    label_y,
    str(percent_noloop) + "%",
    horizontalalignment="left",
    verticalalignment="top",
    color=PLOTPARAM["color"]["neutral"],
)
axes.text(
    xmax - 0.1,
    label_y,
    str(percent_top) + "%",
    horizontalalignment="right",
    verticalalignment="top",
    color=cmaparr[-1, :],
)

axes.set_xlabel("Bits $2^b$")
axes.set_ylabel("Frequency")
axes.set_title("Loops per node")
axes.set_xticks([i + 0.5 for i in range(xmax)])
# The top label follows bit_threshold instead of being hard-coded as "8+"
axes.set_xticklabels(
    ["No"] + [str(i) for i in range(bit_threshold)] + [str(bit_threshold) + "+"]
)
axes.set_xlim([0, xmax])

if not ymaxconst:
    axes.set_ylim([0, 1.12 * (max(N) + n_upper_outliers)])
else:
    axes.set_ylim([0, ymaxconst])