# 05DK. Hex-grid correlations, Denmark-wide
## Project: Bicycle node network loop analysis

This notebook aggregates node, edge, and loop census data of the network onto an H3 hexagonal grid and then correlates the aggregated quantities with each other.

Contact: Michael Szell (michael.szell@gmail.com)

Created: 2025-08-01  
Last modified: 2025-08-01  

## To do

- [ ] Is node density a proxy for loop census size?

## Parameters

In [None]:
# Run the parameter script inside this notebook's namespace (-i), so the names
# it defines become notebook globals.
# NOTE(review): presumably this is where STUDY_AREA, SCENARIO, SCENARIOID, PATH,
# LOOP_LENGTH_BOUND, etc. come from -- confirm against setup_parameters.py.
%run -i setup_parameters.py
load_data = True  # Set to False if data are huge and have already been loaded
debug = False  # Set to True for extra plots and verbosity

In [None]:
# Report the study area and every setting of the active scenario
print(f"Running scenario in {STUDY_AREA}")
for param_name, param_value in SCENARIO[SCENARIOID].items():
    print(f"{param_name}: {param_value}")

## Functions

In [None]:
# Run the shared helper script inside this notebook's namespace (-i).
# NOTE(review): presumably this provides the helpers used below that are not
# defined in this notebook (e.g. mask_node, get_vertex_loopnums, plot_dk_gdf)
# -- confirm against functions.py.
%run -i functions.py

## Load data

This can take several minutes.

In [None]:
if load_data:
    # The file name carries a "_maxlength<bound>" suffix only when a loop
    # length bound is set
    llb_string = f"_maxlength{LOOP_LENGTH_BOUND}" if LOOP_LENGTH_BOUND else ""
    loopcensus_path = (
        f"{PATH['data_out']}loopcensus_{LOOP_NUMNODE_BOUND}{llb_string}.pkl"
    )

    # NOTE: pickle.load can execute arbitrary code; only open census files
    # that were produced by this project.
    with open(loopcensus_path, "rb") as census_file:
        # Objects are read back one by one, in this fixed order
        allloops = pickle.load(census_file)
        alllooplengths = pickle.load(census_file)
        allloopnumnodes = pickle.load(census_file)
        allloopmaxslopes = pickle.load(census_file)
        Gnx = pickle.load(census_file)
        # Overwrites the parameter with the value stored in the file
        LOOP_NUMNODE_BOUND = pickle.load(census_file)
        nodes_id = pickle.load(census_file)
        nodes_coords = pickle.load(census_file)
        numloops = pickle.load(census_file)
        faceloops = pickle.load(census_file)

In [None]:
# Derive node/edge GeoDataFrames (tagged as EPSG:25832) and an igraph graph
# from the networkx graph
nodes, edges = momepy.nx_to_gdf(net=Gnx, points=True, lines=True)
nodes = nodes.set_crs(epsg=25832)
edges = edges.set_crs(epsg=25832)
G = ig.Graph.from_networkx(Gnx)
G.summary()

In [None]:
# Optional overview plot of the whole network (debug only)
if debug:
    network_style = {
        "scale": 0.4,
        "vertex_size": get_vertex_size_constant(G.vcount()),
        "link_width": get_edgewidth_constant(G.ecount()),
    }
    plot_dk_gdf(nodes, edges, **network_style)

## Put into H3 grid

### Node density

In [None]:
# Optional quick look at the raw node positions (debug only)
if debug:
    nodes.plot(
        kind="scatter", x="x", y="y", style=".", alpha=0.5, figsize=(5, 5)
    )

In [None]:
# Strip the listed attribute columns from the nodes and reproject to
# EPSG:4326 for H3
nodes_nodata = nodes.drop(
    columns=["name", "id_cykelknudepkt", "_igraph_index", "x", "y", "nodeID"]
).to_crs(epsg=4326)
if debug:
    print(nodes_nodata.head())

In [None]:
# Give every node a count of 1, then aggregate into H3 cells of resolution 6
# (using the aggregate's default operation)
nodes_counted = nodes_nodata.assign(count=1)
nodesh3 = nodes_counted.h3.geo_to_h3_aggregate(resolution=6)
if debug:
    print(nodesh3.head())

In [None]:
# Map of the per-cell node count
nodesh3.plot(column="count", legend=True, figsize=(5, 5))
plt.gca().set_title("Node density")
plt.axis("off");

### Edge properties

In [None]:
# Represent every edge by its centroid: compute the centroid in the projected
# CRS (EPSG:25832), then reproject to EPSG:4326 for H3
edgesh3 = (
    edges.to_crs(epsg=25832)
    .assign(geometry=lambda projected: projected.geometry.centroid)
    .to_crs(epsg=4326)
)
if debug:
    print(edgesh3.head())

In [None]:
# One frame per edge property. Each keeps "weight" (the edge length used for
# the weighted means below) and the centroid geometry.
edges_length = edgesh3[["weight", "geometry"]]
edges_max_slope = edgesh3[["weight", "max_slope", "geometry"]]
# Turn True/False into 1/0. Using .assign on the selection yields a fresh
# frame, which avoids pandas' SettingWithCopyWarning that assigning a column
# on a slice of edgesh3 triggers.
edges_has_water = edgesh3[["weight", "has_water", "geometry"]].assign(
    has_water=lambda df: df["has_water"].astype(int)
)
edges_poi_diversity = edgesh3[["weight", "poi_diversity", "geometry"]]

Weighted means (by length):

In [None]:
# https://stackoverflow.com/questions/31521027/groupby-weighted-average-and-sum-in-pandas-dataframe
def _h3_weighted_mean(frame, column, resolution=6):
    """Aggregate one column into H3 cells as a mean weighted by "weight".

    Parameters
    ----------
    frame : GeoDataFrame
        Must contain `column`, a "weight" column and a geometry.
    column : str
        Name of the column to average.
    resolution : int, default 6
        H3 resolution passed to `geo_to_h3_aggregate`.

    Returns
    -------
    The result of `frame.h3.geo_to_h3_aggregate` for `column` only.
    """

    def weighted_mean(values):
        # `values` holds one cell's entries of `column`; their index labels
        # select the matching weights from `frame`.
        return np.average(values, weights=frame.loc[values.index, "weight"])

    return frame.h3.geo_to_h3_aggregate(resolution, {column: weighted_mean})


edges_max_slope_wmh3 = _h3_weighted_mean(edges_max_slope, "max_slope")
edges_has_water_wmh3 = _h3_weighted_mean(edges_has_water, "has_water")
edges_poi_diversity_wmh3 = _h3_weighted_mean(edges_poi_diversity, "poi_diversity")

In [None]:
# Map of the length-weighted mean of max_slope per cell
edges_max_slope_wmh3.plot(column="max_slope", legend=True, figsize=(5, 5))
plt.gca().set_title("Maximum gradient (weighted)")
plt.axis("off");

In [None]:
# Map of the length-weighted mean of has_water per cell
edges_has_water_wmh3.plot(column="has_water", legend=True, figsize=(5, 5))
plt.gca().set_title("Has water (weighted)")
plt.axis("off");

In [None]:
# Map of the length-weighted mean of poi_diversity per cell
edges_poi_diversity_wmh3.plot(column="poi_diversity", legend=True, figsize=(5, 5))
plt.gca().set_title("POI diversity (weighted)")
plt.axis("off");

Unweighted means:

In [None]:
# Plain per-cell means (not weighted by length) of every edge property frame,
# all at H3 resolution 6
(
    edges_lengthh3,
    edges_max_slopeh3,
    edges_has_waterh3,
    edges_poi_diversityh3,
) = (
    property_frame.h3.geo_to_h3_aggregate(6, "mean")
    for property_frame in (
        edges_length,
        edges_max_slope,
        edges_has_water,
        edges_poi_diversity,
    )
)

### Loop census

#### Restrict to scenario lengths

In [None]:
# Keep, per source node, only the loops whose length (converted with MPERUNIT)
# lies within the scenario's [looplength_min, looplength_max] range.
# The limits are read outside the try block so that a missing scenario key
# fails loudly instead of being mistaken for "node has no loops".
looplength_min = SCENARIO[SCENARIOID]["looplength_min"]
looplength_max = SCENARIO[SCENARIOID]["looplength_max"]

allloops_bound = {}
for sourcenode in tqdm(allloops, desc="Restrict to scenario lengths"):
    try:
        lengths_this = allloops[sourcenode]["lengths"] * MPERUNIT
        mask_this = (lengths_this >= looplength_min) & (lengths_this <= looplength_max)
        allloops_bound[sourcenode] = mask_node(allloops[sourcenode], mask_this)
    except Exception:  # Account for 0 loop nodes
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the loop.
        allloops_bound[sourcenode] = {}

In [None]:
# Geometry-only nodes in EPSG:4326 (for H3), plus the per-node loop numbers
# ("log2" variant) after the length restriction
nodes_loopnum1 = (
    nodes.drop(
        columns=["name", "id_cykelknudepkt", "_igraph_index", "x", "y", "nodeID"]
    )
    .to_crs(epsg=4326)
    .assign(loopnum1=get_vertex_loopnums(allloops_bound, "log2").tolist())
)
if debug:
    print(nodes_loopnum1.head())

In [None]:
# Per-cell mean of loopnum1 at H3 resolution 6
nodes_loopnum1h3 = nodes_loopnum1.h3.geo_to_h3_aggregate(
    resolution=6, operation="mean"
)
if debug:
    print(nodes_loopnum1h3.head())

In [None]:
# Map of the per-cell mean of loopnum1
nodes_loopnum1h3.plot(column="loopnum1", legend=True, figsize=(5, 5))
plt.gca().set_title("Average loop bits (length restriction)")
plt.axis("off");

#### Restrict to scenario gradients

In [None]:
# Keep, per source node, only the loops that satisfy the scenario's length
# range AND whose maximum slope does not exceed the scenario's maxslope_limit.
# The mask is rebuilt from the unrestricted `allloops` entries.
# The limits are read outside the try block so that a missing scenario key
# fails loudly instead of being mistaken for "node has no loops".
looplength_min = SCENARIO[SCENARIOID]["looplength_min"]
looplength_max = SCENARIO[SCENARIOID]["looplength_max"]
maxslope_limit = SCENARIO[SCENARIOID]["maxslope_limit"]

allloops_bound_sloped = {}
for sourcenode in tqdm(allloops_bound, desc="Restrict to scenario gradients"):
    try:
        lengths_this = allloops[sourcenode]["lengths"] * MPERUNIT
        maxslopes_this = (
            allloops[sourcenode]["max_slopes"] / 100.0
        )  # max_slopes were multiplied by 100 for storage as uint16
        mask_this = lengths_this >= looplength_min
        mask_this &= lengths_this <= looplength_max
        mask_this &= maxslopes_this <= maxslope_limit
        allloops_bound_sloped[sourcenode] = mask_node(allloops[sourcenode], mask_this)
    except Exception:  # Account for 0 loop nodes
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the loop.
        allloops_bound_sloped[sourcenode] = {}

In [None]:
# Geometry-only nodes in EPSG:4326 (for H3), plus the per-node loop numbers
# ("log2" variant) after the gradient restriction
nodes_loopnum2 = (
    nodes.drop(
        columns=["name", "id_cykelknudepkt", "_igraph_index", "x", "y", "nodeID"]
    )
    .to_crs(epsg=4326)
    .assign(loopnum2=get_vertex_loopnums(allloops_bound_sloped, "log2").tolist())
)
if debug:
    print(nodes_loopnum2.head())

# Per-cell mean at H3 resolution 6, shown as a map
nodes_loopnum2h3 = nodes_loopnum2.h3.geo_to_h3_aggregate(
    resolution=6, operation="mean"
)
nodes_loopnum2h3.plot(column="loopnum2", legend=True, figsize=(5, 5))
plt.gca().set_title("Average loop bits (gradient restriction)")
plt.axis("off");

#### Restrict to water limits

In [None]:
# Keep, per source node, only the loops that pass the water check:
# - with a non-empty water_profile: no profile entry exceeds WATERLENGTH_MAX
#   and the loop length minus the last profile entry does not exceed it either;
# - with an empty water_profile: the loop length itself must not exceed
#   WATERLENGTH_MAX.
# NOTE(review): lengths are compared to WATERLENGTH_MAX without the MPERUNIT
# conversion used in the length restriction -- confirm the units agree.
allloops_bound_sloped_watered = {}
for sourcenode in tqdm(allloops_bound_sloped, desc="Restrict to water limits"):
    try:
        loops_this = allloops_bound_sloped[sourcenode]
        # "_this" suffix: do not overwrite the `numloops` loaded from the
        # census pickle.
        numloops_this = len(loops_this["loops"])
        mask_this = [True] * numloops_this
        for i in range(numloops_this):
            wp = loops_this["water_profile"][i]
            if wp:  # There is water on the way somewhere. Check distances
                water_enough = not any(w > WATERLENGTH_MAX for w in wp) and not (
                    loops_this["lengths"][i] - wp[-1] > WATERLENGTH_MAX
                )
            else:  # No water on the way, so the loop is only valid if short enough
                water_enough = not (loops_this["lengths"][i] > WATERLENGTH_MAX)
            mask_this[i] = water_enough
        allloops_bound_sloped_watered[sourcenode] = mask_node(loops_this, mask_this)
    except Exception:  # Account for 0 loop nodes
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the loop.
        allloops_bound_sloped_watered[sourcenode] = {}

In [None]:
# Geometry-only nodes in EPSG:4326 (for H3), plus the per-node loop numbers
# ("log2" variant) after the water restriction
nodes_loopnum3 = (
    nodes.drop(
        columns=["name", "id_cykelknudepkt", "_igraph_index", "x", "y", "nodeID"]
    )
    .to_crs(epsg=4326)
    .assign(
        loopnum3=get_vertex_loopnums(allloops_bound_sloped_watered, "log2").tolist()
    )
)
if debug:
    print(nodes_loopnum3.head())

# Per-cell mean at H3 resolution 6, shown as a map
nodes_loopnum3h3 = nodes_loopnum3.h3.geo_to_h3_aggregate(
    resolution=6, operation="mean"
)
nodes_loopnum3h3.plot(column="loopnum3", legend=True, figsize=(5, 5))
plt.gca().set_title("Average loop bits (water restriction)")
plt.axis("off");

#### Restrict with POI diversity

In [None]:
# Keep, per source node, only the loops whose poi_diversity is at least
# poi_diversity_min.
poi_diversity_min = 3  # threshold previously hard-coded inside the loop

allloops_bound_sloped_watered_diverse = {}
for sourcenode in tqdm(
    allloops_bound_sloped_watered, desc="Restrict with POI diversity"
):
    try:
        loops_this = allloops_bound_sloped_watered[sourcenode]
        # "_this" suffix: do not overwrite the `numloops` loaded from the
        # census pickle.
        numloops_this = len(loops_this["loops"])
        mask_this = [
            bool(loops_this["poi_diversity"][i] >= poi_diversity_min)
            for i in range(numloops_this)
        ]
        allloops_bound_sloped_watered_diverse[sourcenode] = mask_node(
            loops_this, mask_this
        )
    except Exception:  # Account for 0 loop nodes
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the loop.
        allloops_bound_sloped_watered_diverse[sourcenode] = {}

In [None]:
# Geometry-only nodes in EPSG:4326 (for H3), plus the per-node loop numbers
# ("log2" variant) after the POI diversity restriction
nodes_loopnum4 = (
    nodes.drop(
        columns=["name", "id_cykelknudepkt", "_igraph_index", "x", "y", "nodeID"]
    )
    .to_crs(epsg=4326)
    .assign(
        loopnum4=get_vertex_loopnums(
            allloops_bound_sloped_watered_diverse, "log2"
        ).tolist()
    )
)
if debug:
    print(nodes_loopnum4.head())

# Per-cell mean at H3 resolution 6, shown as a map
nodes_loopnum4h3 = nodes_loopnum4.h3.geo_to_h3_aggregate(
    resolution=6, operation="mean"
)
nodes_loopnum4h3.plot(column="loopnum4", legend=True, figsize=(5, 5))
plt.gca().set_title("Average loop bits (POI restriction)")
plt.axis("off");

## Correlate

### Scenarios

In [None]:
# One table per H3 cell: node count plus the mean loop bits of each
# restriction step, joined on the cell index and given readable labels
scenario_labels = {
    "count": "Node density",
    "loopnum1": "Loop bits (length)",
    "loopnum2": "Loop bits (gradient)",
    "loopnum3": "Loop bits (water)",
    "loopnum4": "Loop bits (POI)",
}

dfunified_scenarios = nodesh3
for loopbits_h3 in (
    nodes_loopnum1h3,
    nodes_loopnum2h3,
    nodes_loopnum3h3,
    nodes_loopnum4h3,
):
    dfunified_scenarios = dfunified_scenarios.join(
        loopbits_h3.drop(columns="geometry")
    )
dfunified_scenarios = dfunified_scenarios.rename(columns=scenario_labels)

pd.plotting.scatter_matrix(dfunified_scenarios, figsize=(10, 10), alpha=0.05);

### Existing properties

#### Unweighted by length

In [None]:
# One table per H3 cell: node count plus the unweighted per-cell means of the
# edge properties. Only the length frame keeps its "weight" column, which
# becomes "Length".
property_labels = {
    "count": "Node density",
    "weight": "Length",
    "max_slope": "Maximum gradient",
    "has_water": "Has water",
    "poi_diversity": "POI Diversity",
}
property_columns = (
    edges_lengthh3.drop(columns=["geometry"]),
    edges_max_slopeh3.drop(columns=["geometry", "weight"]),
    edges_has_waterh3.drop(columns=["geometry", "weight"]),
    edges_poi_diversityh3.drop(columns=["geometry", "weight"]),
)

dfunified_properties = nodesh3
for property_h3 in property_columns:
    dfunified_properties = dfunified_properties.join(property_h3)
dfunified_properties = dfunified_properties.rename(columns=property_labels)

pd.plotting.scatter_matrix(dfunified_properties, figsize=(10, 10), alpha=0.05);

#### Weighted by length

In [None]:
# One table per H3 cell: node count, the (unweighted) per-cell mean of
# "weight" as "Length", and the length-weighted means of the other properties
weighted_property_labels = {
    "count": "Node density",
    "weight": "Length",
    "max_slope": "Maximum gradient",
    "has_water": "Has water",
    "poi_diversity": "POI Diversity",
}

dfunified_properties_weighted = nodesh3
for weighted_h3 in (
    edges_lengthh3,
    edges_max_slope_wmh3,
    edges_has_water_wmh3,
    edges_poi_diversity_wmh3,
):
    dfunified_properties_weighted = dfunified_properties_weighted.join(
        weighted_h3.drop(columns="geometry")
    )
dfunified_properties_weighted = dfunified_properties_weighted.rename(
    columns=weighted_property_labels
)

pd.plotting.scatter_matrix(dfunified_properties_weighted, figsize=(10, 10), alpha=0.05);