# 01. Create loop census
## Project: Bicycle node network loop analysis

This notebook creates a loop census from the input data set and calculates/plots basic descriptive statistics.

Contact: Michael Szell (michael.szell@gmail.com)

Created: 2024-01-24  
Last modified: 2024-02-19  

## To do

* Double-check loop/link lengths. For example 3-loop east of Faxe
* Double-check edge_ids during simplifications
* X Snap POIs to the original link geometries, within a threshold
* X Incorporate gradients
* X Add loop permutations for node-based analysis
* X Drop non-main nodes
* X Drop loops (they are really dangling links)
* X Find all simple loops (bounded?-max length?) with networkX

## Parameters

In [None]:
# Bound passed to the simple-loop search (from 30 it starts getting slow)
loop_numnode_bound = 30
# 90% of face loop lengths should conform to these length limits [m]
faceloop_limit = [15000, 30000]
# Link lengths: optimal between the first and second value, the last value is the maximum
link_limit = [1000, 5000, 10000]
# Limit drawn in the link max slope histogram
maxslope_limit = 6
# Threshold to snap POIs to network links [m]
snap_threshold = 100

# Relative paths of the input, output and plot folders
PATH = dict(
    data_in_network="../data/input/faxe/network/",
    data_in_pois="../data/input/faxe/pois/",
    data_out="../data/processed/faxe/",
    plot="../plots/faxe/",
)

## Imports

In [None]:
import geopandas as gpd
import igraph as ig
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
from functools import reduce
import pickle 
from random import seed, random

## Functions

In [None]:
# Execute functions.py inside this notebook's namespace (-i).
# Helpers used below but not defined in this notebook (NormalizeData, plotCheck, getLoopLength, ...)
# are presumably defined there - TODO confirm.
%run -i functions.py

## Processing data

### Load data

In [None]:
# Read the network links and nodes
edges = gpd.read_file(PATH['data_in_network'] + 'edges_slope.gpkg')
nodes = gpd.read_file(PATH['data_in_network'] + 'nodes_edges_parallel.gpkg')
# Set CRS. set_crs() returns a new GeoDataFrame unless inplace=True,
# so the result has to be assigned back for the CRS to take effect.
edges = edges.set_crs('epsg:25832')
nodes = nodes.set_crs('epsg:25832')
# Keep an independent copy of the links including their geometries;
# the POI snapping further down measures distances to these geometries.
edges_orig = edges.copy()

In [None]:
# Preview the first rows of the link table
edges.head()

In [None]:
# Preview the first rows of the node table
nodes.head()

In [None]:
# Node ids and coordinates as plain lists, in the row order of the node table
node_points = nodes.geometry
nodes_id = list(nodes.id)
nodes_x = list(node_points.x)
nodes_y = list(node_points.y)
# Pair up x and y after passing each coordinate list through NormalizeData
nodes_coords = list(zip(NormalizeData(nodes_x), NormalizeData(nodes_y)))

In [None]:
# Columns needed for the graph, in the order used to build it:
# the two endpoints first, then the edge attributes.
used_columns = ['u', 'v', 'weight', 'max_slope', 'edge_id']

# Rename length to weight for igraph, then keep only the used columns in that order.
# Selecting the columns directly drops everything else (including the geometry),
# so the table is never modified while its columns are being iterated.
edges = edges.rename(columns={"length": "weight"})[used_columns]
edges

### Turn into igraph object

In [None]:
# Build an undirected igraph graph from the link table: each row is one edge, the first two
# columns (u, v) are its endpoints and the remaining columns are stored as the edge
# attributes listed in edge_attrs (same order as the table columns).
G = ig.Graph.TupleList(edges.itertuples(index=False), directed=False, weights=False, edge_attrs = ['weight', 'max_slope', 'edge_id'])

In [None]:
# Show igraph's summary of the freshly built graph
G.summary()

In [None]:
# Plot to double-check the graph as loaded.
# plotCheck is not defined in this notebook - presumably it comes from functions.py.
plotCheck(G, nodes_id, nodes_coords);

### Drop self-loops

Self-loops are really dangling links that lead outside the region and were mistakenly connected to themselves.

In [None]:
# Remove self-loops (loops=True) and collapse parallel links into one (multiple=True).
# A collapsed link keeps the smallest weight, the largest max_slope and the smallest edge_id.
G.simplify(multiple=True, loops=True, combine_edges=dict(weight=min, max_slope=max, edge_id=min));

In [None]:
# Plot to double-check the graph after simplification
plotCheck(G, nodes_id, nodes_coords);

### Drop dangling nodes

In [None]:
# Source: https://codereview.stackexchange.com/questions/284246/deletion-of-nodes-of-degree-1-from-a-python-igraph-graph
# Start from all nodes of degree <= 1 and keep growing the set: a neighbor joins it
# as soon as at most one of its own neighbors is still outside the set.
dangling = set(G.vs.select(_degree_le=1))
pending = set(dangling)
while pending:
    current = pending.pop()
    for neighbor in current.neighbors():
        if neighbor in dangling:
            continue
        num_remaining = sum(1 for other in neighbor.neighbors() if other not in dangling)
        if num_remaining > 1:
            # Still connected to the rest of the network via more than one node: keep it
            continue
        dangling.add(neighbor)
        pending.add(neighbor)
G.delete_vertices(dangling)

In [None]:
# Plot to double-check the graph after dropping dangling nodes
plotCheck(G, nodes_id, nodes_coords);

### Drop degree 2 nodes

This should include all non-ismain nodes.

In [None]:
# Ids of all nodes whose ismain flag is 0
nonismain_ids = nodes.loc[nodes['ismain'] == 0, 'node_id'].to_list()
# Turn to dict for fast finding (only the keys matter, every value is True)
nodes_nonismain = dict.fromkeys(nonismain_ids, True)

In [None]:
to_delete_ids = []

# Contract degree 2 nodes: connect the two neighbors directly and delete the node afterwards.
# NOTE(review): the bypass link is added right away, which raises the degree of both neighbors
# before they are visited. Consecutive degree 2 nodes are therefore only partly contracted by
# this single pass - confirm whether a repeated pass is wanted.
# Unclear how to select nodes in igraph by name, so let's iterate through them
for v in G.vs:
#     if v["name"] in nodes_nonismain and v.degree() == 2:
    if v.degree() == 2:
        # Remember node to delete
        to_delete_ids.append(v.index)
        # Query the two incident links and the two neighbors only once
        incident_links = v.incident()
        link_a, link_b = incident_links[0], incident_links[1]
        neighbor_nodes = v.neighbors()
        # Add a new edge that combines the deleted ones
        sumoflengths = link_a["weight"] + link_b["weight"]
        maxofslopes = max([link_a["max_slope"], link_b["max_slope"]])
        # Carry an edge_id over to the combined link. Without it the new link has edge_id None,
        # can never be matched to a snapped POI, and min() in the re-simplify below fails when
        # it is merged with a parallel link. The smaller id is kept, mirroring edge_id=min.
        # NOTE(review): the id of the other original link is lost here.
        known_ids = [eid for eid in (link_a["edge_id"], link_b["edge_id"]) if eid is not None]
        combined_id = min(known_ids) if known_ids else None
        G.add_edge(neighbor_nodes[0].index, neighbor_nodes[1].index, weight=sumoflengths, max_slope=maxofslopes, edge_id=combined_id)

G.delete_vertices(to_delete_ids)

# Re-simplify
G.simplify(multiple=True, loops=True, combine_edges=dict(weight=min, max_slope=max, edge_id=min));

In [None]:
# Plot to double-check the graph after dropping degree 2 nodes
plotCheck(G, nodes_id, nodes_coords);

### Add POIs

Snap POIs to network

In [None]:
# Load the facility POIs and display the table
facilities = gpd.read_file(PATH['data_in_pois'] + 'facilities_within_reach_100.gpkg')
facilities

We only have a relatively small number of facilities and links, so it should be fine to simply loop through all facility-link pairs.

In [None]:
# Candidate links to snap to, as (edge_id, geometry) pairs taken from the original link table.
# Built once instead of re-iterating the table for every facility.
# Rows whose edge_id is NaN are skipped (NaN is the only value not equal to itself).
snap_candidates = [
    (edge_id, geom)
    for edge_id, geom in zip(edges_orig['edge_id'], edges_orig['geometry'])
    if edge_id == edge_id
]

# Collect the edge_id of the nearest link of every facility, if within snap_threshold
e_haswater = set()
for findex, frow in facilities.iterrows():
    if not frow['type']:  # Can add conditions on type later
        continue
    poi_this = frow['geometry']
    d = float('inf')
    eid = None
    for edge_id, geom in snap_candidates:
        d_this = poi_this.distance(geom)
        if d_this < d:
            d = d_this
            eid = edge_id
    # Test against None rather than truthiness, so that a link with edge_id 0 can be snapped to
    if eid is not None and d <= snap_threshold:
        e_haswater.add(eid)

# Flag every graph link: True if a facility was snapped to its edge_id, False otherwise
for e in G.es:
    e["has_water"] = e["edge_id"] in e_haswater
        

In [None]:
# Alternatively, for testing, we could randomly populate the graph with a new edge property has_water
# seed(10)
# for e in G.es:
#     e["has_water"] = True if random() < 0.1 else False

In [None]:
# One color per link: blue if it has water, grey otherwise
edge_colors = ["blue" if e["has_water"] else "grey" for e in G.es]

In [None]:
# Plot the graph with the per-link colors computed above and add a caption
fig = plotCheck(G, nodes_id, nodes_coords, vertex_size=7, edge_color=edge_colors)
plt.text(0,0.04, "Water links highlighted");
plt.tight_layout()

### Get minimal loop basis (=face loops)

In [None]:
# https://python.igraph.org/en/latest/api/igraph.GraphBase.html#minimum_cycle_basis
# Map a running loop id to the loop's edge ids plus some statistics
loopbasis = {}
for cid, c in enumerate(G.minimum_cycle_basis()):
    loopbasis[cid] = {
        "edges": c,
        # Total length: sum of the weights of the loop's edges
        "length": sum(G.es[eid]["weight"] for eid in c),
        # Stored as the number of edges of the loop
        "numnodes": len(c),
    }

In [None]:
# Show longest loop in minimal loop basis (last has most nodes)
last_loop_edges = loopbasis[max(loopbasis.keys())]["edges"]
edge_colors = ["red" if e.index in last_loop_edges else "grey" for e in G.es]

In [None]:
# Plot the graph with the colors computed above. The caption shows the length of the loop
# with the highest id in loopbasis, converted from m to km and truncated to a whole number.
# NOTE(review): that loop is the last one in the basis; whether it is also the longest
# in meters is not checked here - confirm.
fig = plotCheck(G, nodes_id, nodes_coords, vertex_size=7, edge_color=edge_colors)
plt.text(0,0.04, "Longest face loop highlighted: " + str(int(loopbasis[max(loopbasis.keys())]["length"]/1000)) + "km");
plt.tight_layout()

Getting all simple loops has not yet been implemented in igraph, see:  
* https://github.com/igraph/igraph/issues/379  
* https://github.com/igraph/igraph/issues/1398  
Some potential progress here, but only for C, not Python:
* https://github.com/igraph/igraph/pull/2181

But they could be constructed by XORing loops of the loop basis.  

It has been implemented in networkX though: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cycles.simple_cycles.html#networkx.algorithms.cycles.simple_cycles

Therefore, we do not use igraph's loop basis, but go ahead with networkX.

### Get all loops via nx

In [None]:
# Convert the igraph graph to a networkX graph, which is used below to enumerate all simple loops
Gnx = G.to_networkx()

In [None]:
# Get all unique loops, meaning a loop ABCA is counted only once and not as ABCA, BCAB, and CABC
# Each loop is filed under its first node only
allloops_unique = {}
nodes_done = set()
numloops_unique = 0
allloops_generator = nx.simple_cycles(Gnx, length_bound=loop_numnode_bound)
for c in allloops_generator:
    sourcenode = c[0]
    # Properties of this loop, keyed like the per-node lists they are appended to
    loop_stats = {
        "loops": c,
        "lengths": getLoopLength(c),
        "numnodes": len(c),
        "max_slopes": getLoopMaxSlope(c),
        "water_profile": getLoopWaterProfile(c),
    }
    numloops_unique += 1
    if sourcenode not in nodes_done:
        # First loop for this node: start with empty lists
        allloops_unique[sourcenode] = {key: [] for key in loop_stats}
        nodes_done.add(sourcenode)
    for key, value in loop_stats.items():
        allloops_unique[sourcenode][key].append(value)
print("Found " + str(numloops_unique) + " unique loops for length bound " + str(loop_numnode_bound))

In [None]:
# Get all loops, meaning a loop ABCA is counted also as ABCA, BCAB, and CABC
# Each loop is filed under every node it contains
allloops = {}
nodes_done = set()
numloops = 0
allloops_generator = nx.simple_cycles(Gnx, length_bound=loop_numnode_bound)
for c in allloops_generator:
    # Properties of this loop, computed once and shared by all of its nodes
    loop_stats = {
        "loops": c,
        "lengths": getLoopLength(c),
        "numnodes": len(c),
        "max_slopes": getLoopMaxSlope(c),
        "water_profile": getLoopWaterProfile(c),
    }
    for sourcenode in c:
        numloops += 1
        if sourcenode not in nodes_done:
            # First loop for this node: start with empty lists
            allloops[sourcenode] = {key: [] for key in loop_stats}
            nodes_done.add(sourcenode)
        for key, value in loop_stats.items():
            allloops[sourcenode][key].append(value)
print("Found " + str(numloops) + " loops for length bound " + str(loop_numnode_bound))

In [None]:
# Flatten the per-node lists into one array per property, node after node
alllooplengths = np.zeros(numloops)
allloopnumnodes = np.zeros(numloops, dtype=int)
allloopmaxslopes = np.zeros(numloops)
offset = 0
for census in allloops.values():
    # Slice of the flat arrays that belongs to this node's loops
    chunk = slice(offset, offset + len(census["lengths"]))
    alllooplengths[chunk] = census["lengths"]
    allloopnumnodes[chunk] = census["numnodes"]
    allloopmaxslopes[chunk] = census["max_slopes"]
    offset = chunk.stop

## Descriptive network statistics

### Link lengths and max slopes

In [None]:
linklengths = [e["weight"] for e in G.es]
linkmaxslopes = [e["max_slope"] for e in G.es]


def percent_label(values, condition):
    """Return the share of `values` fulfilling `condition` as a rounded percentage label, e.g. "42%"."""
    return str(round(sum(1 for x in values if condition(x)) / len(values) * 100)) + "%"


fig = plt.figure(figsize=(8, 3))
axes1 = fig.add_axes([0.08, 0.16, 0.4, 0.75])
axes2 = fig.add_axes([0.58, 0.16, 0.4, 0.75])

# Left panel: link lengths with the three limits drawn as vertical lines
histxy = axes1.hist(linklengths, density=False)
ymax = max(histxy[0])
lim_lo, lim_hi, lim_max = link_limit[0], link_limit[1], link_limit[2]
axes1.plot([lim_lo, lim_lo], [0, ymax], ":k")
axes1.plot([lim_hi, lim_hi], [0, ymax], ":k")
axes1.plot([lim_max, lim_max], [0, ymax], ":r")
indcond = [i for i, x in enumerate(linklengths) if (x >= lim_lo and x <= lim_hi)]
massinallowedrange = round(len(indcond) / len(linklengths) * 100)  # Should be high
# The four classes below are mutually exclusive, so a link lying exactly on a limit
# is counted once only: x < lo | lo <= x <= hi | hi < x <= max | x > max
axes1.text((lim_lo + lim_hi) / 2, ymax, str(massinallowedrange) + "%", horizontalalignment='center', verticalalignment='top')
axes1.text(lim_lo * 0.9, ymax, percent_label(linklengths, lambda x: x < lim_lo), horizontalalignment='right', verticalalignment='top')
axes1.text((lim_hi + lim_max) / 2, ymax, percent_label(linklengths, lambda x: lim_hi < x <= lim_max), horizontalalignment='center', verticalalignment='top')
axes1.text(lim_max * 1.01, ymax, percent_label(linklengths, lambda x: x > lim_max), horizontalalignment='left', verticalalignment='top', color="red")

axes1.set_xlabel('Length [m]')
axes1.set_ylabel('Frequency')
axes1.set_title('Link lengths')

# Right panel: link max slopes, split at maxslope_limit
histxy = axes2.hist(linkmaxslopes, density=False)
ymax = max(histxy[0])
axes2.plot([maxslope_limit, maxslope_limit], [0, ymax], ":r")
axes2.text(maxslope_limit * 0.95, ymax, percent_label(linkmaxslopes, lambda x: x < maxslope_limit), horizontalalignment='right', verticalalignment='top')
axes2.text(maxslope_limit * 1.05, ymax, percent_label(linkmaxslopes, lambda x: x >= maxslope_limit), horizontalalignment='left', verticalalignment='top', color="red")
axes2.set_xlabel('Max slope [%]')
axes2.set_ylabel('')
axes2.set_title('Link max slopes');

fig.savefig(PATH["plot"] + "linkstats")

### Loop lengths

In [None]:
fig = plt.figure(figsize=(8, 3))
axes1 = fig.add_axes([0.1, 0.1, 0.35, 0.8])
axes2 = fig.add_axes([0.55, 0.1, 0.35, 0.8])

# Left panel: distribution of loop lengths
axes1.hist(alllooplengths, density=True)
axes1.set(xlabel='Length [m]', ylabel='Probability', title='Loop lengths')

# Right panel: distribution of nodes per loop, with bin edges at every integer up to the bound
node_bins = list(range(loop_numnode_bound + 1))
axes2.hist(allloopnumnodes, density=True, bins=node_bins)
axes2.set(xlabel='Nodes', title='Nodes per loop', xlim=[0, loop_numnode_bound + 0.5])

# Annotate with the bound used and the number of loops found
text_x = loop_numnode_bound / 20
plt.text(text_x, 0.01, "Bound: " + str(loop_numnode_bound))
plt.text(text_x, 0.04, "Loops: " + str(numloops));

In [None]:
fig = plt.figure(figsize=(8, 3))
axes1 = fig.add_axes([0.08, 0.16, 0.4, 0.75])
axes2 = fig.add_axes([0.58, 0.16, 0.4, 0.75])

facelooplengths = [c["length"] for c in loopbasis.values()]
numfaceloops = len(facelooplengths)
lower, upper = faceloop_limit[0], faceloop_limit[1]

# Left panel: face loop lengths with both limits drawn as vertical lines
histxy = axes1.hist(facelooplengths, density=False)
peak = max(histxy[0])
axes1.set_xlabel('Length [m]')
axes1.set_ylabel('Frequency')
axes1.set_title('Face loop lengths')
axes1.plot([lower, lower], [0, peak], ":k")
axes1.plot([upper, upper], [0, peak], ":r")

# Rounded percentage of face loops within, below and above the limits
share_within = round(sum(1 for x in facelooplengths if lower <= x <= upper) / numfaceloops * 100)
share_below = round(sum(1 for x in facelooplengths if x < lower) / numfaceloops * 100)
share_above = round(sum(1 for x in facelooplengths if x > upper) / numfaceloops * 100)
axes1.text((lower + upper) / 2, peak, str(share_within) + "%", horizontalalignment='center', verticalalignment='top')
axes1.text(lower * 0.95, peak, str(share_below) + "%", horizontalalignment='right', verticalalignment='top')
axes1.text(upper * 1.01, peak, str(share_above) + "%", horizontalalignment='left', verticalalignment='top', color="red")

# Right panel: nodes per face loop
axes2.hist([c["numnodes"] for c in loopbasis.values()], density=False)
axes2.set_xlabel('Nodes')
axes2.set_title('Face loop nodes');

fig.savefig(PATH["plot"] + "faceloopstats")

In [None]:
# Slice of the histogram counts in histxy[0], from position min(indcond) to max(indcond).
# NOTE(review): histxy was last assigned by the face loop length histogram, while indcond holds
# positions in linklengths (links within the allowed length range). The two do not obviously
# belong together - this looks like leftover debugging output; confirm or remove.
histxy[0][min(indcond):max(indcond)+1]

In [None]:
# Show face loops that conform to the length thresholds
# Collect the edges of every face loop whose length lies within the limits (inclusive)
okedges = set()
for c in loopbasis.values():
    if faceloop_limit[0] <= c["length"] <= faceloop_limit[1]:
        okedges.update(c["edges"])

# Green for edges of conforming face loops, grey for all others
edge_colors = ["green" if e.index in okedges else "grey" for e in G.es]

In [None]:
# Plot the graph with the colors computed above and add a caption
fig = plotCheck(G, nodes_id, nodes_coords, vertex_size=7, edge_color=edge_colors)
plt.text(0,0.04, "Conforming face loops highlighted")
plt.tight_layout()

## Save loop census

In [None]:
# All objects are pickled one after another into a single file, so reading it back
# requires one pickle.load() call per object, in the same order as listed here.
with open(PATH['data_out'] + 'loopcensus_'+str(loop_numnode_bound)+'.pkl', 'wb') as f:
    census_objects = (
        allloops,
        alllooplengths,
        allloopnumnodes,
        allloopmaxslopes,
        G,
        loop_numnode_bound,
        nodes_id,
        nodes_coords,
        numloops,
        loopbasis,
    )
    for obj in census_objects:
        pickle.dump(obj, f)