# Explore data
## Project: Cycling node network loop analysis

This notebook explores the input data set.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2024-01-24  
Last modified: 2024-01-24  

## Imports

In [None]:
import geopandas as gpd
import igraph as ig
import matplotlib.pyplot as plt
import numpy as np
from functools import reduce

## Functions

In [None]:
def NormalizeData(data):
    """Min-max scale ``data`` to the range [0, 1].

    Parameters
    ----------
    data : array-like of numbers
        Values to rescale (e.g. a list of coordinates).

    Returns
    -------
    list
        The rescaled values, in the same order as ``data``. An empty input
        yields an empty list. If all values are equal, every value maps to
        0.0 rather than NaN.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return []
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Constant input: avoid 0/0, which would produce NaN coordinates
        return list(np.zeros_like(values))
    return list((values - lowest) / span)

def getLayout(G, nodes_id, nodes_coords):
    """Return plotting coordinates ordered like the vertices of ``G``.

    Parameters
    ----------
    G : graph whose vertices carry a "name" attribute
        The vertex names are looked up in ``nodes_id``.
    nodes_id : list
        Node identifiers, aligned position-by-position with ``nodes_coords``.
    nodes_coords : list
        One coordinate entry per element of ``nodes_id``.

    Returns
    -------
    list
        ``nodes_coords`` entries, one per vertex of ``G``, in vertex order.

    Raises
    ------
    ValueError
        If a vertex name does not occur in ``nodes_id``.
    """
    # Build the id -> position table once instead of scanning the list for
    # every vertex. setdefault keeps the FIRST occurrence of a duplicated id,
    # which is the position list.index() would have returned.
    first_pos = {}
    for pos, node_id in enumerate(nodes_id):
        first_pos.setdefault(node_id, pos)

    layout = []
    for name in G.vs["name"]:
        if name not in first_pos:
            # Same exception type that list.index() raises for a missing item
            raise ValueError(f"{name!r} is not in list")
        layout.append(nodes_coords[first_pos[name]])
    return layout

## Exploration

### Load data

In [None]:
# Read the edge and node layers of the network
# NOTE(review): the node file name says "parallel" while the edge file name
# says "no_parallel" — confirm this pairing is intended.
edges = gpd.read_file(r'../data/input/faxe/network/network_edges_no_parallel.gpkg')
nodes = gpd.read_file(r'../data/input/faxe/network/nodes_edges_parallel.gpkg')
# Set CRS
# set_crs returns a new GeoDataFrame by default, so the result has to be
# assigned back; otherwise the CRS assignment is silently discarded.
edges = edges.set_crs('epsg:25832')
nodes = nodes.set_crs('epsg:25832')

In [None]:
# Display the loaded node table for a quick visual inspection
nodes

In [None]:
# Node identifiers, in the row order of the node table
nodes_id = list(nodes["id"])
# Raw x/y coordinates read from the node geometries
nodes_x, nodes_y = list(nodes.geometry.x), list(nodes.geometry.y)
# Rescale each axis with NormalizeData and pair the results up as (x, y) tuples
nodes_coords = [
    (x, y) for x, y in zip(NormalizeData(nodes_x), NormalizeData(nodes_y))
]

In [None]:
# Rename length to weight for igraph
edges = edges.rename(columns={"length": "weight"})
# Keep only the columns used for the graph, in (u, v, weight) order.
# Selecting by label both drops every other column and fixes the order, so
# no column has to be deleted while the frame is being iterated.
used_columns = ["u", "v", "weight"]
edges = edges[used_columns]

### Turn into igraph object

In [None]:
# Build an undirected graph from the edge rows (u, v, weight); the edge
# weights are read back later through the "weight" edge attribute.
G = ig.Graph.TupleList(edges.itertuples(index=False), directed=False, weights=True)

In [None]:
# Visual sanity check: draw the graph at the normalized node coordinates
layout = getLayout(G, nodes_id, nodes_coords)
fig, ax = plt.subplots()
ig.plot(G, layout=layout, vertex_size=6, target=ax);

### Drop dangling nodes

In [None]:
# Source: https://codereview.stackexchange.com/questions/284246/deletion-of-nodes-of-degree-1-from-a-python-igraph-graph
# Collect every vertex that is, or becomes, dangling once the already
# collected vertices are discounted, then delete them all in a single call.
# Seed set: vertices whose current degree is at most 1
vertices = {v for v in G.vs.select(_degree_le=1)}
# Work list: collected vertices whose neighbors still have to be inspected
needs_to_be_checked = set(vertices)
while needs_to_be_checked:
    vertex = needs_to_be_checked.pop()
    for n_vertex in vertex.neighbors():
        # Skip a neighbor that is already collected, or that still has more
        # than one neighbor outside the collected set
        if n_vertex in vertices \
                or sum(1 for v in n_vertex.neighbors() if v not in vertices) > 1:
            continue
        # At most one uncollected neighbor left: collect it and queue it
        vertices.add(n_vertex)
        needs_to_be_checked.add(n_vertex)
# Delete only after the full set is known
G.delete_vertices(vertices)

In [None]:
# Visual sanity check after removing dangling nodes: redraw the graph
layout = getLayout(G, nodes_id, nodes_coords)
fig, ax = plt.subplots()
ig.plot(G, layout=layout, vertex_size=6, target=ax);

### Get cycle basis

In [None]:
# https://python.igraph.org/en/latest/api/igraph.GraphBase.html#fundamental_cycles
# Map a running cycle id to the cycle's edge ids and its total length,
# i.e. the sum of the "weight" attribute over the cycle's edges.
fcycles = {
    cid: {"edges": c, "length": sum(G.es[eid]["weight"] for eid in c)}
    for cid, c in enumerate(G.fundamental_cycles())
}
# Keep the counter available as before: the next unused cycle id
cid = len(fcycles)

fcycles

Getting all simple cycles has not yet been implemented in igraph, see:  
* https://github.com/igraph/igraph/issues/379  
* https://github.com/igraph/igraph/issues/1398  
Some potential progress here, but only for C, not Python:
* https://github.com/igraph/igraph/pull/2181

But all simple cycles can be generated by XORing (taking symmetric differences of) cycles from the cycle basis.  

It has been implemented in NetworkX though: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cycles.simple_cycles.html#networkx.algorithms.cycles.simple_cycles