# 01. POI snapping, Denmark-wide
## Project: Bicycle node network loop analysis

This notebook snaps the POIs to the Denmark-wide network, which is assembled from the individual study-area networks created in notebook 00.  
Please select `denmark` as the `study_area` in the `config.yml`.

Contact: Michael Szell (michael.szell@gmail.com)

Created: 2025-08-07  
Last modified: 2025-08-08

## To do

- [X] Rewrite snapping with momepy/geopandas

## Parameters

In [None]:
# Run the shared parameter script inside this notebook's namespace (-i), so
# every name it defines is directly available to the cells below.
# NOTE(review): presumably this is where PATH, STUDY_AREA, STUDY_AREA_COMBINED,
# SNAP_THRESHOLD and BORNHOLM_DELTA come from (they are used below but not
# defined in this notebook) - confirm against setup_parameters.py.
%run -i setup_parameters.py
debug = False  # Set to True for extra plots and verbosity

## Functions

In [None]:
# Run the shared helper script inside this notebook's namespace (-i).
# NOTE(review): normalize_data, plot_check and get_vertex_size_constant are
# used below but not defined in this notebook; presumably they are defined
# in functions.py - confirm.
%run -i functions.py

## Processing data

### Load data

In [None]:
# Merge graphs
# Each subarea network is loaded from its compressed pickle and appended to one
# combined networkx graph. disjoint_union relabels nodes to consecutive
# integers, so node labels from different subareas cannot collide.
Gnx = nx.empty_graph()
for subarea in STUDY_AREA_COMBINED[STUDY_AREA]:
    # NOTE: pickle.load can execute arbitrary code. Only load files that were
    # written by this project's own preprocessing notebooks.
    with lzma.open(PATH[subarea]["data_out"] + "network_preprocessed0.xz", "rb") as f:
        G_new = pickle.load(f)
    Gnx = nx.disjoint_union(Gnx, G_new.to_networkx())

# Merge POIs
# Maps each POI category to the file(s) that hold its POIs in every subarea's
# POI input folder.
poi_files = {
    "facility": [
        "facility.gpkg",
    ],
    "service": [
        "service.gpkg",
    ],
    "attraction": ["poi.gpkg"],
}
# Collect all per-file frames first and concatenate once at the end; calling
# pd.concat inside the loop would re-copy the accumulated frame every time.
poi_frames = []
for subarea in STUDY_AREA_COMBINED[STUDY_AREA]:
    for cat, filenames in poi_files.items():
        for filename in filenames:
            pois_new = gpd.read_file(PATH[subarea]["data_in_pois"] + filename)
            pois_new["category"] = cat
            # Shift Bornholm POIs by the constant offset BORNHOLM_DELTA.
            # NOTE(review): presumably the Bornholm network was shifted by the
            # same offset in notebook 00 - confirm.
            if subarea == "bornholm":
                pois_new["geometry"] = pois_new["geometry"].translate(
                    xoff=BORNHOLM_DELTA[0],
                    yoff=BORNHOLM_DELTA[1],
                )
            poi_frames.append(pois_new)

pois = pd.concat(poi_frames, ignore_index=True)
# These two source columns are not used in the rest of this notebook.
pois = pois.drop(["type", "gruppe"], axis=1)
if debug:
    print(pois.head())

Turn into gdf and initialize:

In [None]:
# Convert the merged networkx graph into a node (point) and an edge (line)
# GeoDataFrame, see
# https://docs.momepy.org/en/stable/user_guide/graph/convert.html
nodes, edges = momepy.nx_to_gdf(Gnx, points=True, lines=True)
for layer in (nodes, edges):
    # Both layers get the same projected CRS, EPSG:25832
    layer.set_crs(epsg=25832, inplace=True)

# Node ids and coordinates as plain lists (used by the debug plots)
nodes_id = list(nodes.nodeID)
nodes_x = list(nodes.geometry.x)
nodes_y = list(nodes.geometry.y)
nodes_coords = list(zip(normalize_data(nodes_x), normalize_data(nodes_y)))

# Initialize the POI attributes: no POI flags set, zero diversity
for flag_column in ("has_water", "has_facility", "has_service", "has_attraction"):
    edges[flag_column] = False
edges["poi_diversity"] = 0

if debug:
    print(edges.head())

### Snap POIs

Snap POIs to network. POIs come in 3 categories:
- Facilities (water station, bicycle repair station, supermarket, etc.)
- Services (camping ground, hotel, gas station, etc.)
- Attractions (church, museum, beach, etc.)  

If any of these is within reach of a link, the link is assumed to provide water. Further, we define a link's POI diversity $Y \in \{0,1,2,3\}$ as the number of distinct POI categories within reach. For simplicity, reach is defined with the same constant `SNAP_THRESHOLD` for all POI categories.

In [None]:
# https://stackoverflow.com/questions/52582864/snap-a-geodataframe-of-points-to-nearest-line-in-separate-geodataframe

# Join the closest line geometry to each point.
# The line geometry is first copied into its own column, or it is lost in the
# sjoin (only the left, i.e. point, geometry is kept).
edges["line_geom"] = edges.geometry
pois_snapped = pois.sjoin_nearest(
    edges[["line_geom", "geometry"]],
    how="left",
    max_distance=SNAP_THRESHOLD,
    distance_col="distance",
)

# Drop POIs that are too far from a link: with how="left" they are kept in the
# join result but carry a missing line geometry. notna() detects them
# regardless of the column dtype, unlike an `== None` comparison.
pois_snapped = pois_snapped[pois_snapped["line_geom"].notna()].copy()

# Snap each point to the nearest point on the nearest line.
# A positional list is used instead of a row-wise DataFrame.apply, which is
# slow and does not return a Series when the frame is empty.
pois_snapped["geometry"] = [
    snap(point, nearest_points(point, line)[1], tolerance=SNAP_THRESHOLD)
    for point, line in zip(pois_snapped["geometry"], pois_snapped["line_geom"])
]

pois_snapped = pois_snapped.drop(columns=["line_geom"])
pois_snapped.rename(columns={"index_right": "index_edge"}, inplace=True)
# The left join introduced missing values, which turned the edge index into
# floats; now that unmatched POIs are gone, restore the edge index dtype so the
# values are valid labels for `edges`.
pois_snapped["index_edge"] = pois_snapped["index_edge"].astype(edges.index.dtype)

if debug:
    print(pois_snapped.head())

With the snapped POIs, update the POI attributes of all affected edges:

In [None]:
# Flag every edge that has at least one snapped POI of a given category.
# One vectorised assignment per category replaces a Python loop over all
# snapped POI rows.
for category, edge_ids in pois_snapped.groupby("category")["index_edge"]:
    # Cast to the edge index dtype: index_edge may be float after the left join.
    hit_edges = edge_ids.dropna().astype(edges.index.dtype).unique()
    edges.loc[hit_edges, "has_" + category] = True

# A link is assumed to provide water if any POI category is within reach.
# `|` is the explicit element-wise "or" for boolean columns.
edges["has_water"] = (
    edges["has_facility"] | edges["has_service"] | edges["has_attraction"]
)
# POI diversity: number of distinct POI categories within reach (0 to 3).
edges["poi_diversity"] = (
    edges["has_facility"].astype(int)
    + edges["has_service"].astype(int)
    + edges["has_attraction"].astype(int)
)
if debug:
    print(edges.tail())

gdf to nx to igraph 😅:

In [None]:
# Rebuild a networkx graph from the updated edge GeoDataFrame, then convert it
# to igraph.
Gnx = momepy.gdf_to_nx(edges, approach="primal", integer_labels=True)
G = ig.Graph.from_networkx(Gnx)

# Use the networkx node labels as igraph vertex names
G.vs["name"] = G.vs["_nx_name"]
del G.vs["_nx_name"]

# Remove graph- and edge-level attributes that are not kept in the saved graph
for graph_attribute in ("approach", "crs"):
    del G[graph_attribute]
for edge_attribute in ("_nx_multiedge_key", "line_geom"):
    del G.es[edge_attribute]

# Store the edge length attribute "mm_len" under the name "weight"
G.es["weight"] = G.es["mm_len"]
del G.es["mm_len"]

if debug:
    print(G.summary())

In [None]:
if debug:
    # Only needed for the plots below: recompute node ids and normalized
    # coordinates from the rebuilt graph.
    nodes, edges = momepy.nx_to_gdf(Gnx, points=True, lines=True)
    nodes.set_crs(epsg=25832, inplace=True)
    nodes_id = list(nodes.nodeID)
    node_points = nodes.geometry
    nodes_x = list(node_points.x)
    nodes_y = list(node_points.y)
    nodes_coords = list(
        zip(
            normalize_data(nodes_x),
            normalize_data(nodes_y),
        )
    )

#### Plot POI diversity

In [None]:
if debug:
    # Edge width grows with POI diversity: 0.25 for none, +2 per category
    edge_widths = [
        (diversity * 2) + 0.25 for diversity in G.es["poi_diversity"]
    ]

    vertex_size = get_vertex_size_constant(G.vcount())
    fig = plot_check(
        G,
        nodes_id,
        nodes_coords,
        vertex_size=vertex_size,
        edge_width=edge_widths,
    )
    plt.text(0, 0.04, "POI diversity")
    plt.tight_layout()

#### Plot water links

In [None]:
if debug:
    # Links with water are drawn blue, all others grey
    edge_colors = [
        "blue" if has_water else "grey" for has_water in G.es["has_water"]
    ]

    vertex_size = get_vertex_size_constant(G.vcount())
    fig = plot_check(
        G,
        nodes_id,
        nodes_coords,
        vertex_size=vertex_size,
        edge_color=edge_colors,
    )
    plt.text(0, 0.04, "Water links highlighted")
    plt.tight_layout()

#### Plot max slopes

In [None]:
if debug:
    # Color by maximum slope: below 4 green, below 6 orange, otherwise red
    edge_colors = [
        "green" if slope < 4 else "orange" if slope < 6 else "red"
        for slope in G.es["max_slope"]
    ]

    vertex_size = get_vertex_size_constant(G.vcount())
    fig = plot_check(
        G,
        nodes_id,
        nodes_coords,
        vertex_size=vertex_size,
        edge_color=edge_colors,
    )
    plt.text(0, 0.04, "Max slopes highlighted")
    plt.tight_layout()

## Save preprocessed network data

In [None]:
# Write the igraph network, including the POI attributes, as an
# lzma-compressed pickle.
network_file = PATH["data_out"] + "network_preprocessed.xz"
with lzma.open(network_file, "wb") as out_handle:
    pickle.dump(G, out_handle)