# NETWORK CONNECTIONS

For the tracing use case, connecting networks across borders is fundamental to the objectives. A logical approach is applied: a network connection object is created to indicate that two nodes belonging to different networks refer to the same point in the real world.

To achieve this, the nodes that refer to the same real-world object, or that at least indicate the flow of water from one region or country to another, are identified.

This entails:
- Extracting the end points of one network that are not also begin points and that fall along the border.
- Extracting the begin points of the other network that fall along the same border.
- Performing an **sjoin_nearest** operation to pair the two sets. Note that the result includes multiple output records for a single input record where there are multiple equidistant nearest or intersected neighbours.

Because of possible precision errors, a buffer along the border is used to identify the two sets of nodes.

These nodes then form the network connection object, as cross-border connected, cross-border identical or intermodal connections.

This code was developed using the three regions of Belgium and can be applied to any other regions/countries by declaring the appropriate labels and columns.

In [1]:
import os
import sys
# Resolve the project root (the directory that holds the `src` package imported
# below) and make it both the working directory and the first import location.
# Assumes the notebook is started from a direct sub-folder of the project root.
# If the working directory already is the root (this cell was run before in the
# same kernel) stay there: taking the parent unconditionally would climb one
# directory higher on every re-run, because `os.chdir` changes what '' resolves to.
_cwd = os.path.abspath('')
path = _cwd if os.path.isdir(os.path.join(_cwd, 'src')) else os.path.dirname(_cwd)
os.chdir(path)
print(path)
# Keep the root first on the import path without stacking duplicates on re-runs.
if sys.path[0] != path:
    sys.path.insert(0, path)

c:\Workdir\Develop\repository\go-peg


In [2]:
import uuid

import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import Point, LineString, MultiLineString, MultiPoint, Polygon
from shapely.ops import nearest_points

from src.config import config

### 1. Load datasets

In [26]:
# Short labels of the two networks being connected. They are used to name the
# output layer ("<from_region>_<to_region>_connections"), so they must match
# the node/border files loaded below: Flanders ('vl') -> Brussels ('bxl').
# Labels that do not match the loaded data store the links under a
# mislabelled layer.
from_region = 'vl'
to_region = 'bxl'

In [27]:
# Projected CRS shared by every layer in this notebook; each dataset is
# re-projected to it when loaded (ETRS89-extended / LAEA Europe).
# Alternative kept for reference: 'EPSG:31370' (Belgian Lambert 72).
PROJ_CRS = 'EPSG:3035'

# Value written to the `connectionType` attribute of every generated link.
connection_type = "cross-border connected"

In [28]:
def load_datasets(path, crs=None):
    """
    Read a vector dataset, clean its geometries and re-project it.

    Rows whose geometry duplicates an earlier row and rows without a geometry
    are dropped, the index is renumbered from 0, and the result is
    re-projected. The shape of the cleaned data and the resulting CRS are
    printed.

    Parameters
    ----------
    path : str or path-like
        Location of a file readable by ``geopandas.read_file``.
    crs : optional
        Target CRS handed to ``to_crs``. Defaults to the notebook-wide
        ``PROJ_CRS`` so that existing one-argument calls keep working.

    Returns
    -------
    geopandas.GeoDataFrame
        The cleaned data in the target CRS.
    """
    # Resolve the default at call time so a later change of PROJ_CRS is honoured.
    target_crs = PROJ_CRS if crs is None else crs

    data = gpd.read_file(path)
    data = (
        data.drop_duplicates(subset=["geometry"])
        .dropna(subset=["geometry"])
        .reset_index(drop=True)
    )
    print(data.shape)

    data = data.to_crs(target_crs)
    print("Project crs:", data.crs)
    return data

In [34]:
# Inputs for the pair of networks being connected (here: Flanders -> Brussels).
# Node layers are read from `config.data_dest`, border layers from `config.data_src`.
from_region_nodes = config.data_dest / 'vl_nodes_PROCESSED.shp'
from_region_border = config.data_src / 'BE_boundaries/flanders.shp'

to_region_nodes = config.data_dest / 'BXL_waternodesPROCESSED.shp'
to_region_border = config.data_src / 'BE_boundaries/bruxelles.shp'

# Paths of the other datasets, kept for switching the pair being processed:
# bxl_border = config.data_src / 'BE_boundaries/bruxelles.shp'
# wal_nodes = config.data_dest / 'wal_waternodesPROCESSED.shp'
# wal_border = config.data_src / 'BE_boundaries/wallonie.shp'

In [35]:
# Node layers of the two networks, cleaned and re-projected by `load_datasets`.
df1 = load_datasets(from_region_nodes)  # nodes of the "from" network
df2 = load_datasets(to_region_nodes)  # nodes of the "to" network

# Border layers of the two regions, re-projected to the same CRS as the nodes.
dataset1_border = load_datasets(from_region_border)
dataset2_border = load_datasets(to_region_border)

(69760, 6)
Project crs: EPSG:3035
(90, 8)
Project crs: EPSG:3035
(1, 13)
Project crs: EPSG:3035
(1, 13)
Project crs: EPSG:3035


In [36]:
# Tolerance, in units of the projected CRS, by which each border layer is grown.
buffer_length = 50

# Grow both border layers by the tolerance ...
dataset1_buffer, dataset2_buffer = (
    border.buffer(buffer_length) for border in (dataset1_border, dataset2_border)
)

# ... and keep only the area covered by both buffers.
buffer_intersection = dataset1_buffer.intersection(dataset2_buffer)

# Restrict each node layer to the nodes that fall inside that shared area.
df1_points, df2_points = (nodes.clip(buffer_intersection) for nodes in (df1, df2))

In [37]:
# Pair every border-strip node of the first network with its nearest
# border-strip node of the second network.
# NOTE(review): no `max_distance` is passed, so a node is paired with its
# nearest counterpart however far away that is - confirm this is intended.
nearest_pairs = gpd.sjoin_nearest(df1_points, df2_points)

# The join result carries a single geometry column (geometry_x after the merge
# below), so the matched node's point is looked up again by id (geometry_y).
to_node_geometry = df2[["node_id", "geometry"]]
network_conn = nearest_pairs.merge(
    to_node_geometry,
    left_on="node_id_right",
    right_on="node_id",
    how="left",
)

In [40]:
# Preview the first matched node pairs.
network_conn.head(3)

Unnamed: 0,node_id_left,source_left,sewernode__left,STATUS,LBLTYPE,geometry_x,index_right,node_id_right,source_right,sewernode__right,dcpCode,dcpName,dcpWaterBo,dcpState,node_id,geometry_y
0,VL_HN96,water_node,,,,POINT (3930334.589 3097492.622),20,NO25019967_1,water_node,,,,,,NO25019967_1,POINT (3930341.556 3097478.045)
1,VL_HN22596,water_node,,,,POINT (3930317.133 3097533.810),20,NO25019967_1,water_node,,,,,,NO25019967_1,POINT (3930341.556 3097478.045)
2,VL_HN449,water_node,,,,POINT (3923343.254 3088880.041),0,NO25019898,water_node,,,,,,NO25019898,POINT (3921612.451 3089238.350)


In [41]:
def make_connection_lines(df, from_point, to_point):
    """
    Build one straight two-point line per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding, for every row, the start and the end of a line.
    from_point : str
        Name of the column with the start of each line.
    to_point : str
        Name of the column with the end of each line.

    Returns
    -------
    list
        One ``LineString`` per row, in row order; empty when ``df`` has no rows.
    """
    # Walk the two columns in lockstep instead of using `iterrows`, which
    # builds a throw-away Series for every row just to read two cells.
    # The `Point(...)` wrapping of each cell is kept from the original code.
    return [
        LineString([Point(start), Point(end)])
        for start, end in zip(df[from_point], df[to_point])
    ]

# Attach, per matched pair, the line running from the left node (geometry_x)
# to the right node (geometry_y), then preview the result.
network_conn = network_conn.assign(
    connection_lines=make_connection_lines(network_conn, 'geometry_x', 'geometry_y')
)
network_conn.head(2)

Unnamed: 0,node_id_left,source_left,sewernode__left,STATUS,LBLTYPE,geometry_x,index_right,node_id_right,source_right,sewernode__right,dcpCode,dcpName,dcpWaterBo,dcpState,node_id,geometry_y,connection_lines
0,VL_HN96,water_node,,,,POINT (3930334.589 3097492.622),20,NO25019967_1,water_node,,,,,,NO25019967_1,POINT (3930341.556 3097478.045),LINESTRING (3930334.5894555715 3097492.6217700...
1,VL_HN22596,water_node,,,,POINT (3930317.133 3097533.810),20,NO25019967_1,water_node,,,,,,NO25019967_1,POINT (3930341.556 3097478.045),LINESTRING (3930317.1331213666 3097533.8100565...


In [42]:
# Source column -> name in the output layer. The dict order is also the
# column order of the resulting table.
link_columns = {
    "node_id_left": "idElement1",
    "node_id_right": "idElement2",
    "connection_lines": "geometry",
}

# Keep only the two node ids and the connecting line, and make the line the
# active geometry of the new GeoDataFrame.
connection_links = gpd.GeoDataFrame(
    network_conn[list(link_columns)].rename(columns=link_columns),
    geometry='geometry',
)

In [43]:
# Show only the first rows instead of dumping the whole table.
connection_links.head(10)

Unnamed: 0,idElement1,idElement2,geometry
0,VL_HN96,NO25019967_1,"LINESTRING (3930334.589 3097492.622, 3930341.5..."
1,VL_HN22596,NO25019967_1,"LINESTRING (3930317.133 3097533.810, 3930341.5..."
2,VL_HN449,NO25019898,"LINESTRING (3923343.254 3088880.041, 3921612.4..."
3,VL_HN252,NO25019898,"LINESTRING (3923245.465 3088896.135, 3921612.4..."
4,VL_HN65319,NO25019898,"LINESTRING (3923026.186 3088963.901, 3921612.4..."
5,VL_HN68564,NO25019898,"LINESTRING (3922243.508 3088977.574, 3921612.4..."
6,VL_HN248,NO25019898,"LINESTRING (3921564.375 3089332.533, 3921612.4..."
7,VL_HN273,NO25019898,"LINESTRING (3921563.900 3089332.930, 3921612.4..."
8,VL_HN448,NO25019898,"LINESTRING (3921530.491 3089436.167, 3921612.4..."
9,VL_HN306,NO25020625,"LINESTRING (3928584.515 3103865.457, 3928586.4..."


In [45]:
# The links were built from node geometries that `load_datasets` already
# re-projected to PROJ_CRS, so the CRS only has to be declared, not transformed.
connection_links = connection_links.set_crs(PROJ_CRS)
# NOTE(review): optional re-projection of the output. FINAL_CRS is not defined
# in the visible part of this notebook and must be declared before enabling it.
# connection_links = connection_links.to_crs(FINAL_CRS)

In [47]:
# Show the CRS now attached to the links.
print(connection_links.crs)

EPSG:3035


In [48]:
# One identifier per link. uuid4 is random, so the values differ on every run
# of the notebook.
connection_links['UUID'] = [uuid.uuid4().hex for _ in connection_links.index]

# Constant attributes shared by all links produced in this run.
connection_links['watercourse_namespace'] = "gopeg.eu/tracing"
connection_links['connectionType'] = connection_type

In [49]:
# Flag every generated link as fictitious.
# NOTE(review): stored as the string 'true', not a boolean - confirm that the
# target schema expects text.
connection_links['fictitious'] = 'true'

In [50]:
# Summarise columns, dtypes and non-null counts before export.
connection_links.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 28 entries, 0 to 27
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   idElement1             28 non-null     object  
 1   idElement2             28 non-null     object  
 2   geometry               28 non-null     geometry
 3   UUID                   28 non-null     object  
 4   watercourse_namespace  28 non-null     object  
 5   connectionType         28 non-null     object  
 6   fictitious             28 non-null     object  
dtypes: geometry(1), object(6)
memory usage: 1.8+ KB


In [25]:
# Write the links as one layer per region pair into a shared GeoPackage.
# The path is relative to the working directory set at the top of the notebook.
output_gpkg = "harmonized_data/NetworkConnections.gpkg"
# Create the output folder on first use so the export does not fail when it is missing.
os.makedirs(os.path.dirname(output_gpkg), exist_ok=True)
connection_links.to_file(output_gpkg, layer=f"{from_region}_{to_region}_connections", driver='GPKG')