# Point Locations of Interest

Locations of interest refer to any phenomenon occurring along the networks that has the potential to affect or be affected by pollution.

The locations of interest have surface geometry, either point or polygon.

This notebook develops the methodology for point locations of interest.

In [2]:
import os
import sys

# Run the notebook from the repository root so that the `src` package
# (imported below as `src.config`) can be resolved from the working directory.
# The move to the parent folder is guarded: without the check, every re-run of
# this cell would climb one directory further up.
path = os.path.abspath('')
if not os.path.isdir(os.path.join(path, 'src')):
    path = os.path.dirname(path)
    os.chdir(path)
print(path)

c:\Workdir\Develop\repository\go-peg


In [3]:
import geopandas as gpd
import pandas as pd

from shapely.geometry import Point, LineString, MultiLineString, MultiPoint
from shapely import wkt
from shapely.ops import nearest_points
import shapely.wkt

import numpy as np 

import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 

pd.options.mode.chained_assignment = None  # default='warn'

from src.config import config

In [4]:
# config.data_src

**Declare global variables**

In [5]:
# CRS in which the geometric work is done.
# NOTE: this initial value is overwritten further down with the CRS read from
# the processed water dataset.
PROJ_CRS = 'EPSG:31370'
# CRS the final links layer is reprojected to before it is written out.
FINAL_CRS = 'EPSG:3035'

# Upper bound on the point-to-network distance: only points strictly closer
# than this are kept. Expressed in PROJ_CRS units (presumably metres — TODO confirm).
buffer_distance = 200
# Country code handed to create_watercourse_namespace().
country='BE'

# Label written to the OOI_type column of the output.
object_type = 'Production and Industrial site'

**Load the processed water dataset and the object of interest dataset**

In [6]:
def load_data(path):
    """
    Read the dataset stored at ``path`` into a GeoDataFrame.

    The shape and the CRS of the loaded data are printed as a quick check.

    Returns
    -------
    tuple
        ``(data, data_crs)`` - the GeoDataFrame and its CRS.
    """
    frame = gpd.read_file(path)
    frame_crs = frame.crs

    print(frame.shape)
    print("Data crs:", frame_crs)
    return frame, frame_crs

In [7]:
PATH = config.data_dest / "vl_water_PROCESSED.shp"
water, data_crs = load_data(PATH)

(72069, 21)
Data crs: EPSG:31370


In [8]:
PROJ_CRS = data_crs

In [9]:
PATH = config.data_src / "flanders_locations/Production and industrial facilities/ProductionInstallation_points.shp"
prod_installations, data_crs = load_data(PATH)
prod_installations = prod_installations.to_crs(PROJ_CRS)
print('Project crs:', prod_installations.crs)

(1962, 9)
Data crs: EPSG:3857
Project crs: EPSG:31370


In [10]:
prod_installations.head(2)

Unnamed: 0,gml_id,identifier,name,localId,namespace,status,type,dist,geometry
0,,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,,,,POINT (174053.026 229391.163)
1,,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Kela,BE.VL.000000132.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,,,,POINT (175372.287 230904.830)


## Working with point locations of interest

To work with point locations of interest, we project each point onto the nearest water geometry, applying a threshold distance to exclude points that are too far away from it.

**Select relevant columns from the object of interest dataset, which includes identifiers and geometry**

In [11]:
prod_points = prod_installations[['identifier', 'name', 'localId', 'namespace', 'geometry']]
prod_points.head(2)
prod_points.shape

(1962, 5)

**Select relevant columns from the water dataset**

In [12]:
#Check for multiline strings in a dataset
def check_multiline(df):
    """Count the MultiLineString objects in ``df['geometry']``
        and print the total."""
    multiline_count = sum(
        1
        for geom in df['geometry'].to_list()
        if isinstance(geom, MultiLineString)
    )
    print("MultiLinesStrings:" , multiline_count)

check_multiline(water)   

MultiLinesStrings: 0


In [13]:
# water_df = water[['VHAS', 'NAAM', 'start_ID', 'end_ID', 'geometry']]
water_df = water[['line_id', 'line_name', 'basin', 'geometry']]

In [14]:
assert water_df.shape[0] == water_df.geometry.nunique()

**Perform an sjoin to get the two dataset properties into one dataset**

In [15]:
# Work on copies of the point and line frames.
gdf_p = prod_points.copy()
gdf_l = water_df.copy()


# Attach to every point the attributes of its nearest water line, then use
# `index_right` (the index of the matched line) to bring in that line's
# geometry. Both frames carry a `geometry` column, so the merge yields
# `geometry_x` (point) and `geometry_y` (line), which are renamed below.
# NOTE(review): no 'ID' column shows up in the resulting frame, so the
# 'index_left' rename entry looks like a no-op — confirm and remove.
df_n = (gpd.sjoin_nearest(gdf_p, gdf_l)
            .merge(gdf_l['geometry'], left_on="index_right", right_index=True)
            .drop(columns=['index_right'])
            .rename(columns={'index_left': 'ID', 'geometry_x':'point_geom', 'geometry_y':'line_geom'})
            .reset_index(drop=True)
            ) #merge operation adds the geometry column
            
#get distance of location of interest from water. With this distance we can filter out locations by distance from water
# df_n["distance"] = df_n.apply(lambda r: r["geometry_x"].distance(r["geometry_y"]), axis=1)
# A point that is equally close to several lines appears once per line after
# the nearest join; keep only the first row for each point geometry.
# NOTE(review): de-duplicating on geometry would also merge distinct
# installations that share exactly the same coordinates — confirm this is intended.
df_n = df_n.drop_duplicates(subset=['point_geom'])
# Every remaining row must have both a point and a matched line geometry.
assert df_n['point_geom'].isnull().values.any() == False
assert df_n['line_geom'].isnull().values.any() == False

Ensure the linestring geometry is not multilinestring

In [16]:
def multiline_to_linestring_col(df, geom_col):
    """
    Return the geometries of ``df[geom_col]`` as a list of LineStrings.

    LineStrings are passed through unchanged. A MultiLineString is flattened
    into a single LineString by concatenating the coordinates of its parts in
    order (parts that do not touch end up joined by a straight segment).

    Parameters
    ----------
    df : DataFrame
        Frame holding the geometry column.
    geom_col : str
        Name of the column with LineString / MultiLineString objects.

    Returns
    -------
    list
        One LineString per row of ``df``, in row order, so the result can be
        assigned straight back as a column.

    Raises
    ------
    TypeError
        If a row holds anything other than a LineString or MultiLineString.
        Skipping such rows silently would return a list shorter than ``df``
        and misalign every following row.
    """
    linestrings = []
    for row_label, geom in df[geom_col].items():
        if isinstance(geom, MultiLineString):
            # Go through `.geoms`: iterating a multi-part geometry directly is
            # deprecated in Shapely 1.8 and no longer works in Shapely 2.0.
            merged_coords = [xy for part in geom.geoms for xy in part.coords]
            linestrings.append(LineString(merged_coords))
        elif isinstance(geom, LineString):
            linestrings.append(geom)
        else:
            raise TypeError(
                f"Row {row_label!r} of column {geom_col!r} holds "
                f"{type(geom).__name__}; expected LineString or MultiLineString"
            )
    return linestrings

df_n['line_geom'] = multiline_to_linestring_col(df_n, 'line_geom')

In [17]:
df_n.head(2)

Unnamed: 0,identifier,name,localId,namespace,point_geom,line_id,line_name,basin,line_geom
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Kela,BE.VL.000000132.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (175372.287 230904.830),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...


In [18]:
def project_point_to_line(df):
    """
    For every row, find the location on ``line_geom`` that is closest to
    ``point_geom``.

    Returns a list with one point per row, in row order.
    """
    # nearest_points returns a pair ordered like its arguments, so the first
    # element is the point lying on the line.
    return [
        nearest_points(line, location)[0]
        for line, location in zip(df['line_geom'], df['point_geom'])
    ]

df_n['nearest_point'] = project_point_to_line(df_n)
type(df_n['nearest_point'][0])

shapely.geometry.point.Point

In [19]:
df_n.head(2)

Unnamed: 0,identifier,name,localId,namespace,point_geom,line_id,line_name,basin,line_geom,nearest_point
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...,POINT (174186.72303999812 229374.21319997776)
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Kela,BE.VL.000000132.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (175372.287 230904.830),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...,POINT (175811.0361400008 230893.71169997938)


In [20]:
# test_df = gpd.GeoDataFrame((df_n[['line_id', 'nearest_geoms']].rename(columns={'nearest_geoms':'geometry'})), geometry='geometry')
# test_df = test_df.set_crs(PROJ_CRS)
# test_df.to_file('data/test_data/links_test.shp')

**Filter out objects of interest by distance from network**

In [21]:
# Filter objects of interest by their distance to the network
def _distance_to_network(record):
    """Distance between an object of interest and its projection on the line."""
    return record["point_geom"].distance(record["nearest_point"])

df_n["distance"] = df_n.apply(_distance_to_network, axis=1)

# Keep only the objects strictly closer than the buffer distance
within_buffer = df_n['distance'] < buffer_distance
df_filtered = df_n.loc[within_buffer].reset_index(drop=True)
print(df_filtered.shape)

(1055, 11)


**Insert the nearest geoms into the linestring before making connection lines**

In [22]:
df_filtered.columns

Index(['identifier', 'name', 'localId', 'namespace', 'point_geom', 'line_id',
       'line_name', 'basin', 'line_geom', 'nearest_point', 'distance'],
      dtype='object')

**Get new linestrings with the nearest point added as a new vertex**

In [23]:
## This is the function that works
def insert_coordinates(df=None, line_col='line_geom', point_col='nearest_point'):
    """
    Insert each row's point as an extra vertex into that row's line.

    For every row the segment of the line closest to the point is located and
    the point's coordinate is inserted between the two vertices of that
    segment (the first such segment wins on ties).

    Parameters
    ----------
    df : DataFrame, optional
        Frame to process. Defaults to the notebook-level ``df_filtered`` so
        that existing ``insert_coordinates()`` calls keep working.
    line_col : str, default 'line_geom'
        Column holding the LineString to densify.
    point_col : str, default 'nearest_point'
        Column holding the point to insert.

    Returns
    -------
    list
        One new LineString per row, in row order.

    Raises
    ------
    ValueError
        If a line has no segment for which a distance to the point can be
        computed (e.g. an empty line).
    """
    if df is None:
        df = df_filtered

    linestrings = []
    for line, point in zip(df[line_col], df[point_col]):
        # Materialise the coordinates once instead of on every segment.
        new_coords = list(line.coords)

        # Reset per row so a result from a previous row can never leak in.
        insert_at = None
        min_dist = float('inf')
        for position, (seg_start, seg_end) in enumerate(
                zip(new_coords, new_coords[1:]), start=1):
            dist = LineString([seg_start, seg_end]).distance(point)
            if dist < min_dist:
                min_dist = dist
                insert_at = position

        if insert_at is None:
            raise ValueError(
                "Cannot insert a vertex: no usable segment found in line "
                f"with {len(new_coords)} coordinate(s)"
            )

        # Insert the new vertex into the LineString geometry
        new_coords.insert(insert_at, point.coords[0])
        linestrings.append(LineString(new_coords))

    return linestrings

In [24]:
df_filtered['new_line_geom'] = insert_coordinates()

**Make connection lines from objects of interest to the point in the new line**

In [25]:
df_n.columns

Index(['identifier', 'name', 'localId', 'namespace', 'point_geom', 'line_id',
       'line_name', 'basin', 'line_geom', 'nearest_point', 'distance'],
      dtype='object')

In [26]:
def make_connection_lines(df, from_point, to_point):
    """
    Build one two-vertex LineString per row, running from the geometry in
    column ``from_point`` to the geometry in column ``to_point``.

    Returns the lines as a list in row order.
    """
    return [
        LineString([Point(start), Point(end)])
        for start, end in zip(df[from_point], df[to_point])
    ]

df_filtered['connection_lines'] = make_connection_lines(df_filtered, 'point_geom', 'nearest_point')
df_filtered.head(2)

Unnamed: 0,identifier,name,localId,namespace,point_geom,line_id,line_name,basin,line_geom,nearest_point,distance,new_line_geom,connection_lines
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...,POINT (174186.72303999812 229374.21319997776),134.766839,LINESTRING (174218.4779399991 229321.029899978...,LINESTRING (174053.02636049603 229391.16308776...
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...,POINT (175991.44697913088 231201.23392304804),114.222335,LINESTRING (175886.86194000248 230719.80349997...,LINESTRING (176099.70271834361 231237.66792792...


In [27]:
df_n.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1962 entries, 0 to 2035
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   identifier     1962 non-null   object  
 1   name           1962 non-null   object  
 2   localId        1962 non-null   object  
 3   namespace      1962 non-null   object  
 4   point_geom     1962 non-null   geometry
 5   line_id        1962 non-null   object  
 6   line_name      1485 non-null   object  
 7   basin          1962 non-null   object  
 8   line_geom      1962 non-null   object  
 9   nearest_point  1962 non-null   object  
 10  distance       1962 non-null   float64 
dtypes: float64(1), geometry(1), object(9)
memory usage: 248.5+ KB


In [28]:
type(df_filtered['new_line_geom'][0])

shapely.geometry.linestring.LineString

**Calculate distance on network segment where the object of interest is referenced**

In [29]:
df_filtered.head(2)

Unnamed: 0,identifier,name,localId,namespace,point_geom,line_id,line_name,basin,line_geom,nearest_point,distance,new_line_geom,connection_lines
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...,POINT (174186.72303999812 229374.21319997776),134.766839,LINESTRING (174218.4779399991 229321.029899978...,LINESTRING (174053.02636049603 229391.16308776...
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...,POINT (175991.44697913088 231201.23392304804),114.222335,LINESTRING (175886.86194000248 230719.80349997...,LINESTRING (176099.70271834361 231237.66792792...


In [59]:
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055 entries, 0 to 1054
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   identifier             1055 non-null   object  
 1   name                   1055 non-null   object  
 2   localId                1055 non-null   object  
 3   namespace              1055 non-null   object  
 4   point_geom             1055 non-null   geometry
 5   line_id                1055 non-null   object  
 6   line_name              758 non-null    object  
 7   basin                  1055 non-null   object  
 8   line_geom              1055 non-null   object  
 9   nearest_point          1055 non-null   object  
 10  distance               1055 non-null   float64 
 11  new_line_geom          1055 non-null   object  
 12  connection_lines       1055 non-null   object  
 13  atPosition             1055 non-null   float64 
 14  OOI_type               1055 non-null   o

In [60]:
val = 134.766839
round_val = round(val, 2)
round_val

134.77

In [63]:
# Distance along each new line at which its nearest point sits,
# rounded to three decimals.
distances = [
    round(line.project(snapped), 3)
    for line, snapped in zip(df_filtered['new_line_geom'],
                             df_filtered['nearest_point'])
]

df_filtered['atPosition'] = distances

In [64]:
df_filtered.head(2)

Unnamed: 0,identifier,name,localId,namespace,point_geom,line_id,line_name,basin,line_geom,nearest_point,distance,new_line_geom,connection_lines,atPosition,OOI_type,watercourse_namespace
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...,POINT (174186.72303999812 229374.21319997776),134.766839,LINESTRING (174218.4779399991 229321.029899978...,LINESTRING (174053.02636049603 229391.16308776...,61.991,Production and Industrial site,gopeg.eu/tracing
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...,POINT (175991.44697913088 231201.23392304804),114.222335,LINESTRING (175886.86194000248 230719.80349997...,LINESTRING (176099.70271834361 231237.66792792...,704.589,Production and Industrial site,gopeg.eu/tracing


**Add information about the type of object of interest**

In [65]:
object_type

'Production and Industrial site'

In [66]:
def create_ooi_type(object_type):
    """Return the object-of-interest type label exactly as it was given."""
    return object_type

df_filtered['OOI_type'] = create_ooi_type(object_type)

**Add the namespace of the network**

In [44]:
def create_watercourse_namespace(country):
    """
    Return the namespace of the watercourse network.

    The same namespace is returned for every ``country``; the argument is
    currently not used.
    """
    return 'gopeg.eu/tracing'

df_filtered['watercourse_namespace'] = create_watercourse_namespace(country)

**Create a df with the final columns**

In [69]:
# Map the working column names onto the names used in the links output;
# the connection line becomes the `geometry` column.
link_column_names = {
    'connection_lines': 'geometry',
    'identifier': 'OOI_identifier',
    'name': 'OOI_name',
    'localId': 'OOI_localId',
    'namespace': 'OOI_namespace',
    'line_id': 'hydroId',
    'line_name': 'watercourse_localName',
    'basin': 'watercourseBasin',
}

df_links = df_filtered.rename(columns=link_column_names).reset_index(drop=True)

**Create unique id using UUID**

In [70]:
import uuid
df_links['UUID'] = [uuid.uuid4().hex for _ in range(len(df_links.index))]

In [71]:
df_links.head(2)

Unnamed: 0,OOI_identifier,OOI_name,OOI_localId,OOI_namespace,point_geom,hydroId,watercourse_localName,watercourseBasin,line_geom,nearest_point,distance,new_line_geom,geometry,atPosition,OOI_type,watercourse_namespace,UUID
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,LINESTRING (174218.4779399991 229321.029899978...,POINT (174186.72303999812 229374.21319997776),134.766839,LINESTRING (174218.4779399991 229321.029899978...,LINESTRING (174053.02636049603 229391.16308776...,61.991,Production and Industrial site,gopeg.eu/tracing,8bdbaa46c4e842dca4e1432b7337a889
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,LINESTRING (175886.86194000248 230719.80349997...,POINT (175991.44697913088 231201.23392304804),114.222335,LINESTRING (175886.86194000248 230719.80349997...,LINESTRING (176099.70271834361 231237.66792792...,704.589,Production and Industrial site,gopeg.eu/tracing,103548aba57449df8b8022859fc333c8


In [72]:
df_links.columns

Index(['OOI_identifier', 'OOI_name', 'OOI_localId', 'OOI_namespace',
       'point_geom', 'hydroId', 'watercourse_localName', 'watercourseBasin',
       'line_geom', 'nearest_point', 'distance', 'new_line_geom', 'geometry',
       'atPosition', 'OOI_type', 'watercourse_namespace', 'UUID'],
      dtype='object')

In [73]:
type(df_links['geometry'][0])

shapely.geometry.linestring.LineString

In [77]:
# Columns written to the links layer, in output order
cols = [
    'UUID',
    'OOI_type',
    'OOI_identifier',
    'OOI_name',
    'OOI_localId',
    'OOI_namespace',
    'hydroId',
    'atPosition',
    'watercourse_namespace',
    'geometry',
]

# Declare the CRS the geometries were computed in, then reproject for export
gdf_links = gpd.GeoDataFrame(df_links[cols], geometry='geometry').set_crs(PROJ_CRS)
gdf_links_final = gdf_links.to_crs(FINAL_CRS)

In [78]:
gdf_links_final.head(2)

Unnamed: 0,UUID,OOI_type,OOI_identifier,OOI_name,OOI_localId,OOI_namespace,hydroId,atPosition,watercourse_namespace,geometry
0,8bdbaa46c4e842dca4e1432b7337a889,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7007018_2,61.991,gopeg.eu/tracing,"LINESTRING (3953305.814 3153635.427, 3953437.7..."
1,103548aba57449df8b8022859fc333c8,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6801187,704.589,gopeg.eu/tracing,"LINESTRING (3955487.242 3155319.599, 3955376.5..."


In [42]:
pwd

'c:\\Workdir\\Develop\\repository\\go-peg'

In [76]:
gdf_links_final.to_file("harmonized_data/VL_ObjectsOfInterest.gpkg", layer="points_links", driver='GPKG')