# Point Locations of Interest

Locations of interest refer to any phenomenon occurring along the networks that has the potential to affect or be affected by pollution.

The locations of interest have surface geometry, either point or polygon.

This notebook develops the methodology for point locations of interest.

In [1]:
import os
import sys
# Switch the working directory to the parent of the directory the kernel was
# started in (presumably the repository root, so that the `src` package
# imported further down resolves -- confirm).
# The target is computed only once per kernel: os.path.abspath('') depends on
# the current working directory, so recomputing it when this cell is re-run
# would climb one directory higher on every execution.
if '_PROJECT_ROOT' not in globals():
    _PROJECT_ROOT = os.path.dirname(os.path.abspath(''))
path = _PROJECT_ROOT
os.chdir(path)
print(path)

c:\Workdir\Develop\repository\go-peg


In [2]:
import uuid
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely.wkt
from shapely import wkt
from shapely.errors import ShapelyDeprecationWarning
from shapely.geometry import Point, LineString, MultiLineString, MultiPoint
from shapely.ops import nearest_points

from src.config import config

warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)

pd.options.mode.chained_assignment = None  # default='warn'



In [3]:
config.data_src

WindowsPath('c:/Workdir/Develop/repository/go-peg/data/data_preprocess')

In [4]:
# Working CRS for the geometric operations; it is reassigned further down
# from the CRS of the loaded water dataset.
PROJ_CRS = 'EPSG:31370'
# CRS the final link geometries are reprojected to.
FINAL_CRS = 'EPSG:3035'

# Upper bound on the distance between a location and its snapped node on the
# water network; compared against distances measured in PROJ_CRS coordinates.
buffer_distance = 200
# Prefix of the watercourse namespace string.
country='BE'

# Label written to the OOI_type column of the output.
object_type = 'Production and Industrial site'

In [5]:
def load_data(path):
    """
    Read a vector dataset with geopandas and report its basic properties.

    The shape and the coordinate reference system of the loaded frame are
    printed. Returns a ``(frame, crs)`` tuple, where ``crs`` is the ``.crs``
    attribute of the frame that was read.
    """
    frame = gpd.read_file(path)
    print(frame.shape)

    frame_crs = frame.crs
    print("Data crs:", frame_crs)

    return frame, frame_crs

In [6]:
# Water network lines (file name suggests the output of an earlier
# preprocessing step -- confirm). data_crs holds the CRS reported for this layer.
PATH = config.data_dest / "vl_water_PROCESSED.shp"
water, data_crs = load_data(PATH)

(72163, 21)
Data crs: epsg:31370


In [7]:
# Use the CRS of the water network as the project CRS. It is read from the
# frame itself rather than from `data_crs`, because `data_crs` is rebound when
# the next dataset is loaded; re-running this cell afterwards would otherwise
# silently switch the project CRS to that other dataset's CRS.
PROJ_CRS = water.crs

In [8]:
# Production installation points. NOTE: this rebinds both PATH and data_crs,
# which until here referred to the water layer.
PATH = config.data_src / "flanders_locations/Production and industrial facilities/ProductionInstallation_points.shp"
prod_installations, data_crs = load_data(PATH)
# Reproject to the project CRS so both layers share one CRS before they are joined.
prod_installations = prod_installations.to_crs(PROJ_CRS)
print('Project crs:', prod_installations.crs)

(1962, 9)
Data crs: epsg:3857
Project crs: epsg:31370


In [9]:
prod_installations.head(2)

Unnamed: 0,gml_id,identifier,name,localId,namespace,status,type,dist,geometry
0,,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,,,,POINT (174053.026 229391.163)
1,,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Kela,BE.VL.000000132.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,,,,POINT (175372.287 230904.830)


In [10]:
water.columns

Index(['start_ID', 'end_ID', 'line_id', 'OIDN', 'UIDN', 'VHAG', 'line_name',
       'REGCODE', 'REGCODE1', 'BEHEER', 'CATC', 'category', 'BEKNR', 'BEKNAAM',
       'basin', 'GEO', 'LBLGEO', 'VHAZONENR', 'WTRLICHC', 'length',
       'geometry'],
      dtype='object')

## Working with point locations of interest

To work with point locations of interest, we project each point onto its nearest water geometry and apply a threshold distance to exclude points that are too far away from that geometry.

**Select relevant columns from the object of interest dataset, which includes identifiers and geometry**

In [11]:
prod_points = prod_installations[['identifier', 'name', 'localId', 'namespace', 'geometry']]
prod_points.head(2)
prod_points.shape

(1962, 5)

In [12]:
#Check for multiline strings in a dataset
def check_multiline(df):
    """Count the MultiLineString objects in ``df['geometry']`` and print the total.

    Nothing is returned; the count is only written to standard output.
    """
    geometries = df['geometry'].to_list()
    n_multi = sum(1 for geom in geometries if isinstance(geom, MultiLineString))
    print("MultiLinesStrings:" , n_multi)

In [13]:
check_multiline(water)

MultiLinesStrings: 0


In [14]:
water.columns

Index(['start_ID', 'end_ID', 'line_id', 'OIDN', 'UIDN', 'VHAG', 'line_name',
       'REGCODE', 'REGCODE1', 'BEHEER', 'CATC', 'category', 'BEKNR', 'BEKNAAM',
       'basin', 'GEO', 'LBLGEO', 'VHAZONENR', 'WTRLICHC', 'length',
       'geometry'],
      dtype='object')

**Select relevant columns from the water dataset**

In [15]:
# Keep only the identifying attributes and the geometry of each water line.
# The explicit copy makes water_df an independent frame, so adding columns to
# it later is an unambiguous write to its own data rather than to a selection
# derived from `water` (the situation pandas' SettingWithCopyWarning flags).
water_df = water[['line_id', 'line_name', 'basin', 'geometry']].copy()

In [16]:
assert water_df.shape[0] == water_df.geometry.nunique()

**Add a namespace to the water data before merging the datasets**

In [17]:
def create_watercourse_namespace(country):
    """Return the watercourse namespace: ``country`` followed by '.Tracing.HydroNetwork'."""
    suffix = '.Tracing.HydroNetwork'
    return country + suffix

water_df['watercourse_namespace'] = create_watercourse_namespace(country)

In [18]:
gdf_p = prod_points.copy()
gdf_l = water_df.copy()

# Attach to every location the attributes of its nearest water line, then pull
# in that line's geometry through the 'index_right' key produced by the join.
# Both sides carry a 'geometry' column, so the merge suffixes them:
#   geometry_x = location point, geometry_y = matched water line.
df_n = (gpd.sjoin_nearest(gdf_p, gdf_l)
            .merge(gdf_l['geometry'], left_on="index_right", right_index=True)
            .drop(columns=['index_right'])
            .reset_index(drop=True)
            )

# Keep a single row per distinct location point (a point can be joined to more
# than one line when several lines are equally near).
# NOTE(review): this also collapses different installations that share exactly
# the same coordinates -- confirm that this is intended.
df_n = df_n.drop_duplicates(subset=['geometry_x'])

# Every remaining row must have both its point and its matched line.
assert not df_n['geometry_x'].isnull().any()
assert not df_n['geometry_y'].isnull().any()

In [19]:
df_n.head(2)

Unnamed: 0,identifier,name,localId,namespace,geometry_x,line_id,line_name,basin,watercourse_namespace,geometry_y
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (174218.478 229321.030, 174211.910 ..."
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Kela,BE.VL.000000132.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (175372.287 230904.830),6801187,Raamloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (175886.862 230719.803, 175878.810 ..."


In [20]:
def get_nearest_point(df, line_col, point_col):
    """
    Snap each point in ``df`` to the closest vertex of the line stored on the same row.

    This identifies the node on the water network that a location-of-interest
    point is projected to. The candidates are the vertices of the line (its
    ``coords``), not arbitrary positions along its segments.

    Parameters
    ----------
    df : DataFrame
        One row per location; must contain ``line_col`` and ``point_col``.
    line_col : str
        Name of the column holding the matched line geometry.
    point_col : str
        Name of the column holding the location point geometry.

    Returns
    -------
    list
        One entry per row of ``df``, in row order: the nearest line vertex as a
        point geometry, or ``None`` when it could not be determined. The list
        therefore always has the same length as ``df`` and can be assigned
        directly as a new column.
    """
    geoms = []
    for _, row in df.iterrows():
        try:
            vertices = MultiPoint(row[line_col].coords)
            # nearest_points returns one point per input geometry; the second
            # one lies on `vertices`, i.e. it is the closest line vertex.
            geoms.append(nearest_points(row[point_col], vertices)[1])
        except ValueError:
            print("No nearest point found for {}".format(row[point_col]))
            # Keep the result aligned with the rows of df.
            geoms.append(None)
    return geoms

In [21]:
# Snap every location to a node on its matched water line.
df_n['loc_nodes'] = get_nearest_point(df_n, 'geometry_y', 'geometry_x')

# Straight-line distance between the original location and its snapped node.
df_n["distance"] = df_n.apply(lambda r: r["geometry_x"].distance(r["loc_nodes"]), axis=1)

# Make the snapped nodes the active geometry and tag them with the project CRS.
gdf_n = gpd.GeoDataFrame(df_n, geometry='loc_nodes').set_crs(PROJ_CRS)  #.drop(['geometry_x'], axis=1)

In [22]:
gdf_filtered = gdf_n[gdf_n['distance'] < buffer_distance].reset_index(drop=True)
print(gdf_filtered.shape)
# gdf_filtered.head()

(1032, 12)


In [23]:
def make_connection_lines(df, from_point, to_point):
    """Build one two-point LineString per row of ``df``.

    Each line starts at the geometry in column ``from_point`` and ends at the
    geometry in column ``to_point``. The lines are returned as a list in row order.
    """
    return [
        LineString([Point(start), Point(end)])
        for start, end in zip(df[from_point], df[to_point])
    ]

gdf_filtered['connection_lines'] = make_connection_lines(gdf_filtered, 'loc_nodes', 'geometry_x')
gdf_filtered.head(2)

Unnamed: 0,identifier,name,localId,namespace,geometry_x,line_id,line_name,basin,watercourse_namespace,geometry_y,loc_nodes,distance,connection_lines
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (174218.478 229321.030, 174211.910 ...",POINT (174186.723 229374.213),134.766839,LINESTRING (174186.72303999812 229374.21319997...
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (175886.862 230719.803, 175878.810 ...",POINT (175999.420 231177.544),116.925285,LINESTRING (175999.4199400008 231177.543999978...


In [24]:
# Position of each snapped node along its matched water line: the value
# returned by projecting the node (loc_nodes) onto the line (geometry_y).
distances = [
    line.project(node)
    for line, node in zip(gdf_filtered['geometry_y'], gdf_filtered['loc_nodes'])
]

gdf_filtered['atPosition'] = distances

In [25]:
gdf_filtered.head(2)

Unnamed: 0,identifier,name,localId,namespace,geometry_x,line_id,line_name,basin,watercourse_namespace,geometry_y,loc_nodes,distance,connection_lines,atPosition
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (174218.478 229321.030, 174211.910 ...",POINT (174186.723 229374.213),134.766839,LINESTRING (174186.72303999812 229374.21319997...,61.991412
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (175886.862 230719.803, 175878.810 ...",POINT (175999.420 231177.544),116.925285,LINESTRING (175999.4199400008 231177.543999978...,679.593676


In [26]:
object_type

'Production and Industrial site'

In [27]:
def create_ooi_type(object_type):
    """Return the given object type unchanged, for use as the OOI_type value."""
    return object_type

gdf_filtered['OOI_type'] = create_ooi_type(object_type)

Create unique id using UUID

In [28]:
# gdf_filtered_copy = gdf_filtered.copy()

In [29]:
# import uuid
# gdf_filtered['UUID'] = [uuid.uuid4().hex for _ in range(len(gdf_filtered.index))]

In [30]:
gdf_filtered.head(3)

Unnamed: 0,identifier,name,localId,namespace,geometry_x,line_id,line_name,basin,watercourse_namespace,geometry_y,loc_nodes,distance,connection_lines,atPosition,OOI_type
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (174053.026 229391.163),7007018_2,Laboureurloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (174218.478 229321.030, 174211.910 ...",POINT (174186.723 229374.213),134.766839,LINESTRING (174186.72303999812 229374.21319997...,61.991412,Production and Industrial site
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (176099.703 231237.668),6801187,Raamloop,Maas,BE.Tracing.HydroNetwork,"LINESTRING (175886.862 230719.803, 175878.810 ...",POINT (175999.420 231177.544),116.925285,LINESTRING (175999.4199400008 231177.543999978...,679.593676,Production and Industrial site
2,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Prat en Naessens Kathy,BE.VL.000001358.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,POINT (75447.466 177601.520),6020079,Hulstebeek,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (74540.863 177195.750, 74550.111 17...",POINT (75401.452 177659.270),73.83987,LINESTRING (75401.45233999788 177659.269999979...,1287.816151,Production and Industrial site


In [31]:
# Keep the attribute columns plus the connection lines, which become the
# active geometry of the links layer (expressed in the project CRS).
gdf_links = gpd.GeoDataFrame(
    gdf_filtered
    .drop(columns=['geometry_y', 'geometry_x', 'loc_nodes'])
    .rename(columns={'connection_lines': 'geometry'}),
    geometry='geometry',
    crs=PROJ_CRS,
)

# Reproject the links to the final CRS.
gdf_links_final = gdf_links.to_crs(FINAL_CRS)

In [32]:
# Rename the columns to the output schema: attributes of the object of
# interest get an OOI_ prefix, attributes of the matched water line get
# hydro/watercourse names. The index is reset to a fresh 0..n-1 range.
gdf_links_final = (gdf_links_final.rename(columns={'identifier':'OOI_identifier', 
                                 'name': 'OOI_name', 
                                 'localId': 'OOI_localId', 
                                 'namespace': 'OOI_namespace',
                                 'line_id': 'hydroId',
                                 'line_name': 'watercourse_localName',
                                 'basin': 'watercourseBasin'})
                # .query(f'distance < {buffer_distance}')
                                .reset_index(drop=True))

In [33]:
# One random hexadecimal identifier per link row. `uuid` is imported together
# with the other modules in the import cell at the top of the notebook.
# Note that re-running this cell generates a new set of identifiers.
gdf_links_final['UUID'] = [uuid.uuid4().hex for _ in range(len(gdf_links_final))]

In [34]:
gdf_links_final.head()

Unnamed: 0,OOI_identifier,OOI_name,OOI_localId,OOI_namespace,hydroId,watercourse_localName,watercourseBasin,watercourse_namespace,distance,geometry,atPosition,OOI_type,UUID
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7007018_2,Laboureurloop,Maas,BE.Tracing.HydroNetwork,134.766839,"LINESTRING (3953437.743 3153608.300, 3953305.8...",61.991412,Production and Industrial site,805f2c3ca0304f7eb280de4b176dfbf7
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6801187,Raamloop,Maas,BE.Tracing.HydroNetwork,116.925285,"LINESTRING (3955382.677 3155267.444, 3955487.2...",679.593676,Production and Industrial site,360eb13147d944e0b0d40b99c3465b57
2,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Prat en Naessens Kathy,BE.VL.000001358.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6020079,Hulstebeek,Schelde,BE.Tracing.HydroNetwork,73.83987,"LINESTRING (3851030.001 3109631.647, 3851071.4...",1287.816151,Production and Industrial site,69f346b39dc440f9b3834e2223c133ac
3,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Daelken,BE.VL.000001359.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6002393,,Brugse Polders,BE.Tracing.HydroNetwork,55.96754,"LINESTRING (3838853.910 3146430.782, 3838854.5...",565.642033,Production and Industrial site,494c320f737047b18e2350d219ac8af1
4,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Proot Bart,BE.VL.000001360.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6005494,,Ijzer,BE.Tracing.HydroNetwork,123.668533,"LINESTRING (3809307.813 3120052.013, 3809220.6...",0.0,Production and Industrial site,84ea2324e986450e8fdb0e0a841bd19a


In [35]:
gdf_links_final.columns

Index(['OOI_identifier', 'OOI_name', 'OOI_localId', 'OOI_namespace', 'hydroId',
       'watercourse_localName', 'watercourseBasin', 'watercourse_namespace',
       'distance', 'geometry', 'atPosition', 'OOI_type', 'UUID'],
      dtype='object')

In [37]:
links_final = gdf_links_final[['UUID', 'OOI_type', 'OOI_identifier', 'OOI_name', 'OOI_localId', 'OOI_namespace', 'hydroId', 'watercourse_namespace', 'geometry']]

In [38]:
links_final.head()

Unnamed: 0,UUID,OOI_type,OOI_identifier,OOI_name,OOI_localId,OOI_namespace,hydroId,watercourse_namespace,geometry
0,805f2c3ca0304f7eb280de4b176dfbf7,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Van Looveren Leo,BE.VL.000000416.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7007018_2,BE.Tracing.HydroNetwork,"LINESTRING (3953437.743 3153608.300, 3953305.8..."
1,360eb13147d944e0b0d40b99c3465b57,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Swaegers Slachthuis,BE.VL.000000186.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6801187,BE.Tracing.HydroNetwork,"LINESTRING (3955382.677 3155267.444, 3955487.2..."
2,69f346b39dc440f9b3834e2223c133ac,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Prat en Naessens Kathy,BE.VL.000001358.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6020079,BE.Tracing.HydroNetwork,"LINESTRING (3851030.001 3109631.647, 3851071.4..."
3,494c320f737047b18e2350d219ac8af1,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Daelken,BE.VL.000001359.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6002393,BE.Tracing.HydroNetwork,"LINESTRING (3838853.910 3146430.782, 3838854.5..."
4,84ea2324e986450e8fdb0e0a841bd19a,Production and Industrial site,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Proot Bart,BE.VL.000001360.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6005494,BE.Tracing.HydroNetwork,"LINESTRING (3809307.813 3120052.013, 3809220.6..."


In [None]:
# gdf_n_filtered_test.to_file(r"C:\Workdir\Develop\test_data\points_links.shp")
# links_final.to_file(r"C:\Workdir\Develop\test_data\points_links4.shp")

In [39]:
# links_final.to_file(r"c:\Workdir\Develop\repository\go-peg\harmonized_data\VL_locations_of_interest.gpkg", layer="points_links", driver='GPKG')

  pd.Int64Index,
