# Locations of Interest

These refer to any phonomenon occuring along the networks that have the potential to affect or be affected by pollution.

The locations of interest have surface geometry, either point or polygon.

Given a certain buffer distance, a location of interest can be identified based on whether it overlaps with a section of the network.

uses gopeg env

In [1]:
import os
import sys
path = os.path.dirname(os.path.abspath(''))
os.chdir(path)
print(path)

c:\Workdir\Develop\repository\go-peg


In [3]:
import geopandas as gpd
import pandas as pd
import numpy as np 

from shapely.geometry import Point, LineString, MultiLineString, MultiPoint
from shapely import wkt
from shapely.ops import nearest_points
import shapely.wkt

import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

from src.config import config

In [4]:
PROJ_CRS = 'EPSG:31370'
FINAL_CRS = 'EPSG:3035'

buffer_distance = 100
country='BE'

object_type = 'Production Facilities'

In [5]:
def load_data(path):
    """
    Loads the data from the given path, 
    and prints the shape and crs of the data.
    """
    data = gpd.read_file(path)
    print(data.shape)

    data_crs = data.crs

    print("Data crs:", data.crs)
    return data, data_crs

In [6]:
PATH = config.data_dest / "vl_water_PROCESSED.shp"  
water, data_crs = load_data(PATH)

(72069, 21)
Data crs: epsg:31370


In [7]:
PROJ_CRS = data_crs

In [8]:
PATH = config.data_src / "flanders_locations/Production and industrial facilities/ProductionInstallation_polygons.shp"
prod_installations, data_crs = load_data(PATH)
prod_installations = prod_installations.to_crs(PROJ_CRS)
print('Project crs:', prod_installations.crs)

(1895, 16)
Data crs: PROJCS["BD72 / Belgian Lambert 72",GEOGCS["BD72",DATUM["Reseau_National_Belge_1972",SPHEROID["International 1924",6378388,297,AUTHORITY["EPSG","7022"]],AUTHORITY["EPSG","6313"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["latitude_of_origin",90],PARAMETER["central_meridian",4.36748666666667],PARAMETER["standard_parallel_1",49.8333339],PARAMETER["standard_parallel_2",51.1666672333333],PARAMETER["false_easting",150000.01256],PARAMETER["false_northing",5400088.4378],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]
Project crs: epsg:31370


In [9]:
prod_installations.head(2)

Unnamed: 0,RecId,CaPaKey,Type,CaSeKey,FiscSitId,UpdDate,Shape_area,gml_id,descriptio,identifier,localId,namespace,name,status,type_2,geometry
0,3714438,12302B0261/00X000,PR,12302B,1,2016-11-30,962.551,pf_gpbv.1491,industrie,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000001687.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Galvani,,industrie,"POLYGON ((162292.997 199677.392, 162286.199 19..."
1,4711050,44008A0463/00G000,PR,44008A,2,2017-04-18,2587.4769,pf_gpbv.1594,veeteelt,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Verschraegen Patrick,,veeteelt,"POLYGON ((113791.970 195703.675, 113793.834 19..."


In [10]:
prod_installations.columns

Index(['RecId', 'CaPaKey', 'Type', 'CaSeKey', 'FiscSitId', 'UpdDate',
       'Shape_area', 'gml_id', 'descriptio', 'identifier', 'localId',
       'namespace', 'name', 'status', 'type_2', 'geometry'],
      dtype='object')

In [11]:
# water_data = gpd.read_file(r'C:\Workdir\Develop\repository\go-peg\harmonized_data\VL_HydroNetwork_3.gpkg', layer='Tracing_WatercourseLink')

## Working with polygon locations of interest

For working with polygon locations of interests, we will apply a buffer to the polygon. If part of the water network falls within this buffer zone, it is identified, and the points of intersection, i.e. starts-at point and ends-at point, following the direction of the flow of water, are extracted and recorded.

In [12]:
def create_buffers(df, buffer_size):
    """
    Creates a buffer around each point in the given dataframe.
    """
    poly_cols = ['Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'namespace', 'identifier', 'geometry']
    
    df = (df[poly_cols]
            .assign(buffer_zone = df['geometry']
            .apply(lambda x: x.buffer(buffer_size))))

    # mergedpolys = []
    # for index, row in df.iterrows():
    #     mergedpoly = row['geometry'].union(row['buffer_zone']).wkt
    #     mergedpolys.append(wkt.loads(mergedpoly))

    # df['mergedpolys'] = mergedpolys

    return df

In [13]:
# poly_cols = ['Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'identifier', 'geometry']
prod_install_df = gpd.GeoDataFrame((prod_installations
                                        .pipe(create_buffers, 100)), geometry='buffer_zone', crs=PROJ_CRS)

In [14]:
# prod_install_df = create_buffers(prod_installations, 100)

In [15]:
prod_install_df.iloc[0]

Type                                                          PR
CaSeKey                                                   12302B
descriptio                                             industrie
UpdDate                                               2016-11-30
name                                                     Galvani
localId                             BE.VL.000001687.INSTALLATION
namespace      https://data.gpbv.omgeving.vlaanderen.be/id/pr...
identifier     https://data.gpbv.omgeving.vlaanderen.be/id/pr...
geometry       POLYGON ((162292.9970761109 199677.39228480123...
buffer_zone    POLYGON ((162360.7253147308 199603.8180954041,...
Name: 0, dtype: object

In [16]:
buffers_gdf = prod_install_df.drop('geometry', axis=1)
type(buffers_gdf)

geopandas.geodataframe.GeoDataFrame

In [17]:
#buffers_gdf.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\buffers.shp")

**Project polygon to line**

In [18]:
water.head(2)

Unnamed: 0,start_ID,end_ID,line_id,OIDN,UIDN,VHAG,line_name,REGCODE,REGCODE1,BEHEER,...,category,BEKNR,BEKNAAM,basin,GEO,LBLGEO,VHAZONENR,WTRLICHC,length,geometry
0,VL_HN55957,VL_HN66013,32768_1,44659,723201,6200,Zierbeek,B5111,,20001,...,"Geklasseerd, tweede categorie",7,Denderbekken,Schelde,2,< 0.25 m,422,213,7.928281,"LINESTRING (138979.165 172375.119, 138977.345 ..."
1,VL_HN66013,VL_HN42790,32768_2,44659,723201,6200,Zierbeek,B5111,,20001,...,"Geklasseerd, tweede categorie",7,Denderbekken,Schelde,2,< 0.25 m,422,213,858.196491,"LINESTRING (138977.309 172382.827, 138977.282 ..."


**Load water data to perform the intersection and identify the points of intersection between water and locations of iterest**

In [19]:
# import funcs

ModuleNotFoundError: No module named 'funcs'

In [20]:
#Check for multiline strings in a dataset
def check_multiline(df):
    """This function checks for multiline strings
        from the geometry column in a given dataset"""
    lst = df['geometry'].to_list()
    multiline_count = 0
    for item in lst:
        if isinstance(item, MultiLineString):
            multiline_count += 1
    print("MultiLinesStrings:" , multiline_count)

In [21]:
check_multiline(water)

MultiLinesStrings: 0


**Select relevant columns from the water dataset**

In [22]:
water_df = water[['line_id', 'line_name', 'basin', 'geometry']]
assert water_df.shape[0] == water_df.geometry.nunique()

**Add a namespace of the water data before merging the datasets**

In [23]:
def create_namespace(country):
    namespace = country + '.Tracing.HydroNetwork'
    return namespace

water_df['inspireId_namespace'] = create_namespace(country)

In [24]:
buffers_gdf.head(2)

Unnamed: 0,Type,CaSeKey,descriptio,UpdDate,name,localId,namespace,identifier,buffer_zone
0,PR,12302B,industrie,2016-11-30,Galvani,BE.VL.000001687.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((162360.725 199603.818, 162354.312 19..."
1,PR,44008A,veeteelt,2017-04-18,Verschraegen Patrick,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((113715.977 195787.489, 113717.315 19..."


In [25]:
# new_df2.to_file(r"C:\Workdir\Develop\test_data\polygon_links.shp")

To linearly reference a location of interest(polygon) onto a water network(linestring), we need to perform an overlay of the polygons onto the linesrings.
The resulting geometry is a linestring that falls within the boundaries of a polygon, with all the properties of the original linestring and polygon.

With this data, we can extract the point where the water intersects the polygon.

In [26]:
water_df.columns

Index(['line_id', 'line_name', 'basin', 'geometry', 'inspireId_namespace'], dtype='object')

In [27]:
# intersect_df = water_truncated.clip(buffers_gdf, keep_geom_type=True).reset_index(drop=True)
clipped_water_df = water_df.clip(buffers_gdf, keep_geom_type=True).reset_index(drop=True)
# clipped_water_df2 = clipGDF_keepgeomtyp_line(water_df, buffers_gdf).reset_index(drop=True)

In [28]:
print(water_df.shape)
clipped_water_df.shape

(72069, 5)


(1929, 5)

In [29]:
clipped_water_df.head(2)

Unnamed: 0,line_id,line_name,basin,geometry,inspireId_namespace
0,7075802,Cicindria,Schelde,"LINESTRING (207074.872 164195.147, 207082.422 ...",BE.Tracing.HydroNetwork
1,7076120,Loop IX,Schelde,"LINESTRING (215939.955 166439.262, 215938.681 ...",BE.Tracing.HydroNetwork


In [30]:
def multiline_to_linestring_col(df, geom_col):
    linestrings = []
    for idx, row in df.iterrows():
        if isinstance(row[geom_col], LineString):
            linestrings.append(row[geom_col])
        elif isinstance(row[geom_col], MultiLineString):
            inlines = row[geom_col]
            outcoords = [list(item.coords) for item in inlines]
            outline = shapely.geometry.LineString(
                [i for sublist in outcoords for i in sublist])
            linestrings.append(outline)
    return linestrings

In [31]:
# clipped_water_df2 = funcs.multiline_to_linestring(clipped_water_df, PROJ_CRS)
clipped_water_df['geometry'] = multiline_to_linestring_col(clipped_water_df, 'geometry')

In [32]:
clipped_water_df.columns

Index(['line_id', 'line_name', 'basin', 'geometry', 'inspireId_namespace'], dtype='object')

In [33]:
count = 0
for i, row in clipped_water_df.iterrows():
        if isinstance(row['geometry'], LineString):
            # print('This is a Line geometry')
            pass
        else:
            print(type(row['geometry']))
            count += 1
            # df[point_col] = df[point_col].apply(str_to_point_geom)
print(count)

0


In [34]:
clipped_water_df.head(2)

Unnamed: 0,line_id,line_name,basin,geometry,inspireId_namespace
0,7075802,Cicindria,Schelde,"LINESTRING (207074.872 164195.147, 207082.422 ...",BE.Tracing.HydroNetwork
1,7076120,Loop IX,Schelde,"LINESTRING (215939.955 166439.262, 215938.681 ...",BE.Tracing.HydroNetwork


In [35]:
print(clipped_water_df.shape)
assert clipped_water_df['line_id'].nunique() == clipped_water_df.geometry.nunique()
clipped_water_df.head(2)

(1929, 5)


AssertionError: 

In [37]:
clipped_water_df2 = clipped_water_df.drop_duplicates(subset='geometry')

In [38]:
clipped_water_df2.shape

(1909, 5)

In [36]:
clipped_water_df.geometry.nunique()

1909

**Perform a spatial join between buffers and clipped water**

In [39]:
joined_df = gpd.overlay(clipped_water_df2, buffers_gdf, how='union', keep_geom_type=False)

joined_df = joined_df[joined_df['line_id'].notna()]
joined_df = joined_df[joined_df['localId'].notna()]

In [40]:
joined_df.shape

(2385, 13)

In [41]:
# joined_df_sjoin = gpd.sjoin(clipped_water_df2, buffers_gdf, how='left')

In [42]:
joined_df_merged = joined_df.merge(prod_installations[['localId', 'geometry']], on='localId', how='left')


In [43]:
joined_df_merged.head(3)

Unnamed: 0,line_id,line_name,basin,inspireId_namespace,Type,CaSeKey,descriptio,UpdDate,name,localId,namespace,identifier,geometry_x,geometry_y
0,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,PR,71353E,veeteelt,2019-09-14,SV-Agroplum,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16..."
1,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,PR,73031C,industrie,2020-09-26,Konings Juices & Drinks,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"LINESTRING (215939.955 166439.262, 215938.681 ...","POLYGON ((215833.278 166551.772, 215829.574 16..."
2,6018841,Herkebeek,Schelde,BE.Tracing.HydroNetwork,PR,73031C,industrie,2020-09-26,Konings Juices & Drinks,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"MULTILINESTRING ((215935.879 166474.094, 21593...","POLYGON ((215833.278 166551.772, 215829.574 16..."


In [44]:
#clipped.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\clipped.shp")

To identify the 
Add begin and end points to the linestrings. These mark the start_at points and end_at point of the location of interest on a water network

In [45]:
from shapely.wkt import loads

In [46]:
def add_beginpoints(col, df):
    lst = df[col].to_list()
    beginpoints = []
    for item in lst:
            if isinstance(item, LineString):
                first = Point(item.coords[0])
                beginpoints.append(first)
            elif isinstance(item, MultiLineString):
                first = Point(item.boundary[0]) 
                beginpoints.append(first)
    return beginpoints

def add_endpoints(col, df):
    lst = df[col].to_list()
    endpoints = []
    for item in lst:
            if isinstance(item, LineString):
                last = Point(item.coords[-1])
                # last_precise = shapely.wkt.dumps(last) #, rounding_precision=5)
                # last_precise_geom = loads(last_precise) 
                # endpoints.append(loads(last_precise_geom))
                endpoints.append(last)
            elif isinstance(item, MultiLineString):
                last = Point(item.boundary[-1])
                # last_precise = shapely.wkt.dumps(last) #, rounding_precision=5)
                # last_precise_geom = loads(last_precise) 
                endpoints.append(last)
    return endpoints

In [47]:
joined_df_merged.columns

Index(['line_id', 'line_name', 'basin', 'inspireId_namespace', 'Type',
       'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'namespace',
       'identifier', 'geometry_x', 'geometry_y'],
      dtype='object')

In [48]:
# clipped_df = clipped_water_df2.copy()
clipped_df = joined_df_merged[['identifier','localId', 'namespace', 'line_id', 'line_name', 'basin', 'inspireId_namespace', 'geometry_x', 'geometry_y']]

In [49]:
clipped_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16..."
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215939.955 166439.262, 215938.681 ...","POLYGON ((215833.278 166551.772, 215829.574 16..."


In [50]:
clipped_df['start_point'] = add_beginpoints('geometry_x', clipped_df)
clipped_df['end_point'] = add_endpoints('geometry_x', clipped_df)

In [51]:
type(clipped_df.start_point[0])

shapely.geometry.point.Point

In [52]:
clipped_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2385 entries, 0 to 2384
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   identifier           2385 non-null   object  
 1   localId              2385 non-null   object  
 2   namespace            2385 non-null   object  
 3   line_id              2385 non-null   object  
 4   line_name            1709 non-null   object  
 5   basin                2385 non-null   object  
 6   inspireId_namespace  2385 non-null   object  
 7   geometry_x           2385 non-null   geometry
 8   geometry_y           2385 non-null   geometry
 9   start_point          2385 non-null   object  
 10  end_point            2385 non-null   object  
dtypes: geometry(2), object(9)
memory usage: 288.1+ KB


In [53]:
clipped_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y,start_point,end_point
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16...",POINT (207074.8715567813 164195.14719801673),POINT (207164.93129834003 164496.30120820695)
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215939.955 166439.262, 215938.681 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215939.9552513256 166439.26173004115),POINT (215938.68073999733 166440.85979997925)


In [54]:
def get_nearest_point(df, line_col, point_col):
    """
    For each point in points_df, find the nearest point in lines_df.
    This identifies the projected point on the water network, from a location of interest point.
    """
    indexes = []
    geoms = []
    for idx, row in df.iterrows():
        # if row['distance'] > buffer_distance:
        destinations = MultiPoint(row[line_col].coords) #geometry_y
        nearest_geoms = nearest_points(row[point_col], destinations) #geometry_x
        try:
            for coord in destinations:
                if coord == nearest_geoms[1]:
                    geoms.append(coord)
                    indexes.append(idx)
                if idx in indexes:
                    break
                    #geoms.append(coord)
                    #indexes.append(idx)
        except ValueError:
            print("No nearest point found for {}".format(row.point_col))
    return geoms

In [55]:
clipped_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y,start_point,end_point
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16...",POINT (207074.8715567813 164195.14719801673),POINT (207164.93129834003 164496.30120820695)
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215939.955 166439.262, 215938.681 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215939.9552513256 166439.26173004115),POINT (215938.68073999733 166440.85979997925)


In [56]:
# clipped_df = clipped_df.rename(columns={'geometry_x': 'geometry'})

In [57]:
clipped_df['geometry_x'] = multiline_to_linestring_col(clipped_df, 'geometry_x')

In [58]:
clipped_df['start_point'] = get_nearest_point(clipped_df, 'geometry_x', 'start_point')
clipped_df['end_point'] = get_nearest_point(clipped_df, 'geometry_x', 'end_point')

In [59]:
count = 0
for i, row in clipped_df.iterrows():
        if isinstance(row['geometry_x'], LineString):
            # print('This is a Line geometry')
            pass
        else:
            print(type(row['geometry_x']))
            count += 1
            # df[point_col] = df[point_col].apply(str_to_point_geom)
print(count)

0


In [60]:
clipped_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y,start_point,end_point
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,LINESTRING (207074.8715567813 164195.147198016...,"POLYGON ((207078.090 164295.503, 207077.753 16...",POINT (207074.8715567813 164195.14719801673),POINT (207164.93129834003 164496.30120820695)
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,LINESTRING (215939.9552513256 166439.261730041...,"POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215939.9552513256 166439.26173004115),POINT (215938.68073999733 166440.85979997925)


In [61]:
def str_to_point_geom(point_str):
    point = loads(point_str)
    return point

def apply_str_to_point_geom(df, point_col):
    for i, row in df.iterrows():
        if isinstance(row[point_col], Point):
            print('This is a Point geometry')
            pass
        else:
            print('This is a NOT Point geometry')
            df[point_col] = df[point_col].apply(str_to_point_geom)

        return df

In [62]:
def get_ref_distance(df, line_geom, point_geom):
    distances = []
    for i, row in df.iterrows():
        # print(row[1]['geometry_x'])
        # print(row[1]['start_point'])
        # dist = row[1]['geometry_x'].project(loads(row[1]['start_point']))
        projected_point = row[line_geom].project(row[point_geom])
        distances.append(projected_point)
    return distances

# clipped_df['atPosition1'] = distances

In [64]:
# clipped_df['start_point'].nunique()

Merge points on original water dataframe

In [65]:
clipped_gdf = gpd.GeoDataFrame(clipped_df, geometry='geometry_x', crs=PROJ_CRS)

Merging the clipped df with water will give us the original line from which we can calculate the reference points

In [66]:
merge_clipped_water = clipped_df.merge(water_df[['line_id', 'geometry']], on='line_id', how='left')

In [67]:
type(merge_clipped_water['geometry'][0])

shapely.geometry.linestring.LineString

In [68]:
merge_clipped_water['atPosition1'] = get_ref_distance(merge_clipped_water, 'geometry', 'start_point')
merge_clipped_water['atPosition2'] = get_ref_distance(merge_clipped_water, 'geometry', 'end_point')

In [69]:
merge_clipped_water.head()

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y,start_point,end_point,geometry,atPosition1,atPosition2
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16...",POINT (207074.8715567813 164195.14719801673),POINT (207164.93129834003 164496.30120820695),"LINESTRING (206949.344 163986.328, 206950.235 ...",252.702207,583.809623
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7076120,Loop IX,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215939.955 166439.262, 215938.681 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215939.9552513256 166439.26173004115),POINT (215938.68073999733 166440.85979997925),"LINESTRING (216281.334 166014.426, 216172.590 ...",545.04854,547.092606
2,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6018841,Herkebeek,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215935.879 166474.094, 215935.879 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215925.13423999998 166488.66029998194),POINT (215935.8790801232 166474.09414382753),"LINESTRING (216116.438 166246.047, 216111.704 ...",309.664388,291.2687
3,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6038922,Gotemgracht,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215932.868 166542.527, 215925.536 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215932.86765657167 166542.52740126822),POINT (215903.6090399981 166544.41139997914),"LINESTRING (216043.469 166375.111, 215995.853 ...",210.546387,239.866456
4,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000598.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,6042412,Loop IX,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (215938.681 166440.860, 215921.381 ...","POLYGON ((215833.278 166551.772, 215829.574 16...",POINT (215938.68073999733 166440.85979997925),POINT (215925.13423999998 166488.66029998194),"LINESTRING (215938.681 166440.860, 215921.381 ...",0.0,65.272639


In [70]:
clipped_df_final = merge_clipped_water.drop('geometry', axis=1)

For every linestring, collect coordinates into a numpy array and insert point cordinate into array

In [71]:
test = clipped_df_final.iloc[0:1,:]
test

Unnamed: 0,identifier,localId,namespace,line_id,line_name,basin,inspireId_namespace,geometry_x,geometry_y,start_point,end_point,atPosition1,atPosition2
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,Schelde,BE.Tracing.HydroNetwork,"LINESTRING (207074.872 164195.147, 207082.422 ...","POLYGON ((207078.090 164295.503, 207077.753 16...",POINT (207074.8715567813 164195.14719801673),POINT (207164.93129834003 164496.30120820695),252.702207,583.809623


In [72]:
line = test.geometry_x[0]
# line

In [73]:
def project_to_polygon(df):
    line_ids = []
    localIds = []
    namespaces = []
    identifiers = []
    start_geoms = []
    end_geoms = []
    line_names= []
    ref_points = []
    for i, row in df.iterrows():
        line_ids.append(row['line_id'])
        localIds.append(row['localId'])
        start_geoms.append(row['start_point'])
        namespaces.append(row['namespace'])
        identifiers.append(row['identifier'])
        line_names.append(row['line_name'])
        ref_points.append(row['atPosition1'])
        try:
            if isinstance(row.start_point, Point):
                end_geom = nearest_points(row['start_point'], row['geometry_y'])[1]
                end_geoms.append(end_geom)
                # dist = row['geometry_y'].project(row['start_point'])
                # ref_points.append(dist)
            else:
                end_geom = nearest_points(loads(row['start_point']), row['geometry_y'])[1]
                end_geoms.append(end_geom)
                # dist = row[1]['geometry_y'].project(loads(row[1]['start_point']))
                # ref_points.append(dist)
        except:
            print('Not a geometry')
        # else:
        #     end_geom = nearest_points(loads(row['start_point']), row['geometry_y'])[1]
        print(f'Index {i} done')

        line_ids.append(row['line_id'])
        localIds.append(row['localId'])
        start_geoms.append(row['end_point'])
        namespaces.append(row['namespace'])
        identifiers.append(row['identifier'])
        line_names.append(row['line_name'])
        ref_points.append(row['atPosition2'])
        try:
            if isinstance(row.start_point, Point):
                end_geom = nearest_points(row['end_point'], row['geometry_y'])[1]
                end_geoms.append(end_geom)
                # dist = row[1]['geometry_y'].project(row[1]['end_point'])
                # ref_points.append(dist)
            else:
                end_geom = nearest_points(loads(row['end_point']), row['geometry_x'])[1]
                end_geoms.append(end_geom)
                # dist = row[1]['geometry_x'].project(loads(row[1]['end_point']))
                # ref_points.append(dist)
        except:
            print('Not a geometry')
        print(f'Index {i} done')
        
    data = {'identifier': identifiers, 'localId': localIds, 'namespace': namespaces, 'line_id': line_ids, 'line_name': line_names, 'atPosition': ref_points, 'start_geom': start_geoms, 'end_geom': end_geoms}
    new_df = gpd.GeoDataFrame(data, geometry='end_geom')
    return new_df

In [74]:
clipped_df_final.columns

Index(['identifier', 'localId', 'namespace', 'line_id', 'line_name', 'basin',
       'inspireId_namespace', 'geometry_x', 'geometry_y', 'start_point',
       'end_point', 'atPosition1', 'atPosition2'],
      dtype='object')

In [75]:
# new_df = project_to_polygon(clipped_df)
new_df = project_to_polygon(clipped_df_final)

Index 0 done
Index 0 done
Index 1 done
Index 1 done
Index 2 done
Index 2 done
Index 3 done
Index 3 done
Index 4 done
Index 4 done
Index 5 done
Index 5 done
Index 6 done
Index 6 done
Index 7 done
Index 7 done
Index 8 done
Index 8 done
Index 9 done
Index 9 done
Index 10 done
Index 10 done
Index 11 done
Index 11 done
Index 12 done
Index 12 done
Index 13 done
Index 13 done
Index 14 done
Index 14 done
Index 15 done
Index 15 done
Index 16 done
Index 16 done
Index 17 done
Index 17 done
Index 18 done
Index 18 done
Index 19 done
Index 19 done
Index 20 done
Index 20 done
Index 21 done
Index 21 done
Index 22 done
Index 22 done
Index 23 done
Index 23 done
Index 24 done
Index 24 done
Index 25 done
Index 25 done
Index 26 done
Index 26 done
Index 27 done
Index 27 done
Index 28 done
Index 28 done
Index 29 done
Index 29 done
Index 30 done
Index 30 done
Index 31 done
Index 31 done
Index 32 done
Index 32 done
Index 33 done
Index 33 done
Index 34 done
Index 34 done
Index 35 done
Index 35 done
Index 36 don

In [76]:
new_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,atPosition,start_geom,end_geom
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,252.702207,POINT (207074.8715567813 164195.14719801673),POINT (207077.753 164294.989)
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,583.809623,POINT (207164.93129834003 164496.30120820695),POINT (207116.398 164408.982)


In [77]:
def make_connection_lines(df):
    lines = []
    for index, row in new_df.iterrows():
        if isinstance(row.start_geom, Point):
            p_1 = Point(row['start_geom'])
        else:
            p_1 = loads(row['start_geom'])

        if isinstance(row.end_geom, Point):
            p_2 = row['end_geom']
        else:
            p_2 = Point(row['end_geom'])

        # p_2 = Point(row[to_point])
        intersect = LineString([p_1, p_2])
        # linestring = loads(intersect)
        lines.append(intersect)
            
    return lines

new_df['connection_lines'] = make_connection_lines(new_df)
new_df.head(2)

Unnamed: 0,identifier,localId,namespace,line_id,line_name,atPosition,start_geom,end_geom,connection_lines
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,252.702207,POINT (207074.8715567813 164195.14719801673),POINT (207077.753 164294.989),LINESTRING (207074.8715567813 164195.147198016...
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,583.809623,POINT (207164.93129834003 164496.30120820695),POINT (207116.398 164408.982),LINESTRING (207164.93129834003 164496.30120820...


In [78]:
cols = ['identifier','namespace', 'line_id', 'basin','line_name','atPosition' 'inspireId_namespace']


df_links = (gpd.GeoDataFrame((new_df.drop(columns=['start_geom', 'end_geom'])
                            .rename(columns={'connection_lines': 'geometry',
                                                'identifier':'OOI_identifier', 
                                                'name': 'OOI_name', 
                                                'localId': 'OOI_localId', 
                                                'namespace': 'OOI_namespace',
                                                'line_id': 'hydroId',
                                                'line_name': 'watercourse_localName'})) 
                            ,geometry='geometry'))


In [79]:
df_links.columns

Index(['OOI_identifier', 'OOI_localId', 'OOI_namespace', 'hydroId',
       'watercourse_localName', 'atPosition', 'geometry'],
      dtype='object')

In [80]:
import uuid
df_links['UUID'] = [uuid.uuid4().hex for _ in range(len(df_links.index))]

In [81]:
object_type = 'Production and Industrial Site'

In [82]:
def create_ooi_type(object_type):
    ooi_type = object_type
    return ooi_type

df_links['OOI_type'] = create_ooi_type(object_type)

In [83]:
country

'BE'

In [84]:
def create_watercourse_namespace(country):
    namespace = country + '.Tracing.HydroNetwork'
    return namespace

df_links['watercourse_namespace'] = create_watercourse_namespace(country)

In [85]:
df_links.head(2)

Unnamed: 0,OOI_identifier,OOI_localId,OOI_namespace,hydroId,watercourse_localName,atPosition,geometry,UUID,OOI_type,watercourse_namespace
0,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,252.702207,"LINESTRING (207074.872 164195.147, 207077.753 ...",1339946a25854a42a3d12953def6b2c9,Production and Industrial Site,BE.Tracing.HydroNetwork
1,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000000520.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,7075802,Cicindria,583.809623,"LINESTRING (207164.931 164496.301, 207116.398 ...",53a204996f7a4cd68a0847ae6d17533c,Production and Industrial Site,BE.Tracing.HydroNetwork


In [86]:
df_links= df_links.set_crs(PROJ_CRS)
df_links = df_links.to_crs(FINAL_CRS)

In [87]:
df_links = df_links[['UUID', 'OOI_type', 'OOI_identifier', 'OOI_localId', 'OOI_namespace', 'hydroId', 'watercourse_localName', 'watercourse_namespace', 'atPosition', 'geometry']]

In [88]:
df_links.to_file("harmonized_data/VL_ObjectsOfInterest.gpkg", layer="polygon_links", driver='GPKG')