# Locations of Interest

These refer to any phenomenon occurring along the networks that has the potential to affect or be affected by pollution.

The locations of interest have surface geometry, either point or polygon.

Given a certain buffer distance, a location of interest can be identified based on whether it overlaps with a section of the network.

In [1]:
import os
import sys
# Make the parent of the current working directory the new working directory.
# NOTE(review): presumably this is the repository root, so that `src` can be imported — confirm.
# NOTE(review): abspath('') is the current working directory, so re-running this
# cell moves one level further up each time; run it once per kernel session.
path = os.path.dirname(os.path.abspath(''))
os.chdir(path)
print(path)

C:\workdir\develop\repository\go-peg


In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np 

from shapely.geometry import Point, LineString, MultiLineString, MultiPoint
from shapely import wkt
from shapely.ops import nearest_points
import shapely.wkt

import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

from src.config import config



In [5]:
# Projected CRS every layer is reprojected to (BD72 / Belgian Lambert 72, metres).
PROJ_CRS = 'EPSG:31370'

In [6]:
def load_data(path, crs=PROJ_CRS):
    """
    Read a vector dataset from ``path`` and reproject it to ``crs``.

    Prints the shape of the data as read and the CRS after reprojection,
    then returns the reprojected data.
    """
    raw = gpd.read_file(path)
    print(raw.shape)
    reprojected = raw.to_crs(crs)
    print("Project crs:", reprojected.crs)
    return reprojected

In [7]:
# Locations of interest: production installation polygons, reprojected to PROJ_CRS by load_data.
PATH = config.data_src / "flanders_locations/Production and industrial facilities/ProductionInstallation_polygons.shp"
prod_installations = load_data(PATH)

(1895, 16)
Project crs: EPSG:31370


In [8]:
prod_installations.head(2)

Unnamed: 0,RecId,CaPaKey,Type,CaSeKey,FiscSitId,UpdDate,Shape_area,gml_id,descriptio,identifier,localId,namespace,name,status,type_2,geometry
0,3714438,12302B0261/00X000,PR,12302B,1,2016-11-30,962.551,pf_gpbv.1491,industrie,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000001687.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Galvani,,industrie,"POLYGON ((162292.997 199677.392, 162286.199 19..."
1,4711050,44008A0463/00G000,PR,44008A,2,2017-04-18,2587.4769,pf_gpbv.1594,veeteelt,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,Verschraegen Patrick,,veeteelt,"POLYGON ((113791.970 195703.675, 113793.834 19..."


In [9]:
# Processed water network layer, reprojected to PROJ_CRS by load_data.
# Note: PATH is reused here and no longer points at the installations file.
PATH = config.data_dest / "vl_water_PROCESSED_V2.shp"
water = load_data(PATH)

(71983, 21)
Project crs: epsg:31370


## Working with polygon locations of interest

For working with polygon locations of interest, we apply a buffer to each polygon. If part of the water network falls within this buffer zone, it is identified, and the points of intersection, i.e. the starts-at point and ends-at point, following the direction of the flow of water, are extracted and recorded.

In [10]:
def create_buffers(df, buffer_size, columns=None):
    """
    Add a ``buffer_zone`` column holding each geometry grown by ``buffer_size``.

    Parameters
    ----------
    df : GeoDataFrame
        Frame with a ``geometry`` column (a plain DataFrame holding shapely
        geometries in that column also works).
    buffer_size : float
        Buffer distance, expressed in the units of the geometries' CRS.
    columns : list of str, optional
        Columns to keep in the result. Defaults to the attribute columns of
        the production-installation layer plus ``geometry``.

    Returns
    -------
    The selected columns of ``df`` plus the new ``buffer_zone`` column.
    """
    if columns is None:
        columns = ['Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'identifier', 'geometry']

    # Vectorised buffer instead of a per-row apply; wrapping in GeoSeries keeps
    # plain object columns of shapely geometries working as well.
    buffer_zone = gpd.GeoSeries(df['geometry']).buffer(buffer_size)

    return df[list(columns)].assign(buffer_zone=buffer_zone)

In [11]:
# poly_cols = ['Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'identifier', 'geometry']
# Buffer every installation by 100 (CRS units) and make the buffer the active geometry column.
prod_install_df = gpd.GeoDataFrame((prod_installations
                                        .pipe(create_buffers, 100)), geometry='buffer_zone', crs=PROJ_CRS)

In [12]:
# prod_install_df = create_buffers(prod_installations, 100)

In [13]:
prod_install_df.iloc[0]

Type                                                          PR
CaSeKey                                                   12302B
descriptio                                             industrie
UpdDate                                               2016-11-30
name                                                     Galvani
localId                             BE.VL.000001687.INSTALLATION
identifier     https://data.gpbv.omgeving.vlaanderen.be/id/pr...
geometry       POLYGON ((162292.9970761109 199677.39228480123...
buffer_zone    POLYGON ((162360.7253147308 199603.8180954041,...
Name: 0, dtype: object

In [14]:
prod_install_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1895 entries, 0 to 1894
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   Type         1895 non-null   object  
 1   CaSeKey      1895 non-null   object  
 2   descriptio   1895 non-null   object  
 3   UpdDate      1895 non-null   object  
 4   name         1895 non-null   object  
 5   localId      1895 non-null   object  
 6   identifier   1895 non-null   object  
 7   geometry     1895 non-null   geometry
 8   buffer_zone  1895 non-null   geometry
dtypes: geometry(2), object(7)
memory usage: 133.4+ KB


In [15]:
# Keep the attribute columns and the buffer polygons only; the original
# installation footprint ('geometry') is left out of this frame.
poly_cols = ['Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId', 'identifier', 'buffer_zone']
buffers_df = prod_install_df[poly_cols]
buffers_gdf = gpd.GeoDataFrame(buffers_df, geometry='buffer_zone')

In [16]:
type(buffers_gdf)

geopandas.geodataframe.GeoDataFrame

In [19]:
#buffers_gdf.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\buffers.shp")

**Load water data to perform the intersection and identify the points of intersection between water and locations of interest**

In [20]:
#Check for multiline strings in a dataset
def check_multiline(df):
    """Print how many entries of the ``geometry`` column of ``df``
        are MultiLineString objects."""
    geometries = df['geometry'].to_list()
    multiline_count = sum(1 for geom in geometries if isinstance(geom, MultiLineString))
    print("MultiLinesStrings:" , multiline_count)

In [21]:
check_multiline(water)

MultiLinesStrings: 0


In [22]:
buffers_gdf.head(2)

Unnamed: 0,Type,CaSeKey,descriptio,UpdDate,name,localId,identifier,buffer_zone
0,PR,12302B,industrie,2016-11-30,Galvani,BE.VL.000001687.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((162360.725 199603.818, 162354.312 19..."
1,PR,44008A,veeteelt,2017-04-18,Verschraegen Patrick,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((113715.977 195787.489, 113717.315 19..."


To linearly reference a location of interest (polygon) onto a water network (linestring), we need to perform an overlay of the polygons onto the linestrings.
The resulting geometry is a linestring that falls within the boundaries of a polygon, with all the properties of the original linestring and polygon.

With this data, we can extract the point where the water intersects the polygon.

In [23]:
# Only the VHAS key and the geometry are carried into the overlay.
water_truncated = water[['VHAS', 'geometry']]

In [24]:
# Clip the water segments to the buffer zones, keeping only line geometries.
# Run the clip once and copy the result rather than repeating the identical call.
intersect_df = water_truncated.clip(buffers_gdf, keep_geom_type=True).reset_index(drop=True)
clipped_water_df2 = intersect_df.copy()

In [27]:
#intersect_df.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\intersect_df.shp")

In [25]:
def clipGDF_keepgeomtyp_line(gdf,mask):
    """
    Clips Line GeoDataFrame (or GeoSeries) with Polygon GeoDataFrame and
    keeps only line geometries.

    Input Variables:
    --------------------------------------------------------------------
    gdf
        GeoDataFrame or GeoSeries to clip
    mask
        GeoDataFrame defining clipping extent

    Returns:
    --------------------------------------------------------------------
    Clipped GeoDataFrame (or GeoSeries) with a fresh 0..n-1 index that
    contains only LineString / MultiLineString geometries:
      * LineString and MultiLineString results are kept unchanged,
      * a GeometryCollection is replaced by its LineString members
        (one member -> LineString, several -> MultiLineString),
      * every other result (points, collections without any line) is dropped.
    """
    from shapely.geometry import LineString, MultiLineString, GeometryCollection

    def _lines_only(geom):
        """Return the line part of ``geom``, or None when it has none."""
        if isinstance(geom, (LineString, MultiLineString)):
            return geom
        if isinstance(geom, GeometryCollection):
            # `.geoms` works on shapely 1.x and 2.x; iterating the collection
            # itself is no longer supported by shapely 2.
            lines = [part for part in geom.geoms if isinstance(part, LineString)]
            if len(lines) > 1:
                return MultiLineString(lines)
            if len(lines) == 1:
                return lines[0]
        return None

    clipped = gpd.clip(gdf, mask).reset_index(drop=True)
    is_frame = isinstance(clipped, gpd.GeoDataFrame)
    geometries = clipped.geometry if is_frame else clipped

    line_geoms = [_lines_only(geom) for geom in geometries]
    kept_geoms = [geom for geom in line_geoms if geom is not None]

    if not is_frame:
        return gpd.GeoSeries(kept_geoms, crs=clipped.crs, name=clipped.name)

    # Boolean array (not a list) so that an empty result is still treated as a row mask.
    keep_mask = np.array([geom is not None for geom in line_geoms], dtype=bool)
    result = clipped.loc[keep_mask].reset_index(drop=True)
    # Replace the whole geometry column at once; this avoids pandas' removed
    # Series.append and element-wise assignment of multi-part geometries.
    result[clipped.geometry.name] = gpd.GeoSeries(kept_geoms, crs=clipped.crs)
    return result

In [26]:
# Same clip, done with the hand-written helper; its shape is compared with the built-in clip results further down.
clipped_water_df = clipGDF_keepgeomtyp_line(water_truncated, buffers_gdf)

In [27]:
print(clipped_water_df.shape)
# Sanity check: there are as many distinct VHAS values as distinct clipped geometries.
assert clipped_water_df.VHAS.nunique() == clipped_water_df.geometry.nunique()
clipped_water_df.head()

(1907, 2)


Unnamed: 0,VHAS,geometry
0,7075802,"LINESTRING (207074.872 164195.147, 207082.422 ..."
1,7076120,"LINESTRING (215939.955 166439.262, 215938.681 ..."
2,6018841,"LINESTRING (215935.879 166474.094, 215933.407 ..."
3,6038922,"LINESTRING (215932.868 166542.527, 215925.536 ..."
4,6042412,"LINESTRING (215938.681 166440.860, 215921.381 ..."


In [29]:
print(intersect_df.shape)
clipped_water_df2.shape

(1907, 2)


(1907, 2)

In [32]:
#clipped.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\clipped.shp")

Add begin and end points to the linestrings. These mark the start_at point and end_at point of the location of interest on a water network.

In [30]:
def _endpoint_wkt(geom, position):
    """
    Return the WKT string of one end of a line geometry, or None.

    ``position`` is 0 for the first point and -1 for the last one.
    None is returned for geometries that are not (Multi)LineStrings and for
    lines that have no end point (empty line, empty boundary).
    """
    if isinstance(geom, LineString):
        coords = geom.coords
        if len(coords) == 0:
            return None
        point = Point(coords[position])
    elif isinstance(geom, MultiLineString):
        # `.geoms` instead of indexing the boundary directly, which shapely 2 no longer allows.
        # NOTE(review): the order of the boundary points is decided by GEOS and may
        # not follow the flow direction — confirm for multi-part clips.
        boundary_points = list(geom.boundary.geoms)
        if not boundary_points:
            return None
        point = boundary_points[position]
    else:
        return None
    return shapely.wkt.dumps(point) #, rounding_precision=5)


def add_beginpoints(col, df):
    """
    Return the start point (as WKT) of every geometry in ``df[col]``.

    The list always has one entry per row, in row order, so it can be assigned
    back as a column; rows without a usable line geometry yield None.
    """
    return [_endpoint_wkt(geom, 0) for geom in df[col].to_list()]


def add_endpoints(col, df):
    """
    Return the end point (as WKT) of every geometry in ``df[col]``.

    The list always has one entry per row, in row order, so it can be assigned
    back as a column; rows without a usable line geometry yield None.
    """
    return [_endpoint_wkt(geom, -1) for geom in df[col].to_list()]

In [31]:
# Work on a copy so the clip result itself is not modified when columns are added.
clipped_df = clipped_water_df.copy()

In [32]:
# First and last point of every clipped segment, stored as WKT strings.
clipped_df['start_point'] = add_beginpoints('geometry', clipped_df)
clipped_df['end_point'] = add_endpoints('geometry', clipped_df)

In [33]:
clipped_df.head(2)

Unnamed: 0,VHAS,geometry,start_point,end_point
0,7075802,"LINESTRING (207074.872 164195.147, 207082.422 ...",POINT (207074.8715567812905647 164195.14719801...,POINT (207164.9312983400304802 164496.30120820...
1,7076120,"LINESTRING (215939.955 166439.262, 215938.681 ...",POINT (215939.9552513255912345 166439.26173004...,POINT (215938.6807399973331485 166440.85979997...


In [34]:
clipped_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1907 entries, 0 to 1906
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   VHAS         1907 non-null   object  
 1   geometry     1907 non-null   geometry
 2   start_point  1907 non-null   object  
 3   end_point    1907 non-null   object  
dtypes: geometry(1), object(3)
memory usage: 59.7+ KB


In [35]:
#clipped_water_merge = clipped_water_df.merge(water[[]] ,how='left',on='VHAS')

In [36]:
#clipped.rename(columns={'geometry':'loc_geom'}, inplace=True)
#clipped.head()

### Merge with original linestring, and identify point on linestring using get_nearest_point

In [37]:
def get_nearest_point(df, line_col, point_col):
    """
    For each row, find the vertex of the line that is nearest to the point.

    Parameters
    ----------
    df : DataFrame
        One row per point/line pair.
    line_col : str
        Column holding the line geometry whose vertices are searched.
    point_col : str
        Column holding the point to snap.

    Returns
    -------
    list
        Exactly one entry per row, in row order: the nearest vertex as a
        Point, or None when it cannot be determined (missing or empty
        geometry). A message is printed for every such row.
    """
    geoms = []
    for idx, row in df.iterrows():
        try:
            # Read the coordinates first so a missing line geometry is caught below.
            line_coords = row[line_col].coords
            vertices = MultiPoint(line_coords)
            # nearest_points returns (nearest on first geom, nearest on second geom);
            # the second one is the closest vertex of the line.
            nearest_vertex = nearest_points(row[point_col], vertices)[1]
        except (AttributeError, ValueError):
            # AttributeError: no geometry in the line column (e.g. after a left merge);
            # ValueError: shapely rejects empty geometries.
            # Fall back to the row index when the frame has no CODEKOPPNT column.
            print("No nearest point found for {}".format(row.get("CODEKOPPNT", idx)))
            nearest_vertex = None
        # Always append so the result stays aligned with the rows of df.
        geoms.append(nearest_vertex)
    return geoms

In [38]:
clipped_df.columns

Index(['VHAS', 'geometry', 'start_point', 'end_point'], dtype='object')

In [39]:
def get_start_points_df():
    """
    Pair the start point of every clipped segment with its full water geometry.

    Reads the notebook-level frames ``clipped_df`` (columns ``VHAS`` and
    ``start_point`` as WKT) and ``water`` (columns ``VHAS`` and ``geometry``).

    Returns
    -------
    GeoDataFrame with columns ``VHAS``, ``start_point`` (parsed points, active
    geometry, in the CRS of ``water``) and ``geometry`` (the unclipped water line).
    """
    start_pts = clipped_df[['VHAS', 'start_point']].merge(water[['VHAS', 'geometry']], how='left', on='VHAS')
    # Parse the merged column itself rather than clipped_df's column, so the
    # points stay on their own rows even if the merge changes the row count.
    start_pts['start_point'] = start_pts['start_point'].apply(wkt.loads)
    # The points come from geometries clipped out of `water`, so they share its CRS.
    start_pts = gpd.GeoDataFrame(start_pts, geometry='start_point', crs=water.crs)
    return start_pts

In [40]:
# Start point of each clipped segment next to the full water line with the same VHAS.
start_pts = get_start_points_df()

In [41]:
# Snap every start point to the nearest vertex of the full water line.
start_pts['new_start_points'] = get_nearest_point(start_pts, 'geometry', 'start_point')

In [42]:
# Distance along the full water line at which the snapped start point lies.
start_pts['from_distance'] = start_pts.apply(lambda row: row.geometry.project(row.new_start_points), axis=1)

In [43]:
start_pts

Unnamed: 0,VHAS,start_point,geometry,new_start_points,from_distance
0,7075802,POINT (207074.872 164195.147),"LINESTRING (206949.344 163986.328, 206950.235 ...",POINT (207069.70363999758 164182.70349997934),239.228047
1,7076120,POINT (215939.955 166439.262),"LINESTRING (216281.334 166014.426, 216172.590 ...",POINT (215938.68073999733 166440.85979997925),547.092606
2,6018841,POINT (215935.879 166474.094),"LINESTRING (216116.438 166246.047, 216111.704 ...",POINT (215933.40674000088 166476.84389997926),294.966486
3,6038922,POINT (215932.868 166542.527),"LINESTRING (216043.469 166375.111, 215995.853 ...",POINT (215938.38164000065 166542.09959997982),205.015833
4,6042412,POINT (215938.681 166440.860),"LINESTRING (215938.681 166440.860, 215921.381 ...",POINT (215938.68073999733 166440.85979997925),0.000000
...,...,...,...,...,...
1902,6027683,POINT (182257.411 241036.331),"LINESTRING (182257.411 241036.331, 182259.815 ...",POINT (182257.41103999916 241036.33069997746),0.000000
1903,7052456,POINT (177921.297 243818.824),"LINESTRING (177916.572 243772.020, 177918.229 ...",POINT (177921.08634000126 243813.94779998064),42.181990
1904,7050824,POINT (178081.937 243891.321),"LINESTRING (178081.937 243891.321, 178016.585 ...",POINT (178081.9368399963 243891.32109998167),0.000000
1905,7050825,POINT (177839.922 243936.449),"LINESTRING (177631.272 243868.403, 177636.167 ...",POINT (177846.4043399975 243938.5492999805),226.288547


In [44]:
def get_end_points_df():
    """
    Pair the end point of every clipped segment with its full water geometry.

    Reads the notebook-level frames ``clipped_df`` (columns ``VHAS`` and
    ``end_point`` as WKT) and ``water`` (columns ``VHAS`` and ``geometry``).

    Returns
    -------
    GeoDataFrame with columns ``VHAS``, ``end_point`` (parsed points, active
    geometry, in the CRS of ``water``) and ``geometry`` (the unclipped water line).
    """
    end_pts = clipped_df[['VHAS', 'end_point']].merge(water[['VHAS', 'geometry']], how='left', on='VHAS')
    # Parse the merged column itself rather than clipped_df's column, so the
    # points stay on their own rows even if the merge changes the row count.
    end_pts['end_point'] = end_pts['end_point'].apply(wkt.loads)
    # The points come from geometries clipped out of `water`, so they share its CRS.
    end_pts = gpd.GeoDataFrame(end_pts, geometry='end_point', crs=water.crs)
    return end_pts

# End point of each clipped segment next to the full water line with the same VHAS.
end_pts = get_end_points_df()

# Snap every end point to the nearest vertex of the full water line.
end_pts['new_end_points'] = get_nearest_point(end_pts, 'geometry', 'end_point')

# Distance along the full water line at which the snapped end point lies.
end_pts['to_distance'] = end_pts.apply(lambda row: row.geometry.project(row.new_end_points), axis=1)

In [45]:
end_pts

Unnamed: 0,VHAS,end_point,geometry,new_end_points,to_distance
0,7075802,POINT (207164.931 164496.301),"LINESTRING (206949.344 163986.328, 206950.235 ...",POINT (207165.60993999842 164494.09419997968),581.500632
1,7076120,POINT (215938.681 166440.860),"LINESTRING (216281.334 166014.426, 216172.590 ...",POINT (215938.68073999733 166440.85979997925),547.092606
2,6018841,POINT (215925.134 166488.660),"LINESTRING (216116.438 166246.047, 216111.704 ...",POINT (215925.13423999998 166488.66029998194),309.664388
3,6038922,POINT (215903.609 166544.411),"LINESTRING (216043.469 166375.111, 215995.853 ...",POINT (215903.6090399981 166544.41139997914),239.866456
4,6042412,POINT (215925.134 166488.660),"LINESTRING (215938.681 166440.860, 215921.381 ...",POINT (215925.13423999998 166488.66029998194),65.272639
...,...,...,...,...,...
1902,6027683,POINT (182274.384 241117.100),"LINESTRING (182257.411 241036.331, 182259.815 ...",POINT (182276.38174000237 241128.0882999804),94.127051
1903,7052456,POINT (177925.134 243899.620),"LINESTRING (177916.572 243772.020, 177918.229 ...",POINT (177925.13444000334 243899.61999998149),127.950740
1904,7050824,POINT (177925.134 243899.620),"LINESTRING (178081.937 243891.321, 178016.585 ...",POINT (177925.13444000334 243899.61999998149),157.027604
1905,7050825,POINT (177921.493 243962.679),"LINESTRING (177631.272 243868.403, 177636.167 ...",POINT (177921.49323999733 243962.67929997947),305.159326


Merge the two dfs

In [46]:
# One table with the from/to distances, joined on VHAS.
# NOTE(review): a VHAS that occurs more than once on either side multiplies rows in this merge — confirm VHAS is unique.
linear_reference_df = start_pts[['VHAS', 'new_start_points', 'from_distance']].merge(end_pts[['VHAS', 'new_end_points', 'to_distance']], how='left', on='VHAS')

In [47]:
linear_reference_df.head(2)

Unnamed: 0,VHAS,new_start_points,from_distance,new_end_points,to_distance
0,7075802,POINT (207069.70363999758 164182.70349997934),239.228047,POINT (207165.60993999842 164494.09419997968),581.500632
1,7076120,POINT (215938.68073999733 166440.85979997925),547.092606,POINT (215938.68073999733 166440.85979997925),547.092606


In [48]:
linear_reference_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1907 entries, 0 to 1906
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   VHAS              1907 non-null   object 
 1   new_start_points  1907 non-null   object 
 2   from_distance     1907 non-null   float64
 3   new_end_points    1907 non-null   object 
 4   to_distance       1907 non-null   float64
dtypes: float64(2), object(3)
memory usage: 89.4+ KB


## Merge locations of interest

In [49]:
print(buffers_gdf.shape)
buffers_gdf.head()

(1895, 8)


Unnamed: 0,Type,CaSeKey,descriptio,UpdDate,name,localId,identifier,buffer_zone
0,PR,12302B,industrie,2016-11-30,Galvani,BE.VL.000001687.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((162360.725 199603.818, 162354.312 19..."
1,PR,44008A,veeteelt,2017-04-18,Verschraegen Patrick,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((113715.977 195787.489, 113717.315 19..."
2,PR,24006D,veeteelt,2017-04-24,Both Marrigje en Vaneynde Ellen,BE.VL.000000463.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((177791.394 188622.409, 177789.393 18..."
3,PR,44034D,veeteelt,2017-04-18,M+Pigs/Walschland,BE.VL.000001191.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((110166.649 198394.433, 110166.778 19..."
4,PR,44034D,veeteelt,2017-04-18,Balthau Stany,BE.VL.000001083.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((110716.460 198288.067, 110717.643 19..."


In [50]:
# Spatial join with geopandas' default settings: each buffer zone is paired with the
# water segments it matches, so a buffer can appear on several rows.
# reset_index() keeps the original buffer row number in the 'index' column.
water_polygon = gpd.sjoin(buffers_gdf, water_truncated).reset_index()

In [51]:
water_polygon

Unnamed: 0,index,Type,CaSeKey,descriptio,UpdDate,name,localId,identifier,buffer_zone,index_right,VHAS
0,1,PR,44008A,veeteelt,2017-04-18,Verschraegen Patrick,BE.VL.000001800.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((113715.977 195787.489, 113717.315 19...",19387,5040128
1,2,PR,24006D,veeteelt,2017-04-24,Both Marrigje en Vaneynde Ellen,BE.VL.000000463.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((177791.394 188622.409, 177789.393 18...",56881,6028465
2,2,PR,24006D,veeteelt,2017-04-24,Both Marrigje en Vaneynde Ellen,BE.VL.000000463.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((177791.394 188622.409, 177789.393 18...",34839,6028466
3,3,PR,44034D,veeteelt,2017-04-18,M+Pigs/Walschland,BE.VL.000001191.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((110166.649 198394.433, 110166.778 19...",61621,7068473
4,3,PR,44034D,veeteelt,2017-04-18,M+Pigs/Walschland,BE.VL.000001191.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((110166.649 198394.433, 110166.778 19...",2128,6034092_1
...,...,...,...,...,...,...,...,...,...,...,...
2375,1891,PR,24108B,veeteelt,2019-01-14,Porky Farm FV,BE.VL.000000577.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((171607.173 181438.814, 171603.587 18...",38038,6028079
2376,1891,PR,24108B,veeteelt,2019-01-14,Porky Farm FV,BE.VL.000000577.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((171607.173 181438.814, 171603.587 18...",10095,6017624_1
2377,1891,PR,24108B,veeteelt,2019-01-14,Porky Farm FV,BE.VL.000000577.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((171607.173 181438.814, 171603.587 18...",10096,6017624_2
2378,1891,PR,24108B,veeteelt,2019-01-14,Porky Farm FV,BE.VL.000000577.INSTALLATION,https://data.gpbv.omgeving.vlaanderen.be/id/pr...,"POLYGON ((171607.173 181438.814, 171603.587 18...",10097,6017624_3


In [52]:
water_polygon.columns

Index(['index', 'Type', 'CaSeKey', 'descriptio', 'UpdDate', 'name', 'localId',
       'identifier', 'buffer_zone', 'index_right', 'VHAS'],
      dtype='object')

In [53]:
# Attach the from/to distances to every (buffer zone, water segment) pair via VHAS.
linear_reference_gdf = water_polygon[['Type', 'CaSeKey', 'localId', 'VHAS', 'buffer_zone']]\
                        .merge(linear_reference_df[['VHAS', 'from_distance', 'to_distance']], how='left', on='VHAS') #\
                            #.rename(columns={'buffer_zone':'geometry'})

In [54]:
linear_reference_gdf

Unnamed: 0,Type,CaSeKey,localId,VHAS,buffer_zone,from_distance,to_distance
0,PR,44008A,BE.VL.000001800.INSTALLATION,5040128,"POLYGON ((113715.977 195787.489, 113717.315 19...",530.855019,751.960694
1,PR,24006D,BE.VL.000000463.INSTALLATION,6028465,"POLYGON ((177791.394 188622.409, 177789.393 18...",370.973634,733.580196
2,PR,24006D,BE.VL.000000463.INSTALLATION,6028466,"POLYGON ((177791.394 188622.409, 177789.393 18...",0.000000,0.000000
3,PR,44034D,BE.VL.000001191.INSTALLATION,7068473,"POLYGON ((110166.649 198394.433, 110166.778 19...",438.139223,798.815637
4,PR,44034D,BE.VL.000001191.INSTALLATION,6034092_1,"POLYGON ((110166.649 198394.433, 110166.778 19...",691.927203,950.760934
...,...,...,...,...,...,...,...
2375,PR,24108B,BE.VL.000000577.INSTALLATION,6028079,"POLYGON ((171607.173 181438.814, 171603.587 18...",340.387133,589.604391
2376,PR,24108B,BE.VL.000000577.INSTALLATION,6017624_1,"POLYGON ((171607.173 181438.814, 171603.587 18...",0.000000,142.623498
2377,PR,24108B,BE.VL.000000577.INSTALLATION,6017624_2,"POLYGON ((171607.173 181438.814, 171603.587 18...",0.000000,9.205141
2378,PR,24108B,BE.VL.000000577.INSTALLATION,6017624_3,"POLYGON ((171607.173 181438.814, 171603.587 18...",0.000000,10.305196


In [55]:
linear_reference_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2380 entries, 0 to 2379
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   Type           2380 non-null   object  
 1   CaSeKey        2380 non-null   object  
 2   localId        2380 non-null   object  
 3   VHAS           2380 non-null   object  
 4   buffer_zone    2380 non-null   geometry
 5   from_distance  2380 non-null   float64 
 6   to_distance    2380 non-null   float64 
dtypes: float64(2), geometry(1), object(4)
memory usage: 148.8+ KB


In [None]:
# linear_reference_gdf2 = linear_reference_gdf.to_crs(epsg=3035)

In [57]:
linear_reference_gdf.crs

<Derived Projected CRS: EPSG:31370>
Name: BD72 / Belgian Lambert 72
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: Belgium - onshore.
- bounds: (2.5, 49.5, 6.4, 51.51)
Coordinate Operation:
- name: Belgian Lambert 72
- method: Lambert Conic Conformal (2SP)
Datum: Reseau National Belge 1972
- Ellipsoid: International 1924
- Prime Meridian: Greenwich

In [None]:
# linear_reference_gdf2.to_file(r"C:\Workdir\Develop\TR_USECASE\data_transform\vl_polygon_loc2.shp")