In [1]:
import geopandas as gpd
import pandas as pd

import swisslandstats as sls

In [2]:
# Read the Vaud table with swisslandstats. The object returned here provides
# the CRS and the geodataframe used by the cells below.
vaud_lulc_filepath = 'data/vaud_lulc.csv'
vaud_ldf = sls.read_csv(vaud_lulc_filepath)

In [3]:
# Lookup from the integer codes of the 'PRODREG' column to region names.
prodreg_name_map = {
    1: 'Jura',
    2: 'Central Plateau',
    3: 'Pre Alps',
    4: 'Alps',
    5: 'Southern Alps',
}

# Read the region polygons and reproject them to the CRS of the Vaud data, so
# that the spatial joins below operate on a single coordinate system.
prodreg_gdf = gpd.read_file('data/lfi_prodreg_19082011.shp').to_crs(vaud_ldf.crs)

# Discard the features coded 0: the lookup above has no name for that code
# (presumably it marks area outside the named regions -- TODO confirm).
# `.copy()` detaches the filtered frame from its parent, so the column
# assignment below writes to an independent frame instead of a slice (which
# is what triggers pandas' SettingWithCopyWarning).
prodreg_gdf = prodreg_gdf[prodreg_gdf['PRODREG'] != 0].copy()

# Replace the integer codes by the region names.
prodreg_gdf['PRODREG'] = prodreg_gdf['PRODREG'].replace(prodreg_name_map)

I am densified (external_values, 13 elements)


In [4]:
# Geometries of the Vaud observations, as a geodataframe.
vaud_gdf = vaud_ldf.to_geodataframe()

# Right join: every row of `vaud_gdf` is kept and receives the 'PRODREG' of the
# region polygon that contains it. Rows contained by no polygon get NaN, which
# the following cells fill in.
# NOTE(review): recent geopandas releases name the `op` argument `predicate`;
# confirm against the installed version before upgrading.
vaud_prodreg_join_gdf = gpd.sjoin(prodreg_gdf, vaud_gdf, how='right', op='contains')
prodreg_ser = vaud_prodreg_join_gdf['PRODREG']

In [5]:
# Some points lie outside the region borders because of the resolution of the
# shapefile, so the join above left them without a region. Surround each of
# them with a buffer and collect the regions that the buffer intersects.

unmatched_mask = prodreg_ser.isna()
nan_prodreg_gser = vaud_gdf.loc[unmatched_mask]['geometry']

# Buffer distance of 1500 is expressed in the units of the CRS; `cap_style=3`
# is forwarded to the geometry's own `buffer` method (presumably shapely's
# square cap style -- TODO confirm).
buffer_geom_ser = nan_prodreg_gser.apply(lambda geom: geom.buffer(1500, cap_style=3))
nan_prodreg_buffer_gdf = gpd.GeoDataFrame(
    geometry=buffer_geom_ser,
    index=nan_prodreg_gser.index,
    crs=vaud_ldf.crs,
)

# A buffer that intersects several regions produces one row per region, i.e.
# a repeated index label; those conflicts are resolved in the next cells.
buffer_prodreg_join_gdf = gpd.sjoin(nan_prodreg_buffer_gdf, prodreg_gdf, op='intersects')
fillna_prodreg_ser = buffer_prodreg_join_gdf['PRODREG']

I am densified (external_values, 37 elements)


In [6]:
# A label that occurs more than once belongs to a point whose buffer
# intersected several regions. `keep=False` flags every occurrence of such a
# label, not only the repetitions.
is_conflicting = fillna_prodreg_ser.index.duplicated(keep=False)

# Conflicting assignments, to be resolved by intersection area
dup_fillna_prodreg_ser = fillna_prodreg_ser[is_conflicting]
# Labels that occur once: successfully (uniquely) solved through the buffer
non_dup_fillna_prodreg_ser = fillna_prodreg_ser[~is_conflicting]

In [7]:
def get_idmax_buffer_intersection(dup_ser, nan_prodreg_buffer_gdf):
    """
    Resolve a conflict between several candidate regions of a single point.

    Computes the intersection area between the buffer of the point and each
    conflicting region, and returns the region with the maximum intersection
    area.

    Parameters
    ----------
    dup_ser : pd.Series
        Candidate 'PRODREG' values of one point. All its index labels are the
        same label, namely the one identifying the point.
    nan_prodreg_buffer_gdf : gpd.GeoDataFrame
        Buffer geometries, indexed by the labels of the points.

    Returns
    -------
    The 'PRODREG' value of the row of the notebook-level `prodreg_gdf` whose
    intersection with the buffer has the largest area.
    """
    # The index of `dup_ser` consists of one repeated label, so any element
    # identifies the point; take the first. Select from the geometry column so
    # that a bare geometry (and not a one-element row) is handed to
    # `intersection`, which expects a geometry or a GeoSeries.
    poly_geom = nan_prodreg_buffer_gdf.geometry.loc[dup_ser.index[0]]

    # Only the regions in conflict for this point are compared (`prodreg_gdf`
    # is read from the notebook namespace).
    dup_prodreg_gdf = prodreg_gdf[prodreg_gdf['PRODREG'].isin(dup_ser)]

    intersection_area_ser = dup_prodreg_gdf.intersection(poly_geom).area
    return dup_prodreg_gdf.loc[intersection_area_ser.idxmax(), 'PRODREG']

In [8]:
# One group per conflicting point; each group is reduced to the single region
# chosen by `get_idmax_buffer_intersection`. The buffer geodataframe is passed
# through `apply` as the second positional argument of that function.
conflict_groups = dup_fillna_prodreg_ser.groupby(by=dup_fillna_prodreg_ser.index)
dup_fillna_prodreg_ser = conflict_groups.apply(
    get_idmax_buffer_intersection, nan_prodreg_buffer_gdf)

I am densified (external_values, 5 elements)
I am densified (external_values, 5 elements)
I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)
I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)
I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)


I am densified (external_values, 5 elements)
I am densified (external_values, 5 elements)


I am densified (external_values, 6 elements)
I am densified (external_values, 4 elements)


In [9]:
# Put the resolved conflicts and the unambiguous assignments back together
resolved_prodreg_sers = [dup_fillna_prodreg_ser, non_dup_fillna_prodreg_ser]
fillna_prodreg_ser = pd.concat(resolved_prodreg_sers)

In [10]:
# Check that the number of nan in prodreg_ser is the same as the number of
# filled nans from the buffer after solving the duplicates. The check is
# enforced with an assertion so that running the whole notebook stops here,
# instead of going on to write a partially filled file, when the counts differ.
num_nan = len(prodreg_ser[prodreg_ser.isna()])
num_filled = len(fillna_prodreg_ser)
assert num_nan == num_filled, (
    '{} points have no region but {} fill values were computed'.format(
        num_nan, num_filled))
num_nan == num_filled

True

In [11]:
# Fill the missing regions with the values obtained through the buffers;
# passing a Series makes pandas match the fill values by index label.
prodreg_ser = prodreg_ser.fillna(value=fillna_prodreg_ser)

In [12]:
# Dump the region of every point to disk, with a header row
vaud_prodreg_filepath = 'data/vaud_prodreg.csv'
prodreg_ser.to_csv(vaud_prodreg_filepath, header=True)