# Import modules

In [1]:
from ntpath import join
import geopandas as gpd
import pandas as pd
from itertools import combinations
import math
import os, sys
import argparse
from tqdm import tqdm
import yaml

os.chdir('..')

In [2]:
with open('03_Scripts/config.yaml') as fp:
    cfg = yaml.load(fp, Loader=yaml.FullLoader)['prepare_data.py']    #  [os.path.basename(__file__)]


# Task to do
DETERMINE_ROAD_SURFACES = cfg['tasks']['determine_roads_surfaces']
DETERMINE_RESTRICTED_AOI = cfg['tasks']['determine_restricted_AOI']
MAKE_RASTER_MOSAIC = cfg['tasks']['make_raster_mosaic']
DOWNLOAD_TILES=cfg['tasks']['determine_tiles']

if not DETERMINE_ROAD_SURFACES and not DETERMINE_RESTRICTED_AOI and not MAKE_RASTER_MOSAIC and not DOWNLOAD_TILES:
    print('Nothing to do. Exiting!')
    sys.exit(0)
else:

    INPUT = cfg['input']
    INPUT_DIR =INPUT['input_folder']

    ROADS_IN = INPUT_DIR + INPUT['input_files']['roads']
    ROADS_PARAM = INPUT_DIR + INPUT['input_files']['roads_param']
    FORESTS = INPUT_DIR + INPUT['input_files']['forests']
    TILES_SWISSIMAGES = INPUT['input_files']['tiles_swissimages10']

    OUTPUT = cfg['output']
    OUTPUT_DIR = OUTPUT['output_folder']

    if DETERMINE_ROAD_SURFACES:
        ROADS_OUT = OUTPUT_DIR +  OUTPUT['output_files']['roads']

    if DETERMINE_RESTRICTED_AOI:
        RESTRICTED_AOI = OUTPUT_DIR +  OUTPUT['output_files']['restricted_AOI']
    
    if DOWNLOAD_TILES:
        TILES_AOI = OUTPUT_DIR + OUTPUT['output_files']['tiles_aoi']



# Define functions

In [3]:
def polygons_diff_without_artifacts(polygons, p1_idx, p2_idx):
    # Make the difference of the geometry at row p2_idx with the one at the row p1_idx
    
    diff=polygons.loc[p2_idx,'geometry']-polygons.loc[p1_idx,'geometry']

    # Store intermediary results back to poly
    if diff.geom_type == 'Polygon':
        polygons.loc[p2_idx,'geometry'] -= polygons.loc[p1_idx,'geometry']

    elif diff.geom_type == 'MultiPolygon':
        # if a multipolygone is created, only keep the largest part to avoid the following error: https://github.com/geopandas/geopandas/issues/992
        polygons.loc[p2_idx,'geometry'] = max((polygons.loc[p2_idx,'geometry']-polygons.loc[p1_idx,'geometry']).geoms, key=lambda a: a.area)

    return polygons

def test_crs(crs1,crs2 = "EPSG:2056"):
    try:
        assert(crs1 == crs2), "CRS mismatch between the roads file and the forests file."
    except Exception as e:
        print(e)
        sys.exit(1)

# Main
## Import files

In [4]:
# Import files ------------------------------------------------------------------------------------------
print('Importing files...')
## Data
roads=gpd.read_file(ROADS_IN)
forests=gpd.read_file(FORESTS)
tiles_swissimages=gpd.read_file(TILES_SWISSIMAGES)

## Other informations
roads_parameters=pd.read_excel(ROADS_PARAM)

print('Importations done!')

Importing files...
Importations done!


## Information treatment

### Filter the roads to work on

In [5]:
# Supress non-roads elements
roads_parameters=roads_parameters[roads_parameters['to keep']=='yes']
roads_parameters.drop_duplicates(subset='GDB-Code',inplace=True)       # Keep first by default 

joined_roads=roads.merge(roads_parameters[['GDB-Code','Width']], how='right',left_on='OBJEKTART',right_on='GDB-Code')

### Buffer the roads

In [6]:
# Buffer the roads
buffered_roads=joined_roads.copy()
buffered_roads['buffered_geom']=buffered_roads.buffer(joined_roads['Width'], cap_style=2)


In [7]:
buffered_roads.drop(columns=['geometry'],inplace=True)
buffered_roads.rename(columns={'buffered_geom':'geometry'},inplace=True)

In [8]:
## Do not let roundabouts make artifacts
for idx in buffered_roads.index:
    geom=buffered_roads.loc[idx,'geometry']
    if geom.geom_type == 'MultiPolygon':
        buffered_roads.loc[idx,'geometry'] = max(buffered_roads.loc[idx,'geometry'].geoms, key=lambda a: a.area)
        print(f'ID: {buffered_roads.loc[idx,"OBJECTID"]}')

ID: 314.0
ID: 315.0
ID: 373.0
ID: 374.0
ID: 375.0
ID: 376.0
ID: 1580.0
ID: 1581.0
ID: 1582.0
ID: 25294.0
ID: 25297.0
ID: 25298.0
ID: 25299.0
ID: 26669.0
ID: 1576.0
ID: 1577.0
ID: 1578.0


In [9]:
buffered_roads.shape

(11565, 32)

### Erease overlapping zones of polygons

In [10]:
buffered_roads['saved_geom']=buffered_roads.geometry
joined_roads=gpd.sjoin(buffered_roads,buffered_roads[['OBJECTID','OBJEKTART','saved_geom','geometry']],how='left', lsuffix='1', rsuffix='2')

### Drop excessive rows
intersected=joined_roads[joined_roads['OBJECTID_2'].notna()].copy()
intersected_not_itself=intersected[intersected['OBJECTID_1']!=intersected['OBJECTID_2']].copy()
intersected_roads=intersected_not_itself.drop_duplicates(subset=['OBJECTID_1','OBJECTID_2'])

intersected_roads.reset_index(inplace=True, drop=True)
print(intersected_roads.shape)

(30476, 37)


In [11]:
### Sort the roads so that the widest ones come first
intersected_roads.loc[intersected_roads['OBJEKTART_1']==20,'OBJEKTART_1']=8.5

intersect_other_width=intersected_roads[intersected_roads['OBJEKTART_1']<intersected_roads['OBJEKTART_2']].copy()

intersect_other_width.sort_values(by=['OBJEKTART_1'],inplace=True)
intersect_other_width.loc[intersect_other_width['OBJEKTART_1']==8.5,'OBJEKTART_1']=20

intersect_other_width.reset_index(inplace=True, drop=True)

print(intersect_other_width.shape)

(3478, 37)


In [19]:
### Suppress the overlapping intersection
# from https://stackoverflow.com/questions/71738629/expand-polygons-in-geopandas-so-that-they-do-not-overlap-each-other

corr_overlap1 = buffered_roads.copy()

for idx in tqdm(intersect_other_width.index, total=intersect_other_width.shape[0],
               desc='Suppressing the overlap of roads with different width'):
    
    poly1_id = corr_overlap1.index[corr_overlap1['OBJECTID'] == intersect_other_width.loc[idx,'OBJECTID_1']].values.astype(int)[0]
    poly2_id = corr_overlap1.index[corr_overlap1['OBJECTID'] == intersect_other_width.loc[idx,'OBJECTID_2']].values.astype(int)[0]
    
    corr_overlap1=polygons_diff_without_artifacts(corr_overlap1,poly1_id,poly2_id)

Supressing the overlapping intersections of roads of different width:: 100%|█| 3


In [20]:
corr_overlap1.drop(columns=['saved_geom'],inplace=True)

In [21]:
corr_overlap1.columns

Index(['OBJECTID', 'UUID', 'DATUM_AEND', 'DATUM_ERST', 'ERSTELLUNG',
       'ERSTELLU_1', 'REVISION_J', 'REVISION_M', 'GRUND_AEND', 'HERKUNFT',
       'HERKUNFT_J', 'HERKUNFT_M', 'REVISION_Q', 'OBJEKTART', 'KUNSTBAUTE',
       'WANDERWEGE', 'VERKEHRSBE', 'BEFAHRBARK', 'EROEFFNUNG', 'STUFE',
       'RICHTUNGSG', 'BELAGSART', 'KREISEL', 'EIGENTUEME', 'VERKEHRS_1',
       'NAME', 'TLM_STRASS', 'STRASSENNA', 'SHAPE_Leng', 'GDB-Code', 'Width',
       'geometry'],
      dtype='object')

## Remove overlapping area between roads of the same class

In [22]:
## Remove overlapping area between roads of the same class

save_geom=corr_overlap1.copy()
save_geom['saved_geom']=save_geom.geometry
joined_roads=gpd.sjoin(save_geom,save_geom[['OBJECTID','saved_geom','geometry']],how='left', lsuffix='1', rsuffix='2')

### Drop excessive rows
intersected=joined_roads[joined_roads['OBJECTID_2'].notna()].copy()
intersected_not_itself=intersected[intersected['OBJECTID_1']!=intersected['OBJECTID_2']].copy()
intersected_roads=intersected_not_itself.drop_duplicates(subset=['OBJECTID_1','OBJECTID_2'])

intersected_roads.reset_index(inplace=True, drop=True)
print(intersected_roads.shape)

(29656, 36)


In [24]:
### Get rid of duplicates not on the same row
to_drop=[]
for idx in tqdm(intersected_roads.index, total=intersected_roads.shape[0],
               desc='Erease duplicates from spatial join'):
    ir1_objid=intersected_roads.loc[idx,'OBJECTID_1']
    ir2_objid=intersected_roads.loc[idx,'OBJECTID_2']
    
    for ss_idx in intersected_roads[intersected_roads['OBJECTID_1']==ir2_objid].index:
        
        if ir1_objid==intersected_roads.loc[ss_idx,'OBJECTID_2'] and idx<ss_idx:
            to_drop.append(ss_idx)


Erease duplicates from spatial join:: 100%|█| 29656/29656 [00:32<00:00, 913.92it


In [25]:
intersected_roads.drop(to_drop,inplace=True)

In [26]:
corr_overlap2=corr_overlap1.copy()

### from https://stackoverflow.com/questions/71738629/expand-polygons-in-geopandas-so-that-they-do-not-overlap-each-other
for idx in tqdm(intersected_roads.index, total=intersected_roads.shape[0],
                desc='Suppressing overlap between equivalent roads'):
    
    poly1_id = corr_overlap2.index[corr_overlap2['OBJECTID'] == intersected_roads.loc[idx,'OBJECTID_1']].values.astype(int)[0]
    poly2_id = corr_overlap2.index[corr_overlap2['OBJECTID'] == intersected_roads.loc[idx,'OBJECTID_2']].values.astype(int)[0]
    
    geom1 = corr_overlap2.loc[poly1_id,'geometry']
    geom2 = corr_overlap2.loc[poly2_id,'geometry']

    # Store intermediary results in variable
    diff = geom2 - geom1
    
    if diff.geom_type == 'Polygon':
        temp = geom2 - geom1
        
    elif diff.geom_type == 'MultiPolygon':
        # if a multipolygone is created, only keep the largest part to avoid the following error: https://github.com/geopandas/geopandas/issues/992
        temp = max((geom2 - geom1).geoms, key=lambda a: a.area)

    corr_overlap2=polygons_diff_without_artifacts(corr_overlap2,poly2_id,poly1_id)

    corr_overlap2.loc[poly2_id,'geometry']=temp

Suppressing overlap between equivalent roads: 100%|█| 14828/14828 [00:12<00:00, 


In [27]:
corr_overlap2.columns

Index(['OBJECTID', 'UUID', 'DATUM_AEND', 'DATUM_ERST', 'ERSTELLUNG',
       'ERSTELLU_1', 'REVISION_J', 'REVISION_M', 'GRUND_AEND', 'HERKUNFT',
       'HERKUNFT_J', 'HERKUNFT_M', 'REVISION_Q', 'OBJEKTART', 'KUNSTBAUTE',
       'WANDERWEGE', 'VERKEHRSBE', 'BEFAHRBARK', 'EROEFFNUNG', 'STUFE',
       'RICHTUNGSG', 'BELAGSART', 'KREISEL', 'EIGENTUEME', 'VERKEHRS_1',
       'NAME', 'TLM_STRASS', 'STRASSENNA', 'SHAPE_Leng', 'GDB-Code', 'Width',
       'geometry'],
      dtype='object')

In [28]:
# corr_overlap1.to_file("/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/test_buffer1.shp")

# corr_overlap2.to_file("/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/test_buffer2.shp")

### Make the difference with the forested area

In [29]:
# Do not consider the roads under forest canopy
test_crs(corr_overlap2.crs, forests.crs)

In [30]:
non_forest_roads=corr_overlap2.copy()
non_forest_roads=non_forest_roads.overlay(forests[['UUID','geometry']],how='difference')

# Make a vector of the zones to keep on the pictures

In [None]:
width=roads_parameters['Width'].max()+1

buffered_roads_aoi=joined_roads.copy()
buffered_roads_aoi['buffered_geom']=buffered_roads_aoi.buffer(width)

buffered_roads_aoi.drop(columns=['geometry'],inplace=True)
buffered_roads_aoi.rename(columns={'buffered_geom':'geometry'},inplace=True)

AOI_roads=buffered_roads_aoi.unary_union


In [None]:
test_crs(AOI_roads.crs, forests.crs)
    
geom={'geometry':[x for x in AOI_roads.geoms]}
AOI_roads_no_forest=gpd.GeoDataFrame(geom, crs=roads.crs)
AOI_roads_no_forest=AOI_roads_no_forest.overlay(forests[['UUID','geometry']],how='difference')

# Get the information of the tiles for the aoi

In [None]:
if not DETERMINE_RESTRICTED_AOI:
    AOI_roads_no_forest = gpd.read_file(OUTPUT_DIR +  OUTPUT['output_files']['restricted_AOI'])

In [None]:
test_crs(tiles_swissimages.crs, AOI_roads_no_forest.crs)

In [None]:
tiles_in_restricted_aoi=gpd.sjoin(tiles_swissimages,AOI_roads_no_forest, how='inner')

In [None]:
# We want the images from 2018
tiles_in_restricted_aoi['datenstand']=2018

tiles_in_restricted_aoi.drop(columns=['index_right', 'FID'], inplace=True)
tiles_in_restricted_aoi.reset_index(inplace=True)

In [None]:
print(tiles_swissimages.columns)
print(tiles_in_restricted_aoi.columns)

In [None]:
tiles_in_restricted_aoi.info(memory_usage="deep")

In [None]:
tiles_in_restricted_aoi.to_file("02_Data/processed/json/tiles_aoi.geojson")

In [None]:
# https://github.com/swiss-territorial-data-lab/detector-interface/blob/master/tools/wmts-geoquery/src/wmts-geoquery.py

## Saving files

In [31]:
non_forest_roads.drop(columns=['UUID','GDB-Code'],inplace=True)
non_forest_roads.to_file("/mnt/data-01/gsalamin/proj-roadsurf-b/02_Data/processed/shapefiles_gpkg/non_forest_wide_roads.shp")

In [None]:
# AOI_roads_poly = gpd.GeoSeries([x for x in AOI_roads_no_forest.geoms], crs=roads.crs)

AOI_roads_no_forest.to_file("02_Data/processed/shapefiles_gpkg/restricted_AOI.shp")