# Import modules

In [1]:
from ntpath import join
import geopandas as gpd
import pandas as pd
from itertools import combinations
import math
import os, sys
import argparse
import yaml

# Move the working directory one level up so that the relative paths used
# below ('03_Scripts/config.yaml', '02_Data/processed/...') resolve.
# NOTE(review): presumably the notebook is started from a sub-folder of the
# project root - confirm; re-running this cell moves up one more level each time.
os.chdir('..')

In [4]:
# Read the section of the YAML configuration that belongs to this script.
with open('03_Scripts/config.yaml') as config_file:
    cfg = yaml.load(config_file, Loader=yaml.FullLoader)['prepare_data.py']    #  [os.path.basename(__file__)]

# Tasks to do
TASKS = cfg['tasks']
DETERMINE_ROAD_SURFACES = TASKS['determine_roads_surfaces']
DETERMINE_RESTRICTED_AOI = TASKS['determine_restricted_AOI']
MAKE_RASTER_MOSAIC = TASKS['make_raster_mosaic']
DOWNLOAD_TILES = TASKS['determine_tiles']

# Stop right away when every task is switched off.
if not (DETERMINE_ROAD_SURFACES or DETERMINE_RESTRICTED_AOI or MAKE_RASTER_MOSAIC or DOWNLOAD_TILES):
    print('Nothing to do. Exiting!')
    sys.exit(0)

# From here on, at least one task is enabled.
INPUT = cfg['input']
INPUT_DIR = INPUT['input_folder']
INPUT_FILES = INPUT['input_files']

ROADS_IN = INPUT_DIR + INPUT_FILES['roads']
ROADS_PARAM = INPUT_DIR + INPUT_FILES['roads_param']
FORESTS = INPUT_DIR + INPUT_FILES['forests']
# The tile index path is taken as is (it is not prefixed with the input folder).
TILES_SWISSIMAGES = INPUT_FILES['tiles_swissimages10']

OUTPUT = cfg['output']
OUTPUT_DIR = OUTPUT['output_folder']
OUTPUT_FILES = OUTPUT['output_files']

# Each output path is only defined when the matching task is enabled.
if DETERMINE_ROAD_SURFACES:
    ROADS_OUT = OUTPUT_DIR + OUTPUT_FILES['roads']

if DETERMINE_RESTRICTED_AOI:
    RESTRICTED_AOI = OUTPUT_DIR + OUTPUT_FILES['restricted_AOI']

if DOWNLOAD_TILES:
    TILES_AOI = OUTPUT_DIR + OUTPUT_FILES['tiles_aoi']



# Define functions

In [5]:
def polygons_diff_without_artifacts(polygons, p1_idx, p2_idx):
    """Subtract the geometry of row ``p1_idx`` from the geometry of row ``p2_idx``.

    The result is written back into ``polygons`` (the frame is modified in
    place) at row ``p2_idx``:

    * if the difference is a ``Polygon``, it replaces the geometry;
    * if the difference is a ``MultiPolygon``, only its largest part (by area)
      is kept, to avoid https://github.com/geopandas/geopandas/issues/992;
    * for any other geometry type, the row is left untouched.

    Args:
        polygons: table with a 'geometry' column, indexable with ``.loc``.
        p1_idx: index label of the geometry to subtract.
        p2_idx: index label of the geometry that gets cropped.

    Returns:
        The same ``polygons`` object, to allow ``poly = f(poly, ...)`` usage.
    """
    # Compute the difference once and reuse it for the type test and the
    # assignment instead of re-evaluating the same expression.
    diff = polygons.loc[p2_idx, 'geometry'] - polygons.loc[p1_idx, 'geometry']

    if diff.geom_type == 'Polygon':
        polygons.loc[p2_idx, 'geometry'] = diff

    elif diff.geom_type == 'MultiPolygon':
        # Only keep the largest part of the multipolygon.
        polygons.loc[p2_idx, 'geometry'] = max(diff.geoms, key=lambda part: part.area)

    return polygons

def test_crs(crs1, crs2="EPSG:2056"):
    """Stop the script when two coordinate reference systems differ.

    Args:
        crs1: first CRS to compare.
        crs2: second CRS to compare; defaults to "EPSG:2056".

    Returns:
        None when both CRS compare equal.

    Raises:
        SystemExit: with exit code 1, after printing a message, when the two
            CRS do not compare equal.
    """
    # An explicit comparison is used instead of `assert`, which is removed
    # when Python runs with -O and would silently skip the check.
    if not (crs1 == crs2):
        # The message names the compared values because this check is used
        # for several pairs of layers, not only roads and forests.
        print(f"CRS mismatch: {crs1} differs from {crs2}.")
        sys.exit(1)


# Despite its name this helper is a runtime check, not a unit test:
# keep pytest from collecting it.
test_crs.__test__ = False

# Main
## Import files

In [6]:
# Import files ------------------------------------------------------------------------------------------
print('Importing files...')
## Data
# Vector layers read with geopandas from the paths defined in the configuration cell.
roads=gpd.read_file(ROADS_IN)
forests=gpd.read_file(FORESTS)
# NOTE(review): unlike ROADS_IN and FORESTS, TILES_SWISSIMAGES is not prefixed
# with INPUT_DIR in the configuration cell - confirm that this is intended.
tiles_swissimages=gpd.read_file(TILES_SWISSIMAGES)

## Other information
# Excel table of road parameters; the columns 'to keep', 'GDB-Code' and 'Width' are used later on.
roads_parameters=pd.read_excel(ROADS_PARAM)

print('Importations done!')

Importing files...
Importations done!


## Information treatment

### Filter the roads to work on

In [7]:
# Suppress non-road elements: keep only the parameter rows flagged 'yes'
# and a single row per 'GDB-Code' (the first one, pandas' default).
# The filtered frame is rebound rather than modified with inplace=True:
# an in-place change on a filtered slice can trigger pandas'
# chained-assignment warning.
keep_mask = roads_parameters['to keep'] == 'yes'
roads_parameters = roads_parameters[keep_mask].drop_duplicates(subset='GDB-Code')

# Attach the 'Width' of each road class to the roads, matching the roads'
# 'OBJEKTART' against the parameters' 'GDB-Code'.
# NOTE(review): with how='right', a kept 'GDB-Code' without any matching road
# yields a row with missing geometry, which the later geometry loops do not
# handle - confirm whether how='inner' is what is wanted.
joined_roads = roads.merge(
    roads_parameters[['GDB-Code', 'Width']],
    how='right',
    left_on='OBJEKTART',
    right_on='GDB-Code',
)

### Buffer the roads

In [None]:
# Buffer the roads
# Work on a copy so that joined_roads keeps the original geometries.
buffered_roads=joined_roads.copy()
# Buffer every road by the 'Width' value of its own row; cap_style=2 is
# shapely's "flat" cap (the buffer does not extend past the line ends).
# NOTE(review): buffer() takes the distance on each side of the geometry, so
# 'Width' presumably stores a half-width - confirm against the parameter table.
buffered_roads['buffered_geom']=buffered_roads.buffer(joined_roads['Width'], cap_style=2)


In [None]:
# Replace the original geometry column by the buffered one: drop the old
# 'geometry' column, then rename 'buffered_geom' to 'geometry'.
buffered_roads.drop(columns=['geometry'],inplace=True)
buffered_roads.rename(columns={'buffered_geom':'geometry'},inplace=True)

In [None]:
## Do not let roundabouts make artifacts
# Whenever a buffered road is a MultiPolygon, replace it by its largest part
# (by area) and report the OBJECTID of the modified feature.
for idx in buffered_roads.index:
    geom = buffered_roads.loc[idx, 'geometry']
    if geom.geom_type != 'MultiPolygon':
        continue

    largest_part = max(geom.geoms, key=lambda part: part.area)
    buffered_roads.loc[idx, 'geometry'] = largest_part
    print(f'ID: {buffered_roads.loc[idx,"OBJECTID"]}')

### Erase overlapping zones of polygons

In [None]:
## Get the features that intersect with a different class of roads
# Intersect the buffered roads with themselves. The later cells read the
# result through the suffixed columns 'OBJECTID_1'/'OBJECTID_2' and
# 'OBJEKTART_1'/'OBJEKTART_2'.
intersections=gpd.overlay(buffered_roads[['OBJECTID','geometry','OBJEKTART']],buffered_roads,how='intersection')

In [None]:
# Discard the intersection of each feature with itself.
is_other_feature = intersections['OBJECTID_1'].ne(intersections['OBJECTID_2'])
intersect_others_dupli = intersections.loc[is_other_feature].copy()

# Among the remaining pairs, keep those joining two different road classes.
is_other_class = intersect_others_dupli['OBJEKTART_1'].ne(intersect_others_dupli['OBJEKTART_2'])
road_diff_width = intersect_others_dupli.loc[is_other_class].copy()

# One row per feature is enough: only the list of affected OBJECTIDs is used afterwards.
intersect_others = road_diff_width.drop_duplicates(subset=['OBJECTID_1'])


In [None]:
# OBJECTIDs of the roads that overlap at least one road of another class.
id_to_test=intersect_others['OBJECTID_1'].tolist()

In [None]:
## Sort the dataframe by road size
# Class 20 has to be ordered as if its code were 8. The substitution is done
# in the sort key only, so the 'OBJEKTART' column itself is never rewritten.
# Rewriting 20 -> 8, sorting, then rewriting 8 -> 20 would relabel every
# genuine class-8 row as class 20.
buffered_roads.sort_values(
    by=['OBJEKTART'],
    key=lambda classes: classes.replace(20, 8),
    inplace=True,
)


In [None]:
## Remove the intersection between roads of different classes
# Working copy limited to the roads listed in id_to_test and to the three
# columns needed by the pairwise difference below.
poly=buffered_roads.loc[buffered_roads['OBJECTID'].isin(id_to_test),['OBJECTID','geometry','OBJEKTART']].copy()

In [None]:
### from https://stackoverflow.com/questions/71738629/expand-polygons-in-geopandas-so-that-they-do-not-overlap-each-other
# Go through every pair of candidate polygons; when the two overlap and belong
# to different classes, the first one is subtracted from the second one.
nbr_tot_iter = math.comb(len(poly), 2)
iteration = 0
for p1_idx, p2_idx in combinations(poly.index, 2):
    geom_1 = poly.loc[p1_idx, 'geometry']
    geom_2 = poly.loc[p2_idx, 'geometry']

    if geom_1.intersects(geom_2) and poly.at[p1_idx, 'OBJEKTART'] != poly.at[p2_idx, 'OBJEKTART']:
        poly = polygons_diff_without_artifacts(poly, p1_idx, p2_idx)

    # Progress report every million pairs.
    iteration += 1
    if iteration % 1000000 == 0:
        percentage = iteration / nbr_tot_iter * 100
        print(f'{round(percentage)}% done')

print('100% done!')

# Bring the cropped geometries back next to the original ones; roads that
# were not part of `poly` get a missing 'geometry_cropped'.
poly.rename(columns={'geometry': 'geometry_cropped'}, inplace=True)
corr_overlap1 = buffered_roads.merge(
    poly,
    how='left',
    on='OBJECTID',
    suffixes=('_org', '_cropped'),
)


In [None]:
### Change the corrected geometry
# For each road, take the cropped geometry when there is one and fall back
# to the original geometry otherwise.
geom = [
    corr_overlap1.at[idx, 'geometry']
    if pd.isnull(corr_overlap1.at[idx, 'geometry_cropped'])
    else corr_overlap1.at[idx, 'geometry_cropped']
    for idx in corr_overlap1.index
]

# Swap the geometry column for the selected geometries, restore the CRS and
# remove the helper columns brought in by the merge.
corr_overlap1.drop(columns={'geometry'}, inplace=True)
corr_overlap1['geometry'] = geom
corr_overlap1.set_crs(buffered_roads.crs, inplace=True)
corr_overlap1.drop(columns=['OBJEKTART_cropped', 'geometry_cropped'], inplace=True)

In [None]:
## Remove overlapping area between roads of the same class
# Pairs of distinct features that share the same road class.
road_same_width=intersect_others_dupli[intersect_others_dupli['OBJEKTART_1']==intersect_others_dupli['OBJEKTART_2']].copy()
intersect_others=road_same_width.drop_duplicates(subset=['OBJECTID_1'])

# Start from the result of the different-class correction.
df=corr_overlap1.rename(columns={'OBJEKTART_org':'OBJEKTART'},errors='raise')
id_to_test=intersect_others['OBJECTID_1'].tolist()
poly=df.loc[df['OBJECTID'].isin(id_to_test),['OBJECTID','geometry','OBJEKTART']].copy()

### from https://stackoverflow.com/questions/71738629/expand-polygons-in-geopandas-so-that-they-do-not-overlap-each-other
# For every overlapping pair of the same class, the overlap is removed from
# BOTH polygons; both differences are based on the geometries as they were
# before this pair was processed.
iteration=0
nbr_iter_tot=math.comb(poly.shape[0],2)
for p1_idx, p2_idx in combinations(poly.index,2):

    geom_1=poly.loc[p1_idx,'geometry']
    geom_2=poly.loc[p2_idx,'geometry']

    if geom_1.intersects(geom_2) and poly.at[p1_idx,'OBJEKTART']==poly.at[p2_idx,'OBJEKTART']:

        # New geometry of p2, computed before p1 gets modified.
        diff2=geom_2-geom_1
        if diff2.geom_type == 'Polygon':
            temp=diff2
        elif diff2.geom_type == 'MultiPolygon':
            # if a multipolygon is created, only keep the largest part to avoid the following error: https://github.com/geopandas/geopandas/issues/992
            temp=max(diff2.geoms, key=lambda a: a.area)
        else:
            # Any other result type: leave p2 unchanged, as
            # polygons_diff_without_artifacts does. Without this branch `temp`
            # would be undefined or still hold the value of a previous pair.
            temp=geom_2

        # Crop p1 by the (still unmodified) p2, then store the new p2.
        poly=polygons_diff_without_artifacts(poly,p2_idx,p1_idx)

        poly.loc[p2_idx,'geometry']=temp

    # Progress report every million pairs.
    iteration += 1
    if iteration%1000000==0:
        percentage=iteration/nbr_iter_tot*100
        print(f'{round(percentage,1)}% done')

print('100 % done!')

# Bring the cropped geometries back next to the ones of `df`; roads that were
# not part of `poly` get a missing 'geometry_cropped'.
poly.rename(columns={'geometry':'geometry_cropped'}, inplace=True)
corr_overlap2=df.merge(poly,how='left',on='OBJECTID',suffixes=('_org','_cropped'))

### Change the corrected geometry
# Use the cropped geometry when there is one, the previous geometry otherwise.
geom=[
    corr_overlap2.at[idx,'geometry']
    if pd.isnull(corr_overlap2.at[idx,'geometry_cropped'])
    else corr_overlap2.at[idx,'geometry_cropped']
    for idx in corr_overlap2.index
]

corr_overlap2.drop(columns={'geometry'},inplace=True)
corr_overlap2['geometry']=geom
corr_overlap2.set_crs(buffered_roads.crs,inplace=True)
corr_overlap2.drop(columns=['OBJEKTART_cropped','geometry_cropped'],inplace=True)

In [None]:
# Disabled experiment kept as a bare string literal: none of the code below is
# executed. It sketches an alternative to the pairwise loops, based on a
# spatial self-join followed by a vectorised difference.
'''test=corr_overlap1.rename(columns={'OBJEKTART_org':'OBJEKTART'},errors='raise')
test['saved_geom']=test.geometry

joined_test=gpd.sjoin(test,test[['OBJECTID','saved_geom','geometry']],how='left', lsuffix='1', rsuffix='2')
intersected=joined_test[joined_test['OBJECTID_1']!=joined_test['OBJECTID_2']].copy()
intersected_no_dupl=intersected.drop_duplicates(subset=['OBJECTID_1','OBJECTID_2'])
print(intersected_no_dupl.shape)

intersected_no_dupl['new_geom']=intersected_no_dupl['geometry'].difference(intersected_no_dupl['saved_geom_2'])
intersected_no_dupl.drop(columns={'geometry'},inplace=True)
intersected_no_dupl['geometry']=intersected_no_dupl['new_geom']

intersected_no_dupl.drop(columns={'new_geom','saved_geom_1','index_2','OBJECTID_2','saved_geom_2'},inplace=True)'''

In [None]:
# Write the overlap-corrected road polygons to a shapefile at a hard-coded path.
# NOTE(review): the file name suggests an intermediate/debug export - confirm it is still needed.
corr_overlap2.to_file("02_Data/processed/shapefiles/test_buffer2.shp")

### Make the difference with the forested area

In [None]:
# Do not consider the roads under forest canopy
# First make sure the road polygons and the forest layer share the same CRS;
# test_crs() prints a message and exits the script otherwise.
test_crs(corr_overlap2.crs, forests.crs)

In [None]:
# Work on a copy so that corr_overlap2 stays available as is.
non_forest_roads=corr_overlap2.copy()
# Keep only the parts of the road polygons that lie outside the forest polygons.
non_forest_roads=non_forest_roads.overlay(forests[['UUID','geometry']],how='difference')

# Make a vector of the zones to keep on the pictures

In [None]:
# Single buffer distance for the area of interest: the largest 'Width' of the
# kept road classes plus 1 (same unit as 'Width').
width=roads_parameters['Width'].max()+1

# Buffer every road by that distance (default cap style this time).
buffered_roads_aoi=joined_roads.copy()
buffered_roads_aoi['buffered_geom']=buffered_roads_aoi.buffer(width)

# Replace the original geometry column by the buffered one.
buffered_roads_aoi.drop(columns=['geometry'],inplace=True)
buffered_roads_aoi.rename(columns={'buffered_geom':'geometry'},inplace=True)

# Merge all buffers into one geometry. unary_union returns a bare shapely
# geometry, so AOI_roads is no longer a GeoDataFrame from here on.
AOI_roads=buffered_roads_aoi.unary_union


In [None]:
# AOI_roads is the bare shapely geometry returned by unary_union and carries
# no `.crs` attribute. The CRS check therefore uses the CRS of the roads
# layer, which is also the CRS assigned to the GeoDataFrame built below.
test_crs(roads.crs, forests.crs)

# A union made of a single part is a Polygon and has no `.geoms`: wrap it in
# a list so that both the single-part and the multi-part case are handled.
aoi_parts = list(getattr(AOI_roads, 'geoms', [AOI_roads]))

# One row per part of the area of interest, minus the forest polygons.
geom = {'geometry': aoi_parts}
AOI_roads_no_forest = gpd.GeoDataFrame(geom, crs=roads.crs)
AOI_roads_no_forest = AOI_roads_no_forest.overlay(forests[['UUID', 'geometry']], how='difference')

# Get the information of the tiles for the aoi

In [8]:
# When the restricted AOI is not computed in this run, read it from the output
# folder instead (presumably written by a previous run - confirm).
if not DETERMINE_RESTRICTED_AOI:
    AOI_roads_no_forest = gpd.read_file(OUTPUT_DIR +  OUTPUT['output_files']['restricted_AOI'])

In [40]:
# The spatial join below needs both layers in the same CRS;
# test_crs() prints a message and exits the script otherwise.
test_crs(tiles_swissimages.crs, AOI_roads_no_forest.crs)

In [41]:
# Keep the tiles that match at least one polygon of the restricted AOI.
# NOTE(review): an inner spatial join returns one row per (tile, AOI polygon)
# match, so a tile touching several AOI polygons may appear several times -
# confirm whether duplicates have to be removed.
tiles_in_restricted_aoi=gpd.sjoin(tiles_swissimages,AOI_roads_no_forest, how='inner')

In [43]:
# We want the images from 2018
tiles_in_restricted_aoi['datenstand'] = 2018

# Remove the columns brought in by the spatial join. 'FID' only exists when
# the AOI was read back from a file; when the AOI was built in memory it only
# has a geometry column, and dropping 'FID' unconditionally raises a KeyError.
join_leftovers = [
    col for col in ('index_right', 'FID')
    if col in tiles_in_restricted_aoi.columns
]
tiles_in_restricted_aoi.drop(columns=join_leftovers, inplace=True)

# Turn the former index into a regular 'index' column and renumber the rows.
tiles_in_restricted_aoi.reset_index(inplace=True)

In [44]:
# Visual check: compare the columns of the tile index before and after the join and clean-up.
print(tiles_swissimages.columns)
print(tiles_in_restricted_aoi.columns)

Index(['fid', 'id', 'datenstand', 'resolution', 'geometry'], dtype='object')
Index(['index', 'fid', 'id', 'datenstand', 'resolution', 'geometry'], dtype='object')


In [46]:
# Summary of the selected tiles (row count, dtypes); memory_usage="deep"
# also measures the content of object columns.
tiles_in_restricted_aoi.info(memory_usage="deep")

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 828 entries, 0 to 827
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   index       828 non-null    int64   
 1   fid         828 non-null    int64   
 2   id          828 non-null    object  
 3   datenstand  828 non-null    int64   
 4   resolution  828 non-null    object  
 5   geometry    828 non-null    geometry
dtypes: geometry(1), int64(3), object(2)
memory usage: 127.1 KB


In [51]:
# Export the selected tiles as GeoJSON.
# NOTE(review): the path is hard-coded; TILES_AOI from the configuration cell is not used here - confirm.
tiles_in_restricted_aoi.to_file("02_Data/processed/json/tiles_aoi.geojson")

In [None]:
# https://github.com/swiss-territorial-data-lab/detector-interface/blob/master/tools/wmts-geoquery/src/wmts-geoquery.py

## Saving files

In [None]:
# Remove helper attributes before the export.
# NOTE(review): drop() raises a KeyError when 'UUID' or 'GDB-Code' is absent
# from non_forest_roads - confirm that both columns exist at this point.
non_forest_roads.drop(columns=['UUID','GDB-Code'],inplace=True)
# NOTE(review): the path is hard-coded; ROADS_OUT from the configuration cell is not used here - confirm.
non_forest_roads.to_file("02_Data/processed/shapefiles_gpkg/non_forest_wide_roads.shp")

In [None]:
# Earlier variant, left disabled:
# AOI_roads_poly = gpd.GeoSeries([x for x in AOI_roads_no_forest.geoms], crs=roads.crs)

# Export the restricted AOI as a shapefile.
# NOTE(review): the path is hard-coded; RESTRICTED_AOI from the configuration cell is not used here - confirm.
AOI_roads_no_forest.to_file("02_Data/processed/shapefiles_gpkg/restricted_AOI.shp")