## Load package and seth path

In [1]:
%matplotlib inline

import os,sys
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio
import rioxarray as rxr
import xarray as xr
#!pip install geopy
#!pip install boltons
from pathlib import Path
from geofeather.pygeos import to_geofeather, from_geofeather
from tqdm import tqdm
from mpl_toolkits.axes_grid1 import make_axes_locatable
from rasterio.plot import show
from IPython.display import display #when printing geodataframes, put it in columns -> use display(df)
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator, LinearLocator, MaxNLocator)
from pygeos.constructive import normalize
import pygeos
from pgpkg import Geopackage
import matplotlib.pyplot as plt
import copy

sys.path.append("C:\Projects\gmhcira\scripts")
import functions
import preprocessing_hazard

plt.rcParams['figure.figsize'] = [20, 20]

from osgeo import gdal
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join("..","osmconf.ini"))



### Set pathways

In [2]:
base_path = os.path.abspath('C:/Users/snn490/surfdrive/Outputs/Exposure/health_polygons') #this path contains all data that's needed as input and will contain directories to export outputs

# Set path to inputdata
osm_data_path = os.path.abspath(os.path.join('C:/Users/snn490/surfdrive/Datasets/OpenStreetMap')) #path to map with infra-gpkg's
hazard_data_path = os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\data_catalogue\open_street_map\global_hazards')
fathom_data_path = os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\eks510\fathom-global')

shapes_file = 'global_countries_advanced.geofeather'
country_shapefile_path = os.path.join(os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\snn490\Datasets\Administrative_boundaries\global_countries_buffer'), shapes_file)
#country_shapefile_path = os.path.abspath(os.path.join(local_path,'Datasets','Administrative_boundaries', 'global_countries_buffer', shapes_file)) #shapefiles with buffer around country

# path to save outputs - automatically made, not necessary to change output pathways
output_infra_path = os.path.abspath(os.path.join(base_path, 'extract')) #save interim calculations

# path to our python scripts
sys.path.append(os.path.join('..','scripts'))

#Create folders for outputs (GPKGs and pngs)
Path(output_infra_path).mkdir(parents=True, exist_ok=True)



### Set variables

In [4]:
hazard_dct = {'coastal_flooding': [2, 5, 10, 25, 50, 100, 250, 500, 1000], 
              'fluvial_flooding': [5, 10, 20, 50, 75, 100, 200, 250, 500, 1000], 
              'earthquakes': [250, 475, 975, 1500, 2475], 
              'tropical_cyclones': [10, 20 , 50, 100, 200, 1000, 2000, 5000, 10000], 
              'landslides': [None]} #keys are hazards, lists are return periods associated with hazard



In [11]:
hazard_dct = {'coastal_flooding': [2, 5, 10, 25, 50, 100, 250, 500, 1000]} #keys are hazards, lists are return periods associated with hazard

hazard_dct = {'landslides': [None]} #keys are hazards, lists are return periods associated with hazard

landslide_methods = ['LS_RF_Mean','LS_RF_Median','LS_EQ'] #RF = rainfaill triggered, EQ = earthquake-triggered landslides

In [22]:
hazard_type = 'landslides'
file_path_lst = [os.path.abspath(os.path.join(hazard_data_path, hazard_type, '{}_1980-2018_COG.tif'.format(landslide_method).replace('LS_EQ_1980-2018_COG.tif', 'LS_EQ'))) for landslide_method in landslide_methods] #pathway to file

In [23]:
file_path_lst

['\\\\labsdfs.labs.vu.nl\\labsdfs\\BETA-IVM-BAZIS\\data_catalogue\\open_street_map\\global_hazards\\landslides\\LS_RF_Mean_1980-2018_COG.tif',
 '\\\\labsdfs.labs.vu.nl\\labsdfs\\BETA-IVM-BAZIS\\data_catalogue\\open_street_map\\global_hazards\\landslides\\LS_RF_Median_1980-2018_COG.tif',
 '\\\\labsdfs.labs.vu.nl\\labsdfs\\BETA-IVM-BAZIS\\data_catalogue\\open_street_map\\global_hazards\\landslides\\LS_EQ']

In [21]:
for landslide_method in landslide_methods:
    print('{}_1980-2018_COG.tif'.format(landslide_method).replace('LS_EQ_1980-2018_COG.tif', 'LS_EQ'))

LS_RF_Mean_1980-2018_COG.tif
LS_RF_Median_1980-2018_COG.tif
LS_EQ


In [24]:
landslide_method

'LS_EQ'

In [28]:
for file_path in file_path_lst: 
    print(landslide_methods[file_path_lst.index(file_path)], file_path)

LS_RF_Mean \\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\data_catalogue\open_street_map\global_hazards\landslides\LS_RF_Mean_1980-2018_COG.tif
LS_RF_Median \\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\data_catalogue\open_street_map\global_hazards\landslides\LS_RF_Median_1980-2018_COG.tif
LS_EQ \\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\data_catalogue\open_street_map\global_hazards\landslides\LS_EQ


## Hazard data pre-processing

In [4]:
shape_countries = from_geofeather(country_shapefile_path) #open as geofeather

for hazard_type in hazard_dct:
    if hazard_type in ['coastal_flooding', 'earthquakes']:
        if hazard_type == 'coastal_flooding':
            file_path_lst = [os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'inuncoast_historical_nosub_hist_rp{:0>4d}_0.tif'.format(rp))) for rp in hazard_dct[hazard_type]] #pathway to file
        elif hazard_type == 'earthquakes':
            file_path_lst = [os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'rp_'.format(rp), 'gar17pga{}.tif'.format(rp))) for rp in hazard_dct[hazard_type]] #pathway to file
        
        #transform to vector data
        for file_path in file_path_lst:
            hazard_df = transform_raster_to_vectorgrid(file_path,hazard_type)
            print('Hazard data for has been loaded in polygon format for the following hazard type: {}'.format(hazard_type))

            #soft overlay of hazard data with countries
            country_shape = shape_countries[shape_countries['ISO_3digit'] == country]
            if country_shape.empty == False: #if ISO_3digit in shape_countries
                print("Time to overlay and output '{}' hazard data for the following country: {}".format(hazard_type, country))
                spat_tree = pygeos.STRtree(hazard_df.geometry)
                hazard_country_df = (hazard_df.loc[spat_tree.query(country_shape.geometry.iloc[0],predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with country
                hazard_country_df = hazard_country_df.reset_index() #.rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number

                #save data
                rp = str(hazard_dct[hazard_type][file_path_lst.index(file_path)]) #get return period using index
                Path(os.path.abspath(os.path.join(hazard_data_path, hazard_type, rp))).mkdir(parents=True, exist_ok=True) 
                to_geofeather(hazard_country_df, os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
                #to_geofeather(hazard_country_df, os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
                temp_df = functions.transform_to_gpd(hazard_country_df) #transform df to gpd with shapely geometries
                temp_df.to_file(os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.gpkg'.format(hazard_type, rp, country)), layer=' ', driver="GPKG")              
            else:
                print("Country '{}' not specified in file containing shapefiles of countries with ISO_3digit codes. Please check inconsistency".format(country))
                
    elif hazard_type == 'fluvial_flooding':
        for country_full in country_lst:
            file_path_lst = os.path.abspath(os.path.join(dathom_data_path, country_full, 'fluvial_undefended', 'FU_1in{}'.format(rp))) #pathway to file
            hazard_country_df = transform_raster_to_vectorgrid.core(file_path, hazard_type)
        
    
    elif hazard_type == 'tropical_cyclones':
        basins = ['EP', 'NA', 'NI', 'SI', 'SP', 'WP']
        for basin in basins: 
            file_path = os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'STORM_FIXED_RETURN_PERIODS_{}.nc'.format(basin))) #pathway to file
            
            #open file
            #transform to vector files
            #clip per country
            #save as feather
            
    elif hazard_type == 'landslides':
        methods = ['mean', 'median']
        for method in methods:
            file_path = os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'LS_RF_{}_1980-2018_COG.tif'.format(method))) #pathway to file
            
            #open file
            #transform to vector files
            #clip per country
            #save as feather
    else:
        print('Hazard data pre-processing failed for {}: code does not exist to pre-process this data'.format(hazard_type))

NameError: name 'transform_raster_to_vectorgrid' is not defined

In [None]:
def soft_overlay():
    shape_countries = from_geofeather(country_shapefile_path) #open as geofeather
    country_shape = shape_countries[shape_countries['ISO_3digit'] == country]
    if country_shape.empty == False: #if ISO_3digit in shape_countries
        spat_tree = pygeos.STRtree(grid_data.geometry)
        grid_data_area = (grid_data.loc[spat_tree.query(country_shape.geometry.iloc[0],predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with cover_box
        grid_data_area = grid_data_area.reset_index().rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number

## Damage assessment

In [None]:
# import 0.25 degrees global gridded file
for hazard in hazard_lst:
    for rp in rp_lst:
        for country in country_lst:
            #open hazard data at country-level
            #soft clip global gridded file at country-level
            #open pre-processed osm-files at country-level

            for grid in grid_file: 
                for asset in asset_lst:
                    #hard clip assets with grid
                    #overlay asset with hazard data
                    #calculate damage at asset-level
                    #save damages at grid-level --> contains columsn with damages per infrastructure type --> 
                    
                            

In [None]:
## def regional_analysis(country,reg_index,asset_type='roads',hazard='pluvial'):
    """[summary]

    Args:
        country ([type]): [description]
        reg_index ([type]): [description]
        asset_type (str, optional): [description]. Defaults to 'roads'.
        hazard (str, optional): [description]. Defaults to 'pluvial'.

    Returns:
        [type]: [description]
    """ 

try:

    print('{} {} started!'.format(country,reg_index))

    if hazard == 'pluvial':
        haz_short = 'P'
        hazard_data_path = os.path.join('..','hazard_data','pluvial')
        return_periods = [5,10,20,50,75,100,200,250,500,1000]

    elif hazard == 'fluvial':
        haz_short = 'FD'
        hazard_data_path = os.path.join('..','hazard_data','fluvial_defended')
        return_periods = [5,10,20,50,75,100,200,250,500,1000]

    elif hazard == 'coastal':
        haz_short = 'CF'
        hazard_data_path = os.path.join('..','hazard_data','coastal_flooding')
        return_periods = #[XX,XX]

    elif hazard == 'earthquake':
        haz_short = 'EQ'
        hazard_data_path = os.path.join('..','hazard_data','coastal_flooding')
        return_periods = #[XX,XX]

    elif hazard == 'tropical_cyclones':
        haz_short = 'TC'
        hazard_data_path = os.path.join('..','hazard_data','tropical_cyclones')
        return_periods = #[XX,XX]           

    elif hazard == 'landslides':
        haz_short = 'LS'
        hazard_data_path = os.path.join('..','hazard_data','landslides')
        return_periods = [1]


    #check if file is already finished
    if hazard in ['pluvial','fluvial','coastal','earthquake','landslide','tropical_cyclones']:
        if os.path.exists(os.path.join('..','{}_damage'.format(hazard),'{}_{}_{}.csv'.format(country,reg_index,asset_type))):
            return None

    elif hazard in ['wildfires','temperature']:
        if os.path.exists(os.path.join('..','{}_exposure'.format(hazard),'{}_{}_{}.csv'.format(country,reg_index,asset_type))):
            return None    

    # specify list of sub systems
    point_assets = ['health','telecom']
    polygon_assets = ['airports','educational_facilities','waste_solid','waste_water','water_supply']
    line_assets = ['railways','roads']        

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(data_path=os.path.join('..','data','infra_vulnerability_data.xlsx'))

    #load regions
    lev1_gadm = pd.DataFrame(gpd.read_file(os.path.join(gadm_data_path,'gadm36_levels.gpkg'),layer='level1'))
    lev1_gadm = lev1_gadm.loc[lev1_gadm.GID_0 == country].reset_index()
    lev1_gadm.geometry = pygeos.from_shapely(lev1_gadm.geometry)

    #load asset data
    assets = load_assets(osm_data_path,asset_type,country)
    assets = pygeos.intersection(assets.geometry,lev1_gadm.iloc[reg_index].geometry)

    if (asset_type in line_assets) | (asset_type in point_assets):
        ### get line assets, clip and buffer them
        assets = buffer_assets(assets,buffer_size=100)

    elif asset_type == 'power':
        power_lines = buffer_assets(assets.loc[assets.asset.isin(['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
        power_poly = assets.loc[assets.asset.isin(['plant','substation'])].reset_index(drop=True)
        power_points = buffer_assets(assets.loc[assets.asset.isin(['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    print('Asset data loaded for {} in {}'.format(asset_type,country))

    if len(assets) == 0:
        assets.to_csv(os.path.join('..','{}_damage'.format(hazard),'{}_{}.csv'.format(reg_index,asset_type)))
        return None


    #reproject roads
    assets_reg.geometry = reproject_assets(assets_reg,current_crs="epsg:4326",approximate_crs ="epsg:3857")

    #rename roads
    del df_asset

    print('NOTE: {} loaded and clipped to region {}'.format(asset_type,reg_index))

    # set damage presets
    water_depths = np.arange(0,1100,100)
    fragility_values = np.arange(0,1.1,0.1)

    #hazard impacts
    for return_period in tqdm(return_periods,total=len(return_periods),desc='Assessment for {} {} {} {}'.format(country,reg_index,hazard,asset_type)):

        # load hazard data and create spatial index
        if hazard in ['pluvial','fluvial']:
            df_ds = load_feather(os.path.join(hazard_data_path),country,'{}_1in{}.ft'.format(haz_short,return_period))
            grid_size = 0.0008333333333333333868
        elif hazard == 'landslides':
            df_ds = load_feather(os.path.join(hazard_data_path),'landslide_rainfall_trigger.ft')
            grid_size = 0.008333333332237680136
        elif hazard == 'wildfires':
            df_ds = load_feather(os.path.join(hazard_data_path),'csiro_wf_max_fwi_rp{}.ft'.format(return_period))
            grid_size = 0.5
        elif hazard == 'temperature':
            df_ds = load_feather(os.path.join(hazard_data_path),'VITO-EH-{}y.ft'.format(return_period))
            grid_size = 0.08333000000000000129

        sindex_hazard = pygeos.STRtree(df_ds.geometry)

        #get regional hazard data
        hazard_reg = df_ds.iloc[sindex_hazard.query(lev1_gadm.iloc[reg_index].geometry,predicate='intersects')].reset_index() #NOTE lev1 --> grid

        # create squares again from flood points
        hazard_reg.geometry =  pygeos.buffer(hazard_reg.geometry.values,radius=grid_size/2,cap_style='square')

        #reproject
        hazard_reg.geometry = reproject_assets(hazard_reg,current_crs="epsg:4326",approximate_crs ="epsg:3857")

        del df_ds

        #overlay hazards and roads    
        hazard_overlay = overlay_hazard_assets(hazard_reg,assets_reg)

        #calculate damage
        asset_geoms = assets_reg.geometry
        flood_geoms = hazard_reg.geometry.values
        flood_intensities = hazard_reg.hazard_intensity.values

        # load paved file
        paved_ratios = pd.read_csv(os.path.join('..','input_data','paved_ratios.csv'),index_col=[0,1]).loc[country]
        paved_dict = (paved_ratios['paved']/100).to_dict()

        asset_types = assets_reg.assets.values

        # prepare sampling with all possible combinations
        sampling = [[1,2,3,4,5],[0.75,1,1.25],[0,0.2,0.4,0.6,0.8,1]]
        sample_set = (list(itertools.product(*sampling)))

        # simple fragility curve used in Koks et al. (2019)
        if (asset_type == 'roads') | (asset_type == 'railway'): 
            water_depths = np.asarray([0,10,25,50,75,100,150])
            fragility_values = np.asarray([0,0,0.01,0.02,0.05,0.1,0.2])

        if asset_type == 'roads':                
            # paved costs for europe and central asia
            Paved_4L_costs = 1718347/1e3 
            Paved_2L_costs = 1587911/1e3
            Gravel_cost = 26546/1e3

        elif asset_type == 'railways':
            electric = 1000000/1e3 
            nonelectric = 750000/1e3

        if hazard in ['pluvial','fluvial']:

            collect_damage = {}
            for asset_id in (np.unique(hazard_overlay[0])):
                asset_geom = asset_geoms[asset_id]
                road_type = asset_types[asset_id]

                match_hazard_overlays = hazard_overlay[1][hazard_overlay[0]==asset_id]

                local_hazard_geoms = flood_geoms[match_hazard_overlays]
                local_hazard_intensities = flood_intensities[match_hazard_overlays]

                overlay_meters = pygeos.length(pygeos.intersection(local_hazard_geoms,asset_geom))

                uncer_output = []
                for sample in sample_set:
                    damage_ratios = np.interp(local_hazard_intensities*100,water_depths,fragility_values*sample[0])

                    if asset_type == 'roads':
                        uncer_output.append(np.sum(paved_dict[road_type]*(damage_ratios*Paved_4L_costs*overlay_meters*sample[1]*(1-sample[2]) +
                                    damage_ratios*Paved_2L_costs*overlay_meters*sample[1]*(sample[2])) +
                    (1-paved_dict[road_type])*damage_ratios*Gravel_cost*overlay_meters*sample[1]))

                    elif asset_type == 'railways':
                        uncer_output.append(np.sum((damage_ratios*electric*overlay_meters*sample[1]*(1-sample[2]) +
                                    damage_ratios*nonelectric*overlay_meters*sample[1]*(sample[2]))))                     

                uncer_output = np.asarray(uncer_output)

                collect_damage[asset_id] = np.percentile(uncer_output,[0,20,40,50,60,80,100],axis=0)    

            df_damages = pd.DataFrame.from_dict(collect_damage,orient='index',columns = ['{}_perc_1in{}'.format(x,return_period) for x in [0,20,40,50,60,80,100]])
            assets_reg = assets_reg.join(df_damages)

        elif hazard == 'landslides':
            collect_probabilities = {}
            for asset_id in (np.unique(hazard_overlay[0])):
                asset_geom = asset_geoms[asset_id]
                road_type = asset_types[asset_id]

                match_hazard_overlays = hazard_overlay[1][hazard_overlay[0]==asset_id]

                local_hazard_intensities = flood_intensities[match_hazard_overlays]

                collect_probabilities[asset_id] = np.mean(local_hazard_intensities),np.min(local_hazard_intensities),np.max(local_hazard_intensities)

            df_exposure = pd.DataFrame.from_dict(collect_probabilities,orient='index',columns = ['mean','min','max'])
            assets_reg = assets_reg.join(df_exposure)

        elif hazard in ['wildfires','temperature']:

            collect_probabilities = {}
            for asset_id in (np.unique(hazard_overlay[0])):
                asset_geom = asset_geoms[asset_id]
                road_type = asset_types[asset_id]

                match_hazard_overlays = hazard_overlay[1][hazard_overlay[0]==asset_id]

                local_hazard_intensities = flood_intensities[match_hazard_overlays]

                collect_probabilities[asset_id] = np.mean(local_hazard_intensities),np.min(local_hazard_intensities),np.max(local_hazard_intensities)

            df_exposure = pd.DataFrame.from_dict(collect_probabilities,orient='index',columns = ['mean_{}'.format(return_period),'min_{}'.format(return_period),'max_{}'.format(return_period)])
            assets_reg = assets_reg.join(df_exposure)

    if hazard in ['pluvial','fluvial']:
        assets_reg.to_csv(os.path.join('..','{}_damage'.format(hazard),'{}_{}.csv'.format(reg_index,asset_type)))

    elif (hazard == 'landslides') | (hazard == 'wildfires') | (hazard == 'temperature') :
        assets_reg.to_csv(os.path.join('..','{}_exposure'.format(hazard),'{}_{}.csv'.format(reg_index,asset_type)))

except Exception as e: 
    print('{} failed because of {}'.format(reg_index,e))




In [93]:
return_periods = [10]
hazard_type = 'coastal_flooding'
country = 'NLD'

#open files
gridded_world_df = from_geofeather(os.path.join(os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\snn490\Outputs\Grid_data'), 'global_grid_010degree.geofeather')) #open gridded world data at country level as geofeather file
#shape_countries = from_geofeather(os.path.join(country_shapefile_path)) #open country boundaries as geofeather file
#open pre-processed osm-files at country-level

for rp in return_periods:
    hazard_df = from_geofeather(os.path.join(hazard_data_path, hazard_type, 'rp_{}'.format(rp), '{}_rp{}_{}.feather'.format(hazard_type, rp, country))) #open hazard data at country level as geofeather file
    hazard_df_geometries = hazard_df['geometry'].to_list()
    hazard_united = normalize(pygeos.coverage_union_all(hazard_df_geometries))
    hazard_united_df = pd.DataFrame({'geometry': [hazard_united]}) # Create DataFrame

    #soft overlay of united hazard data with gridded file to get cisi-level grids that overlap with hazard zone
    if hazard_united_df.empty == False: #if ISO_3digit in shape_countries
        print("Time to overlay and output '{}' hazard data for the following country: {}".format(hazard_type, country))
        spat_tree = pygeos.STRtree(gridded_world_df.geometry)
        gridded_country_df = (gridded_world_df.loc[spat_tree.query(hazard_united_df.geometry.iloc[0],predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with country
        gridded_country_df = gridded_country_df.reset_index().rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number
        
    ##for each grid, overlay with hazard intensities, and save df hazard intensities within df of all grid cells 
    #print("Time to overlay filtered grid cells with hazard data {} for the following country: {}".format(hazard_type, country))
    #for grid_cell in tqdm(gridded_country_df.itertuples(),total=len(gridded_country_df)):
    #    spat_tree = pygeos.STRtree(hazard_df.geometry)
    #    hazard_grid_df = (hazard_df.loc[spat_tree.query(grid_cell.geometry,predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with country
    #    #hazard_grid_df = hazard_df.reset_index(drop=True) #.rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number
    

    
    
    
    #save data
    #Path(os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'rp_{}'.format(rp)))).mkdir(parents=True, exist_ok=True) 
    #to_geofeather(gridded_country_df, os.path.join(hazard_data_path, hazard_type, 'rp_{}'.format(rp), '{}_rp{}_{}_test.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
    ##to_geofeather(hazard_country_df, os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
    #temp_df = functions.transform_to_gpd(gridded_country_df) #transform df to gpd with shapely geometries
    #temp_df.to_file(os.path.join(hazard_data_path, hazard_type, 'rp_{}'.format(rp), '{}_rp{}_{}_test.gpkg'.format(hazard_type, rp, country)), layer=' ', driver="GPKG")  
    
    
    
    
    
    
    #be aware to save grid numbers as seperate column!!!! In gridded country file with the hazard intensities
    #organize hazard data per CISI grid


Time to overlay and output 'coastal_flooding' hazard data for the following country: NLD


In [94]:
gridded_country_df

Unnamed: 0,grid_number,geometry
0,623017,"POLYGON ((3.3 51.5, 3.4 51.5, 3.4 51.4, 3.3 51..."
1,623018,"POLYGON ((3.3 51.4, 3.4 51.4, 3.4 51.3, 3.3 51..."
2,623411,"POLYGON ((3.4 51.7, 3.5 51.7, 3.5 51.6, 3.4 51..."
3,623412,"POLYGON ((3.4 51.6, 3.5 51.6, 3.5 51.5, 3.4 51..."
4,623413,"POLYGON ((3.4 51.5, 3.5 51.5, 3.5 51.4, 3.4 51..."
...,...,...
283,639282,"POLYGON ((7 53.3, 7.1 53.3, 7.1 53.2, 7 53.2, ..."
284,639761,"POLYGON ((7.1 53.4, 7.2 53.4, 7.2 53.3, 7.1 53..."
285,639762,"POLYGON ((7.1 53.3, 7.2 53.3, 7.2 53.2, 7.1 53..."
286,639763,"POLYGON ((7.1 53.2, 7.2 53.2, 7.2 53.1, 7.1 53..."


In [174]:
gridded_country_df = gridded_country_df.loc[gridded_country_df['grid_number'] == 629421] 
gridded_country_df = gridded_country_df.assign(hazard_grids='')

hazard_per_grid_dct = {}
print("Time to overlay filtered grid cells with hazard data {} for the following country: {}".format(hazard_type, country))
for grid_cell in tqdm(gridded_country_df.itertuples(),total=len(gridded_country_df)):
    spat_tree = pygeos.STRtree(hazard_df.geometry)
    hazard_grid_df = (hazard_df.loc[spat_tree.query(grid_cell.geometry,predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with country
    #hazard_grid_df = hazard_df.reset_index(drop=True) #.rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number
    
    import pickle
    #dus grid_cell en hazard_grid_df in OF dictionary of in pd??
    gridded_country_df.loc[grid_cell.Index, 'hazard_grids'] = pickle.dumps(hazard_grid_df.to_dict()) #transform dataset with hazard intensitief for grid into dict --> string
    
    #pd.DataFrame(pickle.loads(gridded_country_df.loc[grid_cell.Index, 'hazard_grids'] )) #and transform pickle dictionairy back to df

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 34.46it/s]

Time to overlay filtered grid cells with hazard data coastal_flooding for the following country: NLD





Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [168]:
#https://www.delftstack.com/howto/python/dict-to-string-in-python/
import pickle
dict = {'Hello': 60, 'World': 100}
s = pickle.dumps(dict)
print(s)
d = pickle.loads(s)
print(d)

b'\x80\x04\x95\x19\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05Hello\x94K<\x8c\x05World\x94Kdu.'
{'Hello': 60, 'World': 100}


In [173]:
#https://www.delftstack.com/howto/python/dict-to-string-in-python/
s = pickle.dumps(hazard_grid_df.to_dict())
print(s)
d = pickle.loads(s)
print(d)
pd.DataFrame(d) # and back to DataFrame

b'\x80\x04\x95\x8a\x0b\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05index\x94}\x94(M8\x07J-\x95\x01\x00M9\x07J.\x95\x01\x00M:\x07J/\x95\x01\x00M;\x07J0\x95\x01\x00Mb\x07J\xae\x95\x01\x00Mc\x07J\xaf\x95\x01\x00Md\x07J\xb0\x95\x01\x00Me\x07J\xb1\x95\x01\x00Mf\x07J\xb2\x95\x01\x00Mg\x07J\xb3\x95\x01\x00M\x92\x07J2\x96\x01\x00M\x93\x07J3\x96\x01\x00M\x94\x07J4\x96\x01\x00M\x95\x07J5\x96\x01\x00M\xc2\x07J\xa1\x96\x01\x00M\xc3\x07J\xa2\x96\x01\x00M\xef\x07J\x19\x97\x01\x00M\xf0\x07J\x1a\x97\x01\x00M\xf1\x07J\x1b\x97\x01\x00M\xf2\x07J\x1c\x97\x01\x00M\x1b\x08J\xa6\x97\x01\x00M\x1c\x08J\xa7\x97\x01\x00M\xd9\x08J\x9d\x9a\x01\x00u\x8c\x10hazard_intensity\x94}\x94(M8\x07G?\xe3*\xd1\xe0\x00\x00\x00M9\x07G?\xf2\xdei \x00\x00\x00M:\x07G?\xe2\xf6k\xc0\x00\x00\x00M;\x07G@\x02\xbag\xc0\x00\x00\x00Mb\x07G@\x07\xee\x94\x80\x00\x00\x00Mc\x07G?\xfe\xf1)\xa0\x00\x00\x00Md\x07G?\xe1\x88\xd1\xe0\x00\x00\x00Me\x07G?\xe6\x9e\x94`\x00\x00\x00Mf\x07G?\xdc/\xfb\x80\x00\x00\x00Mg\x07G@\x01\x03\xa9\x00\x00\x00\x00M\x92\x07G

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [170]:
#https://www.delftstack.com/howto/python/dict-to-string-in-python/

dict = hazard_grid_df.to_dict()
s = pickle.dumps(dict)
print(s)
d = pickle.loads(s)
print(d)
pd.DataFrame(d) # and back to DataFrame

b'\x80\x04\x95\x8a\x0b\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05index\x94}\x94(M8\x07J-\x95\x01\x00M9\x07J.\x95\x01\x00M:\x07J/\x95\x01\x00M;\x07J0\x95\x01\x00Mb\x07J\xae\x95\x01\x00Mc\x07J\xaf\x95\x01\x00Md\x07J\xb0\x95\x01\x00Me\x07J\xb1\x95\x01\x00Mf\x07J\xb2\x95\x01\x00Mg\x07J\xb3\x95\x01\x00M\x92\x07J2\x96\x01\x00M\x93\x07J3\x96\x01\x00M\x94\x07J4\x96\x01\x00M\x95\x07J5\x96\x01\x00M\xc2\x07J\xa1\x96\x01\x00M\xc3\x07J\xa2\x96\x01\x00M\xef\x07J\x19\x97\x01\x00M\xf0\x07J\x1a\x97\x01\x00M\xf1\x07J\x1b\x97\x01\x00M\xf2\x07J\x1c\x97\x01\x00M\x1b\x08J\xa6\x97\x01\x00M\x1c\x08J\xa7\x97\x01\x00M\xd9\x08J\x9d\x9a\x01\x00u\x8c\x10hazard_intensity\x94}\x94(M8\x07G?\xe3*\xd1\xe0\x00\x00\x00M9\x07G?\xf2\xdei \x00\x00\x00M:\x07G?\xe2\xf6k\xc0\x00\x00\x00M;\x07G@\x02\xbag\xc0\x00\x00\x00Mb\x07G@\x07\xee\x94\x80\x00\x00\x00Mc\x07G?\xfe\xf1)\xa0\x00\x00\x00Md\x07G?\xe1\x88\xd1\xe0\x00\x00\x00Me\x07G?\xe6\x9e\x94`\x00\x00\x00Mf\x07G?\xdc/\xfb\x80\x00\x00\x00Mg\x07G@\x01\x03\xa9\x00\x00\x00\x00M\x92\x07G

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [169]:
hazard_grid_df.to_dict()

{'index': {1848: 103725,
  1849: 103726,
  1850: 103727,
  1851: 103728,
  1890: 103854,
  1891: 103855,
  1892: 103856,
  1893: 103857,
  1894: 103858,
  1895: 103859,
  1938: 103986,
  1939: 103987,
  1940: 103988,
  1941: 103989,
  1986: 104097,
  1987: 104098,
  2031: 104217,
  2032: 104218,
  2033: 104219,
  2034: 104220,
  2075: 104358,
  2076: 104359,
  2265: 105117},
 'hazard_intensity': {1848: 0.5989770293235779,
  1849: 1.1792994737625122,
  1850: 0.5925806760787964,
  1851: 2.3410181999206543,
  1890: 2.9914941787719727,
  1891: 1.9338775873184204,
  1892: 0.5479516386985779,
  1893: 0.7068578600883484,
  1894: 0.44042861461639404,
  1895: 2.1267871856689453,
  1938: 1.403645396232605,
  1939: 0.8703636527061462,
  1940: 0.9264361262321472,
  1941: 1.378872275352478,
  1986: 0.16665923595428467,
  1987: 2.3688111305236816,
  2031: 0.45531511306762695,
  2032: 0.16883063316345215,
  2033: 2.1287763118743896,
  2034: 2.7843105792999268,
  2075: 1.5095337629318237,
  2076: 6.35

In [166]:
import ast
ast.literal_eval(test)

SyntaxError: invalid syntax (<unknown>, line 1)

In [160]:
dict = {'Hello': 60}
s = json.dumps(dict)
print(s)
d = json.loads(s)
print(d)

{"Hello": 60}
{'Hello': 60}


In [None]:
json.dumps(dict)

In [151]:
gridded_country_df.loc[grid_cell.Index, 'hazard_grids']

"{'index': {1848: 103725, 1849: 103726, 1850: 103727, 1851: 103728, 1890: 103854, 1891: 103855, 1892: 103856, 1893: 103857, 1894: 103858, 1895: 103859, 1938: 103986, 1939: 103987, 1940: 103988, 1941: 103989, 1986: 104097, 1987: 104098, 2031: 104217, 2032: 104218, 2033: 104219, 2034: 104220, 2075: 104358, 2076: 104359, 2265: 105117}, 'hazard_intensity': {1848: 0.5989770293235779, 1849: 1.1792994737625122, 1850: 0.5925806760787964, 1851: 2.3410181999206543, 1890: 2.9914941787719727, 1891: 1.9338775873184204, 1892: 0.5479516386985779, 1893: 0.7068578600883484, 1894: 0.44042861461639404, 1895: 2.1267871856689453, 1938: 1.403645396232605, 1939: 0.8703636527061462, 1940: 0.9264361262321472, 1941: 1.378872275352478, 1986: 0.16665923595428467, 1987: 2.3688111305236816, 2031: 0.45531511306762695, 2032: 0.16883063316345215, 2033: 2.1287763118743896, 2034: 2.7843105792999268, 2075: 1.5095337629318237, 2076: 6.3562726974487305, 2265: 0.9157838225364685}, 'geometry': {1848: <pygeos.Geometry POLYGON

In [153]:
import json
# using json.loads()
# convert dictionary string to dictionary
res = json.loads(gridded_country_df.loc[grid_cell.Index, 'hazard_grids'])

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

In [156]:
test = hazard_grid_df.to_dict()

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [124]:
df_store.loc[grid_cell.Index, "{}_count".format(asset_type)] = count.get(key = asset_type)

Unnamed: 0,grid_number,geometry,hazard_grids
126,629421,"POLYGON ((4.9 53.3, 5 53.3, 5 53.2, 4.9 53.2, ...",


In [118]:
gridded_country_df.loc[grid_cell.Index]['hazard_grids']

''

In [119]:
hazard_grid_df

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [100]:
hazard_grid_df

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


Unnamed: 0,grid_number,geometry
126,629421,"POLYGON ((4.9 53.3, 5 53.3, 5 53.2, 4.9 53.2, ..."


In [99]:
hazard_grid_df

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [84]:
hazard_per_grid_dct = {}

In [87]:
hazard_per_grid_dct[grid_cell] = hazard_grid_df

In [89]:
hazard_per_grid_dct[0]

KeyError: 0

In [83]:
gridded_country_df

Unnamed: 0,grid_number,geometry
126,629421,"POLYGON ((4.9 53.3, 5 53.3, 5 53.2, 4.9 53.2, ..."


In [81]:
hazard_grid_df

Unnamed: 0,index,hazard_intensity,geometry
1848,103725,0.598977,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1849,103726,1.179299,"POLYGON ((4.97 53.3, 4.97 53.2, 4.96 53.2, 4.9..."
1850,103727,0.592581,"POLYGON ((4.97 53.3, 4.97 53.2, 4.97 53.2, 4.9..."
1851,103728,2.341018,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."
1890,103854,2.991494,"POLYGON ((4.92 53.3, 4.92 53.2, 4.91 53.2, 4.9..."
1891,103855,1.933878,"POLYGON ((4.93 53.3, 4.93 53.2, 4.93 53.2, 4.9..."
1892,103856,0.547952,"POLYGON ((4.94 53.3, 4.94 53.2, 4.93 53.2, 4.9..."
1893,103857,0.706858,"POLYGON ((4.95 53.3, 4.95 53.2, 4.94 53.2, 4.9..."
1894,103858,0.440429,"POLYGON ((4.96 53.3, 4.96 53.2, 4.95 53.2, 4.9..."
1895,103859,2.126787,"POLYGON ((4.98 53.3, 4.98 53.2, 4.97 53.2, 4.9..."


In [82]:
hazard_df

Unnamed: 0,index,hazard_intensity,geometry
0,98782,1.737798,"POLYGON ((6.33 53.5, 6.33 53.5, 6.32 53.5, 6.3..."
1,98783,1.799556,"POLYGON ((6.34 53.5, 6.34 53.5, 6.33 53.5, 6.3..."
2,98784,1.351003,"POLYGON ((6.35 53.5, 6.35 53.5, 6.34 53.5, 6.3..."
3,98785,1.658486,"POLYGON ((6.36 53.5, 6.36 53.5, 6.35 53.5, 6.3..."
4,98786,1.692883,"POLYGON ((6.37 53.5, 6.37 53.5, 6.36 53.5, 6.3..."
...,...,...,...
18357,127807,0.476356,"POLYGON ((3.83 51.3, 3.83 51.2, 3.82 51.2, 3.8..."
18358,127808,0.559874,"POLYGON ((3.84 51.3, 3.84 51.2, 3.83 51.2, 3.8..."
18359,127840,0.258580,"POLYGON ((3.82 51.2, 3.82 51.2, 3.81 51.2, 3.8..."
18360,127841,0.424705,"POLYGON ((3.82 51.2, 3.82 51.2, 3.82 51.2, 3.8..."


## Calculate EAD for each country and put damage data in global file