In [4]:
import os
import geopandas as gpd
import pandas as pd
import xarray as xr
import numpy as np 
import shapely 
import csv
import ast

import osm_flex.download as dl
import osm_flex.extract as ex
from osm_flex.simplify import remove_contained_points,remove_exact_duplicates,remove_contained_polys
from osm_flex.config import OSM_DATA_DIR,DICT_GEOFABRIK

from tqdm import tqdm

from lonboard import viz
from lonboard.colormap import apply_continuous_cmap
from palettable.colorbrewer.sequential import Blues_9

from pathlib import Path
import pathlib

## Set pathways

In [3]:
# change paths to make it work on your own machine

# Set path to inputdata
hazard_data_path = os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\data_catalogue\open_street_map\global_hazards')
fathom_data_path = os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\eks510\fathom-global')
shapes_file = 'global_countries_advanced.geofeather'
country_shapefile_path = os.path.join(os.path.abspath(r'\\labsdfs.labs.vu.nl\labsdfs\BETA-IVM-BAZIS\snn490\Datasets\Administrative_boundaries\global_countries_buffer'), shapes_file)
#country_shapefile_path = os.path.abspath(os.path.join(local_path,'Datasets','Administrative_boundaries', 'global_countries_buffer', shapes_file)) #shapefiles with buffer around country

# path to our python scripts
sys.path.append(os.path.join('..','scripts'))

In [5]:
#define paths
p = Path('..')
data_path = Path(pathlib.Path.home().parts[0]) / 'Projects' / 'gmhcira' / 'data' #should contain folder 'Vulnerability' with vulnerability data
flood_data_path = Path(pathlib.Path('Z:') / 'eks510' / 'fathom-global') # Flood data
eq_data_path = Path(pathlib.Path('Z:') / 'data_catalogue' / 'open_street_map' / 'global_hazards' / 'earthquakes') # Earthquake data
landslide_data_path = Path(pathlib.Path('Z:') / 'data_catalogue' / 'open_street_map' / 'global_hazards' / 'landslides') # Landslide data
landslide_data_path = Path(pathlib.Path.home().parts[0]) / 'Projects' / 'gmhcira' / 'data' # Landslide data
cyclone_data_path = Path(pathlib.Path('Z:') / 'data_catalogue' / 'open_street_map' / 'global_hazards' / 'tropical_cyclones') # Cyclone data

## Set variables

In [4]:
hazard_dct = {'coastal_flooding': [2, 5, 10, 25, 50, 100, 250, 500, 1000], 
              'fluvial_flooding': [5, 10, 20, 50, 75, 100, 200, 250, 500, 1000], 
              'earthquakes': [250, 475, 975, 1500, 2475], 
              'tropical_cyclones': [10, 20 , 50, 100, 200, 1000, 2000, 5000, 10000], 
              'landslides': [None]} #keys are hazards, lists are return periods associated with hazard

#and open country geometry file
shape_countries = from_geofeather(country_shapefile_path) #open as geofeather

def soft_overlay():
    shape_countries = from_geofeather(country_shapefile_path) #open as geofeather
    country_shape = shape_countries[shape_countries['ISO_3digit'] == country]
    if country_shape.empty == False: #if ISO_3digit in shape_countries
        spat_tree = pygeos.STRtree(grid_data.geometry)
        grid_data_area = (grid_data.loc[spat_tree.query(country_shape.geometry.iloc[0],predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with cover_box
        grid_data_area = grid_data_area.reset_index().rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number

NameError: name 'from_geofeather' is not defined

In [None]:
def read_hazard_data(hazard_data_path,hazard_type):
    """
    Read hazard data files for a specific hazard type.
    Arguments:
        *hazard_data_path* (Path): Base directory path where hazard data is stored.
        *hazard_type* (str): Type of hazard for which data needs to be read ('fluvial', 'pluvial', 'windstorm', 'earthquake', 'landslides').
    
    Returns:
        *list*: A list of Path objects representing individual hazard data files for the specified hazard type.
    """  

    if hazard_type == 'fluvial':
        hazard_data = hazard_data_path / 'Jamaica' / 'fluvial_undefended' # need to make country an input
        return list(hazard_data.iterdir())

    elif hazard_type == 'pluvial':
        hazard_data = hazard_data_path / 'Jamaica' / 'pluvial' # need to make country an input
        return list(hazard_data.iterdir())

    elif hazard_type == 'windstorm':
        hazard_data = hazard_data_path 
        return list(hazard_data.iterdir())

    elif hazard_type == 'earthquake':
        hazard_data = hazard_data_path
        return list(hazard_data.iterdir())

    elif hazard_type == 'landslides':
        hazard_data = hazard_data_path 
        return list(hazard_data.iterdir())


def read_flood_map(flood_map_path,diameter_distance=0.00083/2):
    """
    Read flood map data from a NetCDF file and process it into a GeoDataFrame.
    Arguments:
        *flood_map_path* (Path): Path to the NetCDF file containing flood map data.
        *diameter_distance* (float, optional): The diameter distance used for creating square geometries around data points. Default is 0.00083/2.
    
    Returns:
        *geopandas.GeoDataFrame*: A GeoDataFrame representing the processed flood map data.
    """
    
    flood_map = xr.open_dataset(flood_map_path, engine="rasterio")

    flood_map_vector = flood_map['band_data'].to_dataframe().reset_index() #transform to dataframe
    
    #remove data that will not be used
    flood_map_vector = flood_map_vector.loc[(flood_map_vector.band_data > 0) & (flood_map_vector.band_data < 100)]
    
    # create geometry values and drop lat lon columns
    flood_map_vector['geometry'] = [shapely.points(x) for x in list(zip(flood_map_vector['x'],flood_map_vector['y']))]
    flood_map_vector = flood_map_vector.drop(['x','y','band','spatial_ref'],axis=1)
    
    # drop all non values to reduce size
    flood_map_vector = flood_map_vector.loc[~flood_map_vector['band_data'].isna()].reset_index(drop=True)
    
    # and turn them into squares again:
    flood_map_vector.geometry= shapely.buffer(flood_map_vector.geometry,distance=diameter_distance,cap_style='square').values 

    return flood_map_vector

def read_windstorm_map(windstorm_map_path,bbox):
     
    # load data from NetCDF file
    with xr.open_dataset(flood_map_path) as ds:
        
        # convert data to WGS84 CRS
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        #ds['band_data'] = ds['band_data']/0.88*1.11 #convert 10-min sustained wind speed to 3-s gust wind speed
    
        ds_vector = ds['band_data'].to_dataframe().reset_index() #transform to dataframe
        
        #remove data that will not be used
        ds_vector = ds_vector.loc[(ds_vector.band_data > 0) & (ds_vector.band_data < 100)]
        
        # create geometry values and drop lat lon columns
        ds_vector['geometry'] = [shapely.points(x) for x in list(zip(ds_vector['x'],ds_vector['y']))]
        ds_vector = ds_vector.drop(['x','y','band','spatial_ref'],axis=1)
        ds_vector['geometry'] = shapely.buffer(ds_vector.geometry, distance=0.1/2, cap_style='square').values
    
        return ds_vector

## Flood data (coastal, fluvial and pluvial) - Fathom

In [None]:
for country_full in country_lst:
    file_path_lst = os.path.abspath(os.path.join(dathom_data_path, country_full, 'fluvial_undefended', 'FU_1in{}'.format(rp))) #pathway to file
    hazard_country_df = transform_raster_to_vectorgrid.core(file_path, hazard_type)

## Tropical cyclone data - STORM

In [None]:
basins = ['EP', 'NA', 'NI', 'SI', 'SP', 'WP']
for basin in basins: 
    file_path = os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'STORM_FIXED_RETURN_PERIODS_{}.nc'.format(basin))) #pathway to file
    
    #open file
    #transform to vector files
    #clip per country
    #save as feather

## Earthquakes - GEM?

In [None]:
file_path_lst = [os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'rp_'.format(rp), 'gar17pga{}.tif'.format(rp))) for rp in hazard_dct[hazard_type]] #pathway to file

#transform to vector data
for file_path in file_path_lst:
    hazard_df = transform_raster_to_vectorgrid(file_path,hazard_type)
    print('Hazard data for has been loaded in polygon format for the following hazard type: {}'.format(hazard_type))

    #soft overlay of hazard data with countries
    country_shape = shape_countries[shape_countries['ISO_3digit'] == country]
    if country_shape.empty == False: #if ISO_3digit in shape_countries
        print("Time to overlay and output '{}' hazard data for the following country: {}".format(hazard_type, country))
        spat_tree = pygeos.STRtree(hazard_df.geometry)
        hazard_country_df = (hazard_df.loc[spat_tree.query(country_shape.geometry.iloc[0],predicate='intersects').tolist()]).sort_index(ascending=True) #get grids that overlap with country
        hazard_country_df = hazard_country_df.reset_index() #.rename(columns = {'index':'grid_number'}) #get index as column and name column grid_number

        #save data
        rp = str(hazard_dct[hazard_type][file_path_lst.index(file_path)]) #get return period using index
        Path(os.path.abspath(os.path.join(hazard_data_path, hazard_type, rp))).mkdir(parents=True, exist_ok=True) 
        to_geofeather(hazard_country_df, os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
        #to_geofeather(hazard_country_df, os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.feather'.format(hazard_type, rp, country)), crs="EPSG:4326") #save as geofeather #save file for each country
        temp_df = functions.transform_to_gpd(hazard_country_df) #transform df to gpd with shapely geometries
        temp_df.to_file(os.path.join(hazard_data_path, hazard_type, rp, '{}_{}_{}.gpkg'.format(hazard_type, rp, country)), layer=' ', driver="GPKG")              
    else:
        print("Country '{}' not specified in file containing shapefiles of countries with ISO_3digit codes. Please check inconsistency".format(country))

## Earthquakes - GEM

In [7]:
    # load data from Excel file
    landslide_map_path = Path(pathlib.Path.home().parts[0]) / 'Projects' / 'gmhcira' / 'data' / 'v2023_2_PGA_rock_475.csv'# Landslide data

    with xr.open_dataset(landslide_map_path) as ds:
        print(ds)


ValueError: did not find a match in any of xarray's currently installed IO backends ['scipy', 'rasterio']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html

In [10]:
#https://docs.xarray.dev/en/latest/getting-started-guide/faq.html

import xarray as xr
import pandas as pd

# Load CSV file into pandas DataFrame using the "c" engine
df = pd.read_csv(landslide_map_path)#, engine="c")

# Convert `:py:func:pandas` DataFrame to xarray.Dataset

#ds = xr.Dataset.from_dataframe(df)

# Prints the resulting xarray dataset

print(df)

               lon       lat  PGA-0.002105
0         16.76948 -28.98803       0.05761
1         16.77544 -29.04607       0.05745
2         16.82456 -28.95738       0.05772
3         16.83054 -29.01545       0.05750
4         16.83652 -29.07349       0.05743
...            ...       ...           ...
3613671  178.96487  51.58447       0.73558
3613672  178.97079  51.52631       0.76061
3613673  179.05708  51.49823       0.76937
3613674  179.23502  51.38363       0.82476
3613675  179.40123  51.38524       0.82278

[3613676 rows x 3 columns]


In [None]:
        # convert data to WGS84 CRS
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        #ds['band_data'] = ds['band_data']/0.88*1.11 #convert 10-min sustained wind speed to 3-s gust wind speed
    
        ds_vector = ds['band_data'].to_dataframe().reset_index() #transform to dataframe
        
        #remove data that will not be used
        ds_vector = ds_vector.loc[(ds_vector.band_data > 0) & (ds_vector.band_data < 100)]
        
        # create geometry values and drop lat lon columns
        ds_vector['geometry'] = [shapely.points(x) for x in list(zip(ds_vector['x'],ds_vector['y']))]
        ds_vector = ds_vector.drop(['x','y','band','spatial_ref'],axis=1)
        ds_vector['geometry'] = shapely.buffer(ds_vector.geometry, distance=0.1/2, cap_style='square').values
    
        return ds_vector

## Landslides - ThinkHazard!

In [None]:
landslide_methods = ['LS_RF_Mean','LS_RF_Median','LS_EQ'] #RF = rainfaill triggered, EQ = earthquake-triggered landslides
methods = ['mean', 'median']
for method in methods:
    file_path = os.path.abspath(os.path.join(hazard_data_path, hazard_type, 'LS_RF_{}_1980-2018_COG.tif'.format(method))) #pathway to file
    
    #open file
    #transform to vector files
    #clip per country
    #save as feather