In [20]:
import os
import xarray as xr
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
#import cftime
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import contextily as cx
import re

from pathlib import Path
import pathlib

In [21]:
country = 'GEO'

In [22]:
damage_data_path = Path(pathlib.Path.home().parts[0]) / 'Projects' / 'gmhcira' / 'data' / 'damage' / country
eq_damage_data_path = damage_data_path / 'landslide_eq'
his_rainfall_damage_data_path = damage_data_path / 'landslide_rf_historical'
fut126_rainfall_damage_data_path = damage_data_path / 'landslide_rf_ssp126'
fut585_rainfall_damage_data_path = damage_data_path / 'landslide_rf_ssp585'

admin_path = Path('C:\\Users/snn490/OneDrive - Vrije Universiteit Amsterdam\WorldBank_Projects\SupplyChainECA\data\gadm')
figures_path = Path('C://Users/snn490/OneDrive - Vrije Universiteit Amsterdam/WorldBank_Projects/SupplyChainECA/asset_damages/{}/landslides_figures'.format(country))

In [23]:
#def calculate_risk(road_segment, damages_dict):
#    damages_lst = [damages_dict[rp][damages_dict[rp]['osm_id'] == road_segment]['Partial destruction (0.5)'].iloc[0] for rp in [*damages_dict]]
#    asset_dam_df = pd.DataFrame([1/rp for rp in [*damages_dict]]+[1,1e-10],damages_lst+[0, max(damages_lst)]).reset_index()
#    asset_dam_df.columns = ['damage','prob']
#    asset_dam_df = asset_dam_df.sort_values('prob',ascending=True).reset_index(drop=True)
#    return np.trapz(asset_dam_df.damage.values,asset_dam_df.prob.values) #np.trapz(y,x)

def calculate_risk_vectorized(row):
    damages_lst = row.values
    rps = row.index
    if isinstance((row.index)[0], str): rps = [int(s) for con_rp in rps for s in re.findall(r'\d+', con_rp)]
    prob_values = np.array([1/rp for rp in rps] + [1, 1e-10]) #without design standard of rp 10
    damage_values = np.append(damages_lst, [0, max(damages_lst)])
    sorted_indices = np.argsort(prob_values)
    prob_values = prob_values[sorted_indices]
    damage_values = damage_values[sorted_indices]
    return np.trapz(damage_values, prob_values)

def get_province(road_segment,subnational):
    try:
        return subnational.loc[road_segment.geometry.intersects(subnational.geometry)].GID_3.values[0]
    except:
        return None

def handle_zero_values(ead_df, haz_trig_rp_lst):
    """
    Replaces zero values in higher return periods with the value of the previous lower return period,
    if the lower return period has a non-zero value.

    Parameters:
    ead_df (pd.DataFrame): DataFrame containing the 'ead' columns.
    haz_trig_rp_lst (list): List of return periods corresponding to the 'ead' columns.

    Returns:
    pd.DataFrame: Updated DataFrame with zero values replaced.
    """
    # Extract the relevant columns and convert to numpy array for vectorized operations
    values = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].values

    # Iterate over the columns, starting from the second column
    for i in range(1, values.shape[1]):
        # Replace zeros with the previous column's value
        values[:, i] = np.where(values[:, i] == 0, values[:, i-1], values[:, i])

    # Convert the numpy array back to a DataFrame and update the original DataFrame
    ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = values

    # Extract the relevant columns and convert to numpy array for vectorized operations
    values = ead_df[['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values

    # Iterate over the columns, starting from the second column
    for i in range(1, values.shape[1]):
        # Replace zeros with the previous column's value
        values[:, i] = np.where(values[:, i] == 0, values[:, i-1], values[:, i])

    # Convert the numpy array back to a DataFrame and update the original DataFrame
    ead_df[['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = values

    # Extract the relevant columns and convert to numpy array for vectorized operations
    values = ead_df[['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values

    # Iterate over the columns, starting from the second column
    for i in range(1, values.shape[1]):
        # Replace zeros with the previous column's value
        values[:, i] = np.where(values[:, i] == 0, values[:, i-1], values[:, i])

    # Convert the numpy array back to a DataFrame and update the original DataFrame
    ead_df[['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = values 
    
    return ead_df

# Earthquake-triggered 

In [24]:
# read parquets per return period
haz_trig_rp_lst = [475]
landslide_rp_lst = [2.5, 10.0, 20.0, 100.0, 200.0, 1000.0]
road_types_lst = ['unclassified', 'primary', 'secondary', 'tertiary', 'residential', 
                                'trunk', 'trunk_link',  'motorway','motorway_link',  'primary_link','secondary_link', 'tertiary_link','road', 'track' ]
#create df with all unique ID numbers, geometry and column ead
ead_df = gpd.GeoDataFrame(columns=['osm_id', 'asset', 'geometry']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst] 
                          +['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst] +['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst], geometry='geometry')

for rp_trig in haz_trig_rp_lst:
    damage_data_path_list = eq_damage_data_path.iterdir()
    rp_trig_path_list = [path for path in damage_data_path_list if '_trig{}'.format(rp_trig) in str(path)]

    damages_dict = {key: pd.DataFrame() for key in landslide_rp_lst}
    for data_path in rp_trig_path_list:
        df = gpd.read_parquet(data_path)
        damages_dict[df['return_period_landslide'].unique()[0]] = pd.concat([damages_dict[df['return_period_landslide'].unique()[0]], df], ignore_index=True)  #create dictionary with the return period 

    #modify dictionaries 
    non_empty_rps = [key for key, df in damages_dict.items() if not df.empty]
    if non_empty_rps:
        lowest_non_empty_rp = min(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key >= lowest_non_empty_rp} # Step 1: Remove all keys above this return period
        highest_non_empty_rp = max(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key <= highest_non_empty_rp} # Step 2: Remove all keys below this return period
        for rp in damages_dict.keys():
            #print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}; exposed roads: {:.2f} km'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum(), damages_dict[rp]['Overlay'].sum()/1000))
            print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum()))   
        
        # Calculate EAD per OSM road segment for rainfall event
        combined_df = pd.concat([damages_dict[rp] for rp in damages_dict.keys()]) #merge dataframes into one
        combined_df['Partial destruction (0.5)'] = combined_df['Partial destruction (0.5)'].fillna(0)
        temp_df = combined_df.drop_duplicates(subset=['osm_id']) # remove duplicates
        ead_df = pd.merge(ead_df, temp_df[['osm_id', 'asset', 'geometry']], on=['osm_id', 'asset', 'geometry'], how='outer')
        
        pivoted_damages = combined_df.pivot_table(index='osm_id', columns='return_period_landslide', values='Partial destruction (0.5)', fill_value=0)
        
        print("Unique osm_id in original data:", combined_df['osm_id'].nunique()) # Print unique osm_id from the original DataFrame
        print("Unique osm_id in pivoted data:", pivoted_damages.index.nunique()) # Print unique osm_id from the pivoted DataFrame
        missing_ids = set(combined_df['osm_id']) - set(pivoted_damages.index) # Check if some osm_id values are missing from pivoted_damages
        print("Missing osm_id values:", missing_ids)
        
        ead_df['ead_{}'.format(rp_trig)] = ead_df.apply(lambda row: calculate_risk_vectorized(pivoted_damages.loc[row['osm_id']]) if row['osm_id'] in pivoted_damages.index else 0, axis=1)

        #fill in overlay columns
        overlay_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['Overlay'].to_dict()
        ead_df['ead_{}_overlay'.format(rp_trig)] = ead_df['osm_id'].map(overlay_dict)

        #fill in number of landslides columns
        number_landslide_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['number of landslides'].to_dict()
        ead_df['ead_{}_number_landslides'.format(rp_trig)] = ead_df['osm_id'].map(number_landslide_dict)

ead_df = handle_zero_values(ead_df, haz_trig_rp_lst) # Handle 0 values for higher return periods with damages for the lower return periods

# Calculate EAD for earthquake-triggered landslides
ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)
temp_df = (ead_df.filter(['osm_id']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst], axis=1)).set_index('osm_id') #create df with only ead columns
ead_df['ead'] = ead_df.apply(lambda row: calculate_risk_vectorized(temp_df.loc[row['osm_id']]), axis=1)
ead_df = ead_df.to_crs(3857)

for rp_trig in haz_trig_rp_lst:
    print('This is the EAD given earthquake event of {}: {:,}'.format(rp_trig, ead_df['ead_{}'.format(rp_trig)].sum()))
print('This is the national EAD for earthquake-triggered landslides: {}'.format(sum(ead_df['ead'])))
print('This is the max EAD for earthquake-triggered landslides for a road segment: {}'.format(max(ead_df['ead'])))

This is the total damage for landslide return period 2.5 given rainfall event of 475: 0.00
This is the total damage for landslide return period 10.0 given rainfall event of 475: 403,832.69
This is the total damage for landslide return period 20.0 given rainfall event of 475: 54,217,955.71
This is the total damage for landslide return period 100.0 given rainfall event of 475: 520,322,413.04
This is the total damage for landslide return period 200.0 given rainfall event of 475: 1,821,415,092.91
This is the total damage for landslide return period 1000.0 given rainfall event of 475: 4,764,047,517.28
Unique osm_id in original data: 175213
Unique osm_id in pivoted data: 175213
Missing osm_id values: set()
This is the EAD given earthquake event of 475: 36,706,243.01489286
This is the national EAD for earthquake-triggered landslides: 18391759.654317692
This is the max EAD for earthquake-triggered landslides for a road segment: 292206.6695288195


In [25]:
# exposed roads
print("This is the road length exposed at national level: {:,.2f}".format(ead_df.ead_475_overlay.sum()/1000))
print("This is the EAD at national level: {:,.2f}".format(ead_df.ead.sum()))

affected_road = ead_df.ead_475_overlay.sum()/1000
damage = ead_df.ead.sum()
print('The expected annual damage, based on length of affected road segment: {:,.2f} dollar per km'.format(damage/affected_road))

This is the road length exposed at national level: 19,067.70
This is the EAD at national level: 18,391,759.65
The expected annual damage, based on length of affected road segment: 964.55 dollar per km


In [None]:
ead_df

In [None]:
subnational_df = gpd.read_file(admin_path / 'gadm41_TJK.gpkg',layer=3)
subnational_df = subnational_df.to_crs(3857)
ead_df['GID_3'] = ead_df.apply(lambda road_segment: get_province(road_segment, subnational), axis=1)
subnational_df = subnational_df.merge(ead_df[['GID_3','ead']].groupby('GID_3').sum(),left_on='GID_3',right_index=True)
subnational_df[['NAME_2','ead']].groupby('NAME_2').sum().sort_values(by='ead',ascending=False)

In [None]:
subnational_df['binned'] =  pd.cut(subnational_df.ead,[0,1e4,2.5e4,5e4,subnational_df.ead.max()],labels=['\\$1-$10,000','\\$10,000-$25,000','\\$25,000-$50,000','> $50,000'])

fig, ax = plt.subplots(1,1,figsize=(10, 10))

subnational_df.plot(column = 'binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
subnational_df.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')
ax.set_axis_off()

plt.savefig(figures_path /'landslide_eq_EAD_municipal.png', bbox_inches='tight', dpi=300)

In [None]:
subnational_df.ead.max()

In [None]:
ead_df['damage/km'] = ead_df.ead/(ead_df.ead_475_overlay/1000)
ead_df['damage/km'].max()

In [None]:
# figure EAD damages per km per road segment
ead_df['damage/km_binned'] = pd.cut(ead_df['damage/km'],[0,2.5e2,5e2,1.5e3,ead_df['damage/km'].max()],labels=['\\$1-$250','\\$250-$500','\\$500-$1,500','> $1,500'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='damage/km_binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_eq_EAD_km.png', bbox_inches='tight', dpi=300)

In [None]:
# figure 1/1000 landslide events given a 475 year eq-RP: damages per OSM road segment
damages_dict[1000]['binned'] = pd.cut(damages_dict[1000]['Partial destruction (0.5)'],[0,1e4,1e6,1e7,damages_dict[1000]['Partial destruction (0.5)'].max()],labels=['\\$1-$10,000','\\$10,000-$1,000,000','\\$1,000,000-$10,000,000','> $10,000,000'])
damage_df = damages_dict[1000].to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

damage_df.plot(column = 'binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure 1/100 landslide events given a 475 year eq-RP: damages per OSM road segment
damages_dict[100]['binned'] = pd.cut(damages_dict[100]['Partial destruction (0.5)'],[0,1e4,1e6,1e7,damages_dict[1000]['Partial destruction (0.5)'].max()],labels=['\\$1-$10,000','\\$10,000-$1,000,000','\\$1,000,000-$10,000,000','> $10,000,000'])
damage_df = damages_dict[100].to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

damage_df.plot(column = 'binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure 1/20 landslide events given a 475 year eq-RP: damages per OSM road segment
damages_dict[20]['binned'] = pd.cut(damages_dict[20]['Partial destruction (0.5)'],[0,1e4,1e6,1e7,damages_dict[1000]['Partial destruction (0.5)'].max()],labels=['\\$1-$10,000','\\$10,000-$1,000,000','\\$1,000,000-$10,000,000','> $10,000,000'])
damage_df = damages_dict[20].to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

damage_df.plot(column = 'binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to earthquake-triggered landslide events given a 475 year eq-RP: damages per OSM road segment
ead_df['ead_475_binned'] = pd.cut(ead_df['ead_475'],[0,1e3,1e4,2.5e4,ead_df['ead_475'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_475_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to earthquake-triggered landslides: damages per OSM road segment
ead_df['ead_binned'] = pd.cut(ead_df['ead'],[0,1e3,1e4,2.5e4,ead_df['ead'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_eq_EAD.png', bbox_inches='tight', dpi=300)

# Rainfall-triggered 

## Historical conditions

In [26]:
#read parquets per return period
haz_trig_rp_lst = [5, 25, 200, 1000]
landslide_rp_lst = [5.0, 7.0, 10.0, 20.0, 33.0, 50.0, 100.0]
road_types_lst = ['unclassified', 'primary', 'secondary', 'tertiary', 'residential', 
                                'trunk', 'trunk_link',  'motorway','motorway_link',  'primary_link','secondary_link', 'tertiary_link','road', 'track' ]

#create df with all unique ID numbers, geometry and column ead
ead_df = gpd.GeoDataFrame(columns=['osm_id', 'asset', 'geometry']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst] 
                          +['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst] +['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst], geometry='geometry')

for rp_trig in haz_trig_rp_lst:
    damage_data_path_list = his_rainfall_damage_data_path.iterdir()
    rp_trig_path_list = [path for path in damage_data_path_list if '_trig{}'.format(rp_trig) in str(path)]

    damages_dict = {key: pd.DataFrame() for key in landslide_rp_lst}
    for data_path in rp_trig_path_list:
        df = gpd.read_parquet(data_path)
        damages_dict[df['return_period_landslide'].unique()[0]] = pd.concat([damages_dict[df['return_period_landslide'].unique()[0]], df], ignore_index=True)  #create dictionary with the return period 

    #modify dictionaries 
    non_empty_rps = [key for key, df in damages_dict.items() if not df.empty]
    if non_empty_rps:
        lowest_non_empty_rp = min(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key >= lowest_non_empty_rp} # Step 1: Remove all keys above this return period
        highest_non_empty_rp = max(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key <= highest_non_empty_rp} # Step 2: Remove all keys below this return period
        for rp in damages_dict.keys():
            #print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}; exposed roads: {:.2f} km'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum(), damages_dict[rp]['Overlay'].sum()/1000))
            print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum()))   
        
        # Calculate EAD per OSM road segment for rainfall event
        combined_df = pd.concat([damages_dict[rp] for rp in damages_dict.keys()]) #merge dataframes into one
        combined_df['Partial destruction (0.5)'] = combined_df['Partial destruction (0.5)'].fillna(0)
        temp_df = combined_df.drop_duplicates(subset=['osm_id']) # remove duplicates
        ead_df = pd.merge(ead_df, temp_df[['osm_id', 'asset', 'geometry']], on=['osm_id', 'asset', 'geometry'], how='outer')
        
        pivoted_damages = combined_df.pivot_table(index='osm_id', columns='return_period_landslide', values='Partial destruction (0.5)', fill_value=0)
        
        print("Unique osm_id in original data:", combined_df['osm_id'].nunique()) # Print unique osm_id from the original DataFrame
        print("Unique osm_id in pivoted data:", pivoted_damages.index.nunique()) # Print unique osm_id from the pivoted DataFrame
        missing_ids = set(combined_df['osm_id']) - set(pivoted_damages.index) # Check if some osm_id values are missing from pivoted_damages
        print("Missing osm_id values:", missing_ids)
        
        ead_df['ead_{}'.format(rp_trig)] = ead_df.apply(lambda row: calculate_risk_vectorized(pivoted_damages.loc[row['osm_id']]) if row['osm_id'] in pivoted_damages.index else 0, axis=1)

        #fill in overlay columns
        overlay_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['Overlay'].to_dict()
        ead_df['ead_{}_overlay'.format(rp_trig)] = ead_df['osm_id'].map(overlay_dict)

        #fill in number of landslides columns
        number_landslide_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['number of landslides'].to_dict()
        ead_df['ead_{}_number_landslides'.format(rp_trig)] = ead_df['osm_id'].map(number_landslide_dict)

ead_df = handle_zero_values(ead_df, haz_trig_rp_lst) # Handle 0 values for higher return periods with damages for the lower return periods

# Calculate EAD for landslide-triggered landslides
ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)
temp_df = (ead_df.filter(['osm_id']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst], axis=1)).set_index('osm_id') #create df with only ead columns
ead_df['ead'] = ead_df.apply(lambda row: calculate_risk_vectorized(temp_df.loc[row['osm_id']]), axis=1)
ead_df = ead_df.to_crs(3857)

for rp_trig in haz_trig_rp_lst:
    print('This is the EAD given rainfall event of {}: {:,}'.format(rp_trig, ead_df['ead_{}'.format(rp_trig)].sum()))
print('This is the national EAD for rainfall-triggered landslides: {}'.format(sum(ead_df['ead'])))
print('This is the max EAD for rainfall-triggered landslides for a road segment: {}'.format(max(ead_df['ead'])))

This is the total damage for landslide return period 20.0 given rainfall event of 5: 1,110,586.54
This is the total damage for landslide return period 33.0 given rainfall event of 5: 10,735,423.25
This is the total damage for landslide return period 50.0 given rainfall event of 5: 75,801,523.45
This is the total damage for landslide return period 100.0 given rainfall event of 5: 418,841,420.82
Unique osm_id in original data: 174224
Unique osm_id in pivoted data: 174224
Missing osm_id values: set()
This is the total damage for landslide return period 10.0 given rainfall event of 25: 34,782,097.68
This is the total damage for landslide return period 20.0 given rainfall event of 25: 194,891,991.45
This is the total damage for landslide return period 33.0 given rainfall event of 25: 1,452,383,602.19
This is the total damage for landslide return period 50.0 given rainfall event of 25: 7,526,826,624.95
Unique osm_id in original data: 175213
Unique osm_id in pivoted data: 175213
Missing osm_i

  values = ead_df[['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  values = ead_df[['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)


This is the EAD given rainfall event of 5: 7,751,663.496123953
This is the EAD given rainfall event of 25: 234,410,034.19110712
This is the EAD given rainfall event of 200: 400,158,077.3380545
This is the EAD given rainfall event of 1000: 0
This is the national EAD for rainfall-triggered landslides: 34578938.338525735
This is the max EAD for rainfall-triggered landslides for a road segment: 275431.59939733293


In [27]:
# exposed roads
print("This is the road length exposed at national level: {:,.2f}".format(ead_df.ead_1000_overlay.sum()/1000))
print("This is the EAD at national level: {:,.2f}".format(ead_df.ead.sum()))

affected_road = ead_df.ead_1000_overlay.sum()/1000
damage = ead_df.ead.sum()
print('The expected annual damage, based on length of affected road segment: {:,.2f} dollar per km'.format(damage/affected_road))

This is the road length exposed at national level: 33,011.83
This is the EAD at national level: 34,578,938.34
The expected annual damage, based on length of affected road segment: 1,047.47 dollar per km


In [None]:
subnational_df = gpd.read_file(admin_path / 'gadm41_TJK.gpkg',layer=3)
subnational_df = subnational_df.to_crs(3857)
ead_df['GID_3'] = ead_df.apply(lambda road_segment: get_province(road_segment, subnational), axis=1)
subnational_df = subnational_df.merge(ead_df[['GID_3','ead']].groupby('GID_3').sum(),left_on='GID_3',right_index=True)
subnational_df[['NAME_2','ead']].groupby('NAME_2').sum().sort_values(by='ead',ascending=False)

In [None]:
subnational_df['binned'] =  pd.cut(subnational_df.ead,[0,1e4,5e4,1.5e5,2.5e5,subnational_df.ead.max()],labels=['\\$1-$10,000','\\$10,000-$50,000','\\$50,000-$150,000',
                                                                                                       '\\$150,000-$250,000','> $250,000'])
fig, ax = plt.subplots(1,1,figsize=(10, 10))

subnational_df.plot(column = 'binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
subnational_df.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_historical_EAD_municipal.png', bbox_inches='tight', dpi=300)

In [None]:
subnational_df.ead.max()

In [None]:
ead_df['damage/km'] = ead_df.ead/(ead_df.ead_1000_overlay/1000)
ead_df['damage/km'].max()

In [None]:
# figure EAD damages per km per road segment
ead_df['damage/km_binned'] = pd.cut(ead_df['damage/km'],[0,1e3,2.5e3,5.5e3,ead_df['damage/km'].max()],labels=['\\$1-$1,000','\\$1,000-$2,500','\\$2,500-$5,500','> $5,500'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='damage/km_binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_historical_EAD_km.png', bbox_inches='tight', dpi=300)

In [None]:
# figure EAD due to rainfall-triggered landslide events due to 1000 year RP rainfall: damages per OSM road segment
ead_df['ead_1000_binned'] = pd.cut(ead_df['ead_1000'],[0,1e3,1e4,2.5e4,ead_df['ead_1000'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_1000_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to rainfall-triggered landslide events due to 25 year RP rainfall: damages per OSM road segment
ead_df['ead_25_binned'] = pd.cut(ead_df['ead_25'],[0,1e3,1e4,2.5e4,ead_df['ead_1000'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_25_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to rainfall-triggered landslides: damages per OSM road segment
ead_df['ead_binned'] = pd.cut(ead_df['ead'],[0,1e3,1e4,2.5e4,ead_df['ead'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_historical_EAD.png', bbox_inches='tight', dpi=300)

In [None]:
ead_df.ead_binned.value_counts()

In [None]:
# project road of interest
dangara_gulliston_ids = dangara_gulliston_ids = ['629623298', '588578885', '667337582', '32623282', '32623136']
project_df = ead_df[ead_df['osm_id'].isin(dangara_gulliston_ids)]

project_df

In [None]:
print(shapely.length(project_df['geometry'])/1000) # length of road segments in km
print(sum(shapely.length(project_df['geometry'])/1000)) # total length of road segments in km

In [None]:
damage = sum(project_df['ead'])
affected_road = sum(project_df['ead_1000_overlay'])/1000
number_of_landslides = sum(project_df['ead_1000_number_landslides'])
road_segments_length = 22.976544 + 20.829034 + 10.498568

print('The expected annual damage for Dangara-Gulliston road project, based on total length of project road: {:.2f} dollar per km'.format(damage/67.34681578690605))
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected OSM road segment: {:.2f} dollar per km'.format(damage/road_segments_length)) 
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected road segment: {:.2f} dollar per km'.format(damage/affected_road)) 
print('The expected annual damage for Dangara-Gulliston road project per landslide event: {:.2f} dollar'.format(damage/number_of_landslides)) #shouldn't we divide by the average landslides per year?

In [None]:
sum(project_df['ead'])

In [None]:
sum(project_df['ead_1000_overlay'])/1000 #affected road segments

## Future conditions SSP126

In [28]:
#read parquets per return period
haz_trig_rp_lst = [5, 25, 200, 1000]
landslide_rp_lst = [5.0, 7.0, 10.0, 20.0, 33.0, 50.0, 100.0]
road_types_lst = ['unclassified', 'primary', 'secondary', 'tertiary', 'residential', 
                                'trunk', 'trunk_link',  'motorway','motorway_link',  'primary_link','secondary_link', 'tertiary_link','road', 'track' ]

#create df with all unique ID numbers, geometry and column ead
ead_df = gpd.GeoDataFrame(columns=['osm_id', 'asset', 'geometry']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst] 
                          +['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst] +['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst], geometry='geometry')

for rp_trig in haz_trig_rp_lst:
    damage_data_path_list = fut126_rainfall_damage_data_path.iterdir()
    rp_trig_path_list = [path for path in damage_data_path_list if '_trig{}'.format(rp_trig) in str(path)]

    damages_dict = {key: pd.DataFrame() for key in landslide_rp_lst}
    for data_path in rp_trig_path_list:
        df = gpd.read_parquet(data_path)
        damages_dict[df['return_period_landslide'].unique()[0]] = pd.concat([damages_dict[df['return_period_landslide'].unique()[0]], df], ignore_index=True)  #create dictionary with the return period 

    #modify dictionaries 
    non_empty_rps = [key for key, df in damages_dict.items() if not df.empty]
    if non_empty_rps:
        lowest_non_empty_rp = min(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key >= lowest_non_empty_rp} # Step 1: Remove all keys above this return period
        highest_non_empty_rp = max(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key <= highest_non_empty_rp} # Step 2: Remove all keys below this return period
        for rp in damages_dict.keys():
            #print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}; exposed roads: {:.2f} km'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum(), damages_dict[rp]['Overlay'].sum()/1000))
            print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum()))   
        
        # Calculate EAD per OSM road segment for rainfall event
        combined_df = pd.concat([damages_dict[rp] for rp in damages_dict.keys()]) #merge dataframes into one
        combined_df['Partial destruction (0.5)'] = combined_df['Partial destruction (0.5)'].fillna(0)
        temp_df = combined_df.drop_duplicates(subset=['osm_id']) # remove duplicates
        ead_df = pd.merge(ead_df, temp_df[['osm_id', 'asset', 'geometry']], on=['osm_id', 'asset', 'geometry'], how='outer')
        
        pivoted_damages = combined_df.pivot_table(index='osm_id', columns='return_period_landslide', values='Partial destruction (0.5)', fill_value=0)
        
        print("Unique osm_id in original data:", combined_df['osm_id'].nunique()) # Print unique osm_id from the original DataFrame
        print("Unique osm_id in pivoted data:", pivoted_damages.index.nunique()) # Print unique osm_id from the pivoted DataFrame
        missing_ids = set(combined_df['osm_id']) - set(pivoted_damages.index) # Check if some osm_id values are missing from pivoted_damages
        print("Missing osm_id values:", missing_ids)
        
        ead_df['ead_{}'.format(rp_trig)] = ead_df.apply(lambda row: calculate_risk_vectorized(pivoted_damages.loc[row['osm_id']]) if row['osm_id'] in pivoted_damages.index else 0, axis=1)

        #fill in overlay columns
        overlay_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['Overlay'].to_dict()
        ead_df['ead_{}_overlay'.format(rp_trig)] = ead_df['osm_id'].map(overlay_dict)

        #fill in number of landslides columns
        number_landslide_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['number of landslides'].to_dict()
        ead_df['ead_{}_number_landslides'.format(rp_trig)] = ead_df['osm_id'].map(number_landslide_dict)

ead_df = handle_zero_values(ead_df, haz_trig_rp_lst) # Handle 0 values for higher return periods with damages for the lower return periods

# Calculate EAD for earthquake-triggered landslides
ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)
temp_df = (ead_df.filter(['osm_id']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst], axis=1)).set_index('osm_id')
ead_df['ead'] = ead_df.apply(lambda row: calculate_risk_vectorized(temp_df.loc[row['osm_id']]), axis=1)
ead_df = ead_df.to_crs(3857)

for rp_trig in haz_trig_rp_lst:
    print('This is the EAD given rainfall event of {}: {:,}'.format(rp_trig, ead_df['ead_{}'.format(rp_trig)].sum()))
print('This is the national EAD for rainfall-triggered landslides: {}'.format(sum(ead_df['ead'])))
print('This is the max EAD for rainfall-triggered landslides for a road segment: {}'.format(max(ead_df['ead'])))

This is the total damage for landslide return period 20.0 given rainfall event of 5: 1,110,586.54
This is the total damage for landslide return period 33.0 given rainfall event of 5: 10,875,160.28
This is the total damage for landslide return period 50.0 given rainfall event of 5: 77,387,451.18
This is the total damage for landslide return period 100.0 given rainfall event of 5: 439,990,837.57
Unique osm_id in original data: 174224
Unique osm_id in pivoted data: 174224
Missing osm_id values: set()
This is the total damage for landslide return period 10.0 given rainfall event of 25: 34,782,097.68
This is the total damage for landslide return period 20.0 given rainfall event of 25: 195,470,870.27
This is the total damage for landslide return period 33.0 given rainfall event of 25: 1,476,786,401.08
This is the total damage for landslide return period 50.0 given rainfall event of 25: 7,844,206,436.04
Unique osm_id in original data: 175213
Unique osm_id in pivoted data: 175213
Missing osm_i

  values = ead_df[['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  values = ead_df[['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)


This is the EAD given rainfall event of 5: 8,087,100.369983716
This is the EAD given rainfall event of 25: 242,778,832.3195557
This is the EAD given rainfall event of 200: 416,059,663.5123421
This is the EAD given rainfall event of 1000: 0
This is the national EAD for rainfall-triggered landslides: 35873937.578193165
This is the max EAD for rainfall-triggered landslides for a road segment: 275431.59939733293


In [29]:
# exposed roads
print("This is the road length exposed at national level: {:,.2f}".format(ead_df.ead_1000_overlay.sum()/1000))
print("This is the EAD at national level: {:,.2f}".format(ead_df.ead.sum()))

affected_road = ead_df.ead_1000_overlay.sum()/1000
damage = ead_df.ead.sum()
print('The expected annual damage, based on length of affected road segment: {:,.2f} dollar per km'.format(damage/affected_road))

This is the road length exposed at national level: 34,746.06
This is the EAD at national level: 35,873,937.58
The expected annual damage, based on length of affected road segment: 1,032.46 dollar per km


In [None]:
ead_df 

In [None]:
subnational_df = gpd.read_file(admin_path / 'gadm41_TJK.gpkg',layer=3)
subnational_df = subnational_df.to_crs(3857)
ead_df['GID_3'] = ead_df.apply(lambda road_segment: get_province(road_segment, subnational), axis=1)
subnational_df = subnational_df.merge(ead_df[['GID_3','ead']].groupby('GID_3').sum(),left_on='GID_3',right_index=True)
subnational_df[['NAME_2','ead']].groupby('NAME_2').sum().sort_values(by='ead',ascending=False)

In [None]:
subnational_df['binned'] =  pd.cut(subnational_df.ead,[0,1e4,5e4,1.5e5,3e5,subnational_df.ead.max()],labels=['\\$1-$10,000','\\$10,000-$50,000','\\$50,000-$150,000',
                                                                                                       '\\$150,000-$300,000','> $300,000'])
fig, ax = plt.subplots(1,1,figsize=(10, 10))

subnational_df.plot(column = 'binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
subnational_df.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_SSP126_EAD_municipal.png', bbox_inches='tight', dpi=300)

In [None]:
subnational_df.ead.max()

In [None]:
ead_df['damage/km'] = ead_df.ead/(ead_df.ead_1000_overlay/1000)
ead_df['damage/km'].max()

In [None]:
# figure EAD damages per km per road segment
ead_df['damage/km_binned'] = pd.cut(ead_df['damage/km'],[0,1e3,3e3,7e3,ead_df['damage/km'].max()],labels=['\\$1-$1,000','\\$1,000-$3,000','\\$3,000-$7,000','> $7,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='damage/km_binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_SSP126_EAD_km.png', bbox_inches='tight', dpi=300)

In [None]:
# figure EAD due to rainfall-triggered landslides: damages per OSM road segment
ead_df['ead_binned'] = pd.cut(ead_df['ead'],[0,1e3,1e4,2.5e4,ead_df['ead'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_ssp126_EAD.png', bbox_inches='tight', dpi=300)

In [None]:
ead_df.ead.max()

In [None]:
ead_df.ead_binned.value_counts()

In [None]:
# project road of interest
dangara_gulliston_ids = dangara_gulliston_ids = ['629623298', '588578885', '667337582', '32623282', '32623136']
project_df = ead_df[ead_df['osm_id'].isin(dangara_gulliston_ids)]

project_df

In [None]:
print(shapely.length(project_df['geometry'])/1000) # length of road segments in km
print(sum(shapely.length(project_df['geometry'])/1000)) # total length of road segments in km

In [None]:
damage = sum(project_df['ead'])
affected_road = sum(project_df['ead_1000_overlay'])/1000
number_of_landslides = sum(project_df['ead_1000_number_landslides'])
road_segments_length = 22.976544 + 20.829034 + 10.498568

print('The expected annual damage for Dangara-Gulliston road project, based on total length of project road: {:.2f} dollar per km'.format(damage/67.34681578690605))
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected OSM road segment: {:.2f} dollar per km'.format(damage/road_segments_length)) 
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected road segment: {:.2f} dollar per km'.format(damage/affected_road)) 
print('The expected annual damage for Dangara-Gulliston road project per landslide event: {:.2f} dollar'.format(damage/number_of_landslides)) #shouldn't we divide by the average landslides per year?

In [None]:
sum(project_df['ead'])

In [None]:
sum(project_df['ead_1000_overlay'])/1000

## Future conditions SSP585

In [30]:
#read parquets per return period
haz_trig_rp_lst = [5, 25, 200, 1000]
landslide_rp_lst = [5.0, 7.0, 10.0, 20.0, 33.0, 50.0, 100.0]
road_types_lst = ['primary']

#create df with all unique ID numbers, geometry and column ead
ead_df = gpd.GeoDataFrame(columns=['osm_id', 'asset', 'geometry']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst] 
                          +['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst] +['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst], geometry='geometry')

for rp_trig in haz_trig_rp_lst:
    damage_data_path_list = fut585_rainfall_damage_data_path.iterdir()
    rp_trig_path_list = [path for path in damage_data_path_list if '_trig{}'.format(rp_trig) in str(path)]

    damages_dict = {key: pd.DataFrame() for key in landslide_rp_lst}
    for data_path in rp_trig_path_list:
        df = gpd.read_parquet(data_path)
        damages_dict[df['return_period_landslide'].unique()[0]] = pd.concat([damages_dict[df['return_period_landslide'].unique()[0]], df], ignore_index=True)  #create dictionary with the return period 

    #modify dictionaries 
    non_empty_rps = [key for key, df in damages_dict.items() if not df.empty]
    if non_empty_rps:
        lowest_non_empty_rp = min(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key >= lowest_non_empty_rp} # Step 1: Remove all keys above this return period
        highest_non_empty_rp = max(non_empty_rps)
        damages_dict = {key: df for key, df in damages_dict.items() if key <= highest_non_empty_rp} # Step 2: Remove all keys below this return period
        for rp in damages_dict.keys():
            #print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}; exposed roads: {:.2f} km'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum(), damages_dict[rp]['Overlay'].sum()/1000))
            print('This is the total damage for landslide return period {} given rainfall event of {}: {:,.2f}'.format(rp, rp_trig, damages_dict[rp]['Partial destruction (0.5)'].sum()))   
        
        # Calculate EAD per OSM road segment for rainfall event
        combined_df = pd.concat([damages_dict[rp] for rp in damages_dict.keys()]) #merge dataframes into one
        combined_df['Partial destruction (0.5)'] = combined_df['Partial destruction (0.5)'].fillna(0)
        temp_df = combined_df.drop_duplicates(subset=['osm_id']) # remove duplicates
        ead_df = pd.merge(ead_df, temp_df[['osm_id', 'asset', 'geometry']], on=['osm_id', 'asset', 'geometry'], how='outer')
        
        pivoted_damages = combined_df.pivot_table(index='osm_id', columns='return_period_landslide', values='Partial destruction (0.5)', fill_value=0)
        
        print("Unique osm_id in original data:", combined_df['osm_id'].nunique()) # Print unique osm_id from the original DataFrame
        print("Unique osm_id in pivoted data:", pivoted_damages.index.nunique()) # Print unique osm_id from the pivoted DataFrame
        missing_ids = set(combined_df['osm_id']) - set(pivoted_damages.index) # Check if some osm_id values are missing from pivoted_damages
        print("Missing osm_id values:", missing_ids)
        
        ead_df['ead_{}'.format(rp_trig)] = ead_df.apply(lambda row: calculate_risk_vectorized(pivoted_damages.loc[row['osm_id']]) if row['osm_id'] in pivoted_damages.index else 0, axis=1)

        #fill in overlay columns
        overlay_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['Overlay'].to_dict()
        ead_df['ead_{}_overlay'.format(rp_trig)] = ead_df['osm_id'].map(overlay_dict)

        #fill in number of landslides columns
        number_landslide_dict = damages_dict[highest_non_empty_rp].set_index('osm_id')['number of landslides'].to_dict()
        ead_df['ead_{}_number_landslides'.format(rp_trig)] = ead_df['osm_id'].map(number_landslide_dict)

ead_df = handle_zero_values(ead_df, haz_trig_rp_lst) # Handle 0 values for higher return periods with damages for the lower return periods

# Calculate EAD for earthquake-triggered landslides
ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)
temp_df = (ead_df.filter(['osm_id']+['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst], axis=1)).set_index('osm_id')
ead_df['ead'] = ead_df.apply(lambda row: calculate_risk_vectorized(temp_df.loc[row['osm_id']]), axis=1)
ead_df = ead_df.to_crs(3857)

for rp_trig in haz_trig_rp_lst:
    print('This is the EAD given rainfall event of {}: {:,}'.format(rp_trig, ead_df['ead_{}'.format(rp_trig)].sum()))
print('This is the national EAD for rainfall-triggered landslides: {}'.format(sum(ead_df['ead'])))
print('This is the max EAD for rainfall-triggered landslides for a road segment: {}'.format(max(ead_df['ead'])))

This is the total damage for landslide return period 20.0 given rainfall event of 5: 1,110,586.54
This is the total damage for landslide return period 33.0 given rainfall event of 5: 10,875,160.28
This is the total damage for landslide return period 50.0 given rainfall event of 5: 77,387,451.18
This is the total damage for landslide return period 100.0 given rainfall event of 5: 439,990,837.57
Unique osm_id in original data: 174224
Unique osm_id in pivoted data: 174224
Missing osm_id values: set()
This is the total damage for landslide return period 10.0 given rainfall event of 25: 55,679,639.80
This is the total damage for landslide return period 20.0 given rainfall event of 25: 272,716,100.19
This is the total damage for landslide return period 33.0 given rainfall event of 25: 1,691,724,216.00
This is the total damage for landslide return period 50.0 given rainfall event of 25: 8,057,225,362.03
Unique osm_id in original data: 175213
Unique osm_id in pivoted data: 175213
Missing osm_i

  values = ead_df[['ead_{}_overlay'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  values = ead_df[['ead_{}_number_landslides'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0).values
  ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]] = ead_df[['ead_{}'.format(rp_trig) for rp_trig in haz_trig_rp_lst]].fillna(0)


This is the EAD given rainfall event of 5: 8,087,100.369983716
This is the EAD given rainfall event of 25: 263,978,859.99890238
This is the EAD given rainfall event of 200: 446,153,137.93574005
This is the EAD given rainfall event of 1000: 0
This is the national EAD for rainfall-triggered landslides: 38542809.76389298
This is the max EAD for rainfall-triggered landslides for a road segment: 696089.7445629857


In [31]:
# exposed roads
print("This is the road length exposed at national level: {:,.2f}".format(ead_df.ead_1000_overlay.sum()/1000))
print("This is the EAD at national level: {:,.2f}".format(ead_df.ead.sum()))

affected_road = ead_df.ead_1000_overlay.sum()/1000
damage = ead_df.ead.sum()
print('The expected annual damage based on length of affected road segment: {:,.2f} dollar per km'.format(damage/affected_road))

This is the road length exposed at national level: 35,801.47
This is the EAD at national level: 38,542,809.76
The expected annual damage based on length of affected road segment: 1,076.57 dollar per km


In [None]:
ead_df 

In [None]:
subnational_df = gpd.read_file(admin_path / 'gadm41_TJK.gpkg',layer=3)
subnational_df = subnational_df.to_crs(3857)
ead_df['GID_3'] = ead_df.apply(lambda road_segment: get_province(road_segment, subnational), axis=1)
subnational_df = subnational_df.merge(ead_df[['GID_3','ead']].groupby('GID_3').sum(),left_on='GID_3',right_index=True)
subnational_df[['NAME_2','ead']].groupby('NAME_2').sum().sort_values(by='ead',ascending=False)

In [None]:
subnational_df['binned'] =  pd.cut(subnational_df.ead,[0,1e4,5e4,1.5e5,3e5,subnational_df.ead.max()],labels=['\\$1-$10,000','\\$10,000-$50,000','\\$50,000-$150,000',
                                                                                                       '\\$150,000-$300,000','> $300,000'])
fig, ax = plt.subplots(1,1,figsize=(10, 10))

subnational_df.plot(column = 'binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
subnational_df.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_SSP585_EAD_municipal.png', bbox_inches='tight', dpi=300)

In [None]:
subnational_df.ead.max()

In [None]:
ead_df['damage/km'] = ead_df.ead/(ead_df.ead_1000_overlay/1000)
ead_df['damage/km'].max()

In [None]:
# figure EAD damages per km per road segment
ead_df['damage/km_binned'] = pd.cut(ead_df['damage/km'],[0,1e3,3e3,7e3,ead_df['damage/km'].max()],labels=['\\$1-$1,000','\\$1,000-$3,000','\\$3,000-$7,000','> $7,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='damage/km_binned',cmap='copper_r',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_SSP585_EAD_km.png', bbox_inches='tight', dpi=300)

In [None]:
# figure EAD due to rainfall-triggered landslide events due to 1000 year RP rainfall: damages per OSM road segment
ead_df['ead_1000_binned'] = pd.cut(ead_df['ead_1000'],[0,1e3,1e4,2.5e4,ead_df['ead_1000'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_1000_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to rainfall-triggered landslide events due to 25 year RP rainfall: damages per OSM road segment
ead_df['ead_25_binned'] = pd.cut(ead_df['ead_25'],[0,1e3,1e4,2.5e4,ead_df['ead_1000'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_25_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

In [None]:
# figure EAD due to rainfall-triggered landslides: damages per OSM road segment
ead_df['ead_binned'] = pd.cut(ead_df['ead'],[0,1e3,1e4,2.5e4,ead_df['ead'].max()],labels=['\\$1-$1,000','\\$1,000-$10,000','\\$10,000-$25,000','> $25,000'])
ead_df = gpd.GeoDataFrame(ead_df, geometry='geometry') # Set the GeoDataFrame's geometry column
ead_df = ead_df.to_crs(3857)

fig, ax = plt.subplots(1,1,figsize=(10, 10))
subnational.dissolve('GID_0').plot(ax=ax,facecolor="none",edgecolor='black')

ead_df.plot(column ='ead_binned',cmap='Reds',legend=True,ax=ax)
#damage_df.plot(column ='Partial destruction (0.5)',cmap='Reds',legend=True,ax=ax)

cx.add_basemap(ax, source=cx.providers.CartoDB.Positron,alpha=0.5)
ax.set_axis_off()

plt.savefig(figures_path /'landslide_rf_ssp585_EAD.png', bbox_inches='tight', dpi=300)

In [None]:
# project road of interest
dangara_gulliston_ids = dangara_gulliston_ids = ['629623298', '588578885', '667337582', '32623282', '32623136']
project_df = ead_df[ead_df['osm_id'].isin(dangara_gulliston_ids)]

project_df

In [None]:
print(shapely.length(project_df['geometry'])/1000) # length of road segments in km
print(sum(shapely.length(project_df['geometry'])/1000)) # total length of road segments in km

In [None]:
damage = sum(project_df['ead'])
affected_road = sum(project_df['ead_1000_overlay'])/1000
number_of_landslides = sum(project_df['ead_1000_number_landslides'])
road_segments_length = 22.976544 + 20.829034 + 10.498568

print('The expected annual damage for Dangara-Gulliston road project, based on total length of project road: {:.2f} dollar per km'.format(damage/67.34681578690605))
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected OSM road segment: {:.2f} dollar per km'.format(damage/road_segments_length)) 
print('The expected annual damage for Dangara-Gulliston road project, based on length of affected road segment: {:.2f} dollar per km'.format(damage/affected_road)) 
print('The expected annual damage for Dangara-Gulliston road project per landslide event: {:.2f} dollar'.format(damage/number_of_landslides)) #shouldn't we divide by the average landslides per year?

In [None]:
sum(project_df['ead'])

In [None]:
sum(project_df['ead_1000_overlay'])/1000