In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import numpy as np
import pandas as pd
import geopandas as gpd

def truncate_by_threshold(paths, flux_column='flux', threshold=.99):
    paths_sorted = paths.reset_index(drop=True).sort_values(by=flux_column, ascending=False)
    fluxes_sorted = paths_sorted[flux_column]
    total_flux = fluxes_sorted.sum()
    flux_percentiles = fluxes_sorted.cumsum() / total_flux
    excess = flux_percentiles[flux_percentiles >= threshold]
    cutoff = excess.idxmin()
    keep = flux_percentiles[flux_percentiles <= threshold].index
    paths_truncated = paths_sorted.loc[keep, :]
    return paths_truncated, fluxes_sorted, flux_percentiles, keep, cutoff


def plot_path_truncation(fluxes_sorted, flux_percentiles, cutoff, threshold):
    
    fig, axs = plt.subplots(1, 2, figsize=(10, 3))

    ax = axs[0]
    ax.plot(fluxes_sorted.values, flux_percentiles.values, alpha=.8, linewidth=.5, color='black')
    ax.axvline(x=fluxes_sorted[cutoff], linestyle='--', color='red', linewidth=.5)
    ax.axhline(y=threshold, linestyle='--', color='red', linewidth=.5, alpha=.8)
    ax.fill_between(x=fluxes_sorted, y1=flux_percentiles, where=fluxes_sorted>fluxes_sorted[cutoff], alpha=.8, color="w", edgecolor='black', hatch='//')
    ax.invert_xaxis()
    ax.set_xscale('log')
    ax.set_xlabel('Log(flux)')
    ax.set_ylabel('Cumulative density')

    ax = axs[1]
    fluxes_sorted.hist(ax=ax, alpha=.8, edgecolor='black', color='white', hatch='//')
    ax.axvline(x=fluxes_sorted[cutoff], linestyle='--', color='red', linewidth=.5)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.invert_xaxis()
    ax.set_ylabel('Number of observations (log)')
    ax.set_xlabel('Flux (log)')
    ax.grid(False);

    plt.suptitle('Choice of cutoff based-on fluxes')
    
    return fig

In [2]:
loss_cols = ['trips_lost_amin', 'trips_lost_mean', 'trips_lost_amax',
       '%trips_lost_amin', '%trips_lost_mean', '%trips_lost_amax',
       'time_m_delta_amin', 'time_m_delta_mean', 'time_m_delta_amax',
       '%time_m_delta_amin', '%time_m_delta_mean', '%time_m_delta_amax',
       'rerouting_loss_person_time_m_amin',
       'rerouting_loss_person_time_m_mean',
       'rerouting_loss_person_time_m_amax']

In [4]:
paths = pd.read_parquet("/Users/alison/Documents/World Bank Caribbean/data/results/transport/path and flux data/LCA_pathdata_time_m_30.parquet", engine='fastparquet')

In [23]:
threshold = .99
paths_truncated, fluxes_sorted, flux_percentiles, keep, cutoff = truncate_by_threshold(paths, threshold=threshold)
fig = plot_path_truncation(fluxes_sorted, flux_percentiles, cutoff, threshold)
fig.savefig("/Users/alison/Documents/World Bank Caribbean/data/figures/thresholding.png", dpi=300, bbox_inches='tight')

# Looking at data

In [7]:
fail = pd.read_parquet("/Users/alison/Documents/World Bank Caribbean/data/results/transport/disruption results/lca_roads_edges_sector_damages_with_roads.parquet")

In [8]:
loss_cols = ['trips_lost_amin', 'trips_lost_mean', 'trips_lost_amax',
       '%trips_lost_amin', '%trips_lost_mean', '%trips_lost_amax',
       'time_m_delta_amin', 'time_m_delta_mean', 'time_m_delta_amax',
       '%time_m_delta_amin', '%time_m_delta_mean', '%time_m_delta_amax',
       'rerouting_loss_person_time_m_amin',
       'rerouting_loss_person_time_m_mean',
       'rerouting_loss_person_time_m_amax']

fail[loss_cols].describe()

Unnamed: 0,trips_lost_amin,trips_lost_mean,trips_lost_amax,%trips_lost_amin,%trips_lost_mean,%trips_lost_amax,time_m_delta_amin,time_m_delta_mean,time_m_delta_amax,%time_m_delta_amin,%time_m_delta_mean,%time_m_delta_amax,rerouting_loss_person_time_m_amin,rerouting_loss_person_time_m_mean,rerouting_loss_person_time_m_amax
count,97.0,249.0,249.0,97.0,249.0,249.0,97.0,249.0,249.0,97.0,249.0,249.0,97.0,249.0,249.0
mean,25564.826313,10272.223915,10272.223915,0.262304,0.106755,0.106755,42358.360325,20861.939959,20861.939959,0.527731,0.580616,0.580616,7509.667547,3135.195106,3135.195106
std,14158.00593,15081.269891,15081.269891,0.135773,0.150463,0.150463,15468.728488,19715.941117,19715.941117,0.193525,0.13776,0.13776,2460.717875,3822.134813,3822.134813
min,3522.194446,430.997347,430.997347,0.03941,0.006859,0.006859,13904.326468,6897.062755,6897.062755,0.127862,0.127862,0.127862,3390.350417,264.821069,264.821069
25%,17593.725942,478.240304,478.240304,0.197217,0.00711,0.00711,35669.780825,7116.041848,7116.041848,0.36209,0.503315,0.503315,6018.65786,285.898164,285.898164
50%,24869.139567,576.016254,576.016254,0.267835,0.00805,0.00805,47378.955005,7274.73943,7274.73943,0.503315,0.585503,0.585503,7859.935306,380.972179,380.972179
75%,39132.168692,17596.075033,17596.075033,0.389664,0.197222,0.197222,50974.836476,47378.955005,47378.955005,0.756451,0.659669,0.659669,9468.866696,7859.935306,7859.935306
max,52355.389312,52355.389312,52355.389312,0.453517,0.453517,0.453517,60306.054218,60306.054218,60306.054218,0.812477,0.812477,0.812477,10337.451111,10337.451111,10337.451111


In [9]:
fail[['rp', 'hazard'] + loss_cols]

Unnamed: 0,rp,hazard,trips_lost_amin,trips_lost_mean,trips_lost_amax,%trips_lost_amin,%trips_lost_mean,%trips_lost_amax,time_m_delta_amin,time_m_delta_mean,time_m_delta_amax,%time_m_delta_amin,%time_m_delta_mean,%time_m_delta_amax,rerouting_loss_person_time_m_amin,rerouting_loss_person_time_m_mean,rerouting_loss_person_time_m_amax
0,,landslide,52355.389312,52355.389312,52355.389312,0.453517,0.453517,0.453517,13904.326468,13904.326468,13904.326468,0.127862,0.127862,0.127862,4204.016161,4204.016161,4204.016161
1,100.0,coastal,3850.751814,3850.751814,3850.751814,0.042111,0.042111,0.042111,15567.406168,15567.406168,15567.406168,0.758022,0.758022,0.758022,3390.350417,3390.350417,3390.350417
2,10.0,coastal,3522.194446,3522.194446,3522.194446,0.039410,0.039410,0.039410,15870.577432,15870.577432,15870.577432,0.812477,0.812477,0.812477,3455.443264,3455.443264,3455.443264
3,50.0,coastal,3793.897442,3793.897442,3793.897442,0.041254,0.041254,0.041254,15679.924983,15679.924983,15679.924983,0.773224,0.773224,0.773224,3403.770066,3403.770066,3403.770066
4,5.0,coastal,3522.194446,3522.194446,3522.194446,0.039410,0.039410,0.039410,15870.577432,15870.577432,15870.577432,0.812477,0.812477,0.812477,3455.443264,3455.443264,3455.443264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,5.0,pluvial,17593.725942,17596.075033,17596.075033,0.197217,0.197222,0.197222,60306.054218,60306.054218,60306.054218,0.756451,0.756435,0.756435,10337.451111,10337.451111,10337.451111
245,100.0,pluvial,39132.168692,39132.168692,39132.168692,0.389664,0.389664,0.389664,47378.955005,47378.955005,47378.955005,0.362090,0.362090,0.362090,7859.935306,7859.935306,7859.935306
246,10.0,pluvial,24869.139567,24869.139567,24869.139567,0.267835,0.267835,0.267835,50974.836476,50974.836476,50974.836476,0.503315,0.503315,0.503315,9468.866696,9468.866696,9468.866696
247,50.0,pluvial,39132.168692,39132.168692,39132.168692,0.389664,0.389664,0.389664,47378.955005,47378.955005,47378.955005,0.362090,0.362090,0.362090,7859.935306,7859.935306,7859.935306


In [13]:
fail.groupby(['rp', 'hazard']).mean()[loss_cols]

  fail.groupby(['rp', 'hazard']).mean()[loss_cols]


Unnamed: 0_level_0,Unnamed: 1_level_0,trips_lost_amin,trips_lost_mean,trips_lost_amax,%trips_lost_amin,%trips_lost_mean,%trips_lost_amax,time_m_delta_amin,time_m_delta_mean,time_m_delta_amax,%time_m_delta_amin,%time_m_delta_mean,%time_m_delta_amax,rerouting_loss_person_time_m_amin,rerouting_loss_person_time_m_mean,rerouting_loss_person_time_m_amax
rp,hazard,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
5.0,coastal,3662.585344,3662.585344,3662.585344,0.04044,0.04044,0.04044,15762.184563,15762.184563,15762.184563,0.790886,0.790886,0.790886,3426.836795,3426.836795,3426.836795
5.0,fluvial_defended,,430.997347,430.997347,,0.006859,0.006859,,6993.120434,6993.120434,,0.643957,0.643957,,330.125853,330.125853
5.0,fluvial_undefended,,478.240304,478.240304,,0.00711,0.00711,,7274.73943,7274.73943,,0.659669,0.659669,,380.972179,380.972179
5.0,pluvial,18742.475462,18744.453644,18744.453644,0.208367,0.208371,0.208371,58832.704048,58832.704048,58832.704048,0.716482,0.716468,0.716468,10200.306203,10200.306203,10200.306203
10.0,coastal,3784.097064,3784.097064,3784.097064,0.041528,0.041528,0.041528,15678.203279,15678.203279,15678.203279,0.771801,0.771801,0.771801,3411.883647,3411.883647,3411.883647
10.0,fluvial_defended,,443.429704,443.429704,,0.006925,0.006925,,7327.031822,7327.031822,,0.704576,0.704576,,486.442403,486.442403
10.0,fluvial_undefended,,478.240304,478.240304,,0.00711,0.00711,,7274.73943,7274.73943,,0.659669,0.659669,,380.972179,380.972179
10.0,pluvial,24869.139567,24869.139567,24869.139567,0.267835,0.267835,0.267835,50974.836476,50974.836476,50974.836476,0.503315,0.503315,0.503315,9468.866696,9468.866696,9468.866696
50.0,coastal,3947.266256,3947.266256,3947.266256,0.043442,0.043442,0.043442,15604.224857,15604.224857,15604.224857,0.741613,0.741613,0.741613,3416.767634,3416.767634,3416.767634
50.0,fluvial_defended,,544.490067,544.490067,,0.007695,0.007695,,7116.041848,7116.041848,,0.585503,0.585503,,285.898164,285.898164


In [15]:
import seaborn

fg = seaborn.factorplot(x='hazard', y='trips_lost_mean', hue='rp', 
                        col='hazard', data=fail, kind='bar')
fg.set_xlabels('')

AttributeError: module 'seaborn' has no attribute 'factorplot'

In [None]:
for rp in [5, 10, 50, 100]:
    fail_rp = fail[fail['rp'] == rp]
    for 

In [None]:
rp = 5
fail_rp = fail[fail['rp'] == rp]


In [30]:
data = pd.read_parquet("/Users/alison/Documents/World Bank Caribbean/data/processed_data/infrastructure/transport/LCA_schools_pathdata_time_m_60.parquet", engine='fastparquet')

In [31]:
data

Unnamed: 0,origin_id,destination_id,edge_path,time_m,flux,school_district
0,roadn_10054,LCA_edu_12,"[roade_475, roade_474, roade_473, roade_472, r...",32.022526,0.001965,Five
1,roadn_10054,LCA_edu_126,"[roade_475, roade_474, roade_473, roade_472, r...",14.334686,0.008314,Five
2,roadn_10054,LCA_edu_127,"[roade_475, roade_474, roade_473, roade_472, r...",29.663314,0.003880,Five
3,roadn_10054,LCA_edu_128,"[roade_475, roade_474, roade_473, roade_472, r...",29.718432,0.003604,Five
4,roadn_10054,LCA_edu_149,"[roade_475, roade_474, roade_473, roade_472, r...",30.934998,0.000078,Five
...,...,...,...,...,...,...
5273,roadn_9933,LCA_edu_39_and_LCA_edu_63,[roade_11030],0.311917,7.128984,Three
5274,roadn_9933,LCA_edu_50,"[roade_11030, roade_26344, roade_34638, roade_...",5.345988,0.001133,Three
5275,roadn_9933,LCA_edu_6,"[roade_11029, roade_11028, roade_11027, roade_...",1.581064,0.028880,Three
5276,roadn_9933,LCA_edu_61_and_LCA_edu_115,"[roade_11029, roade_11028, roade_11027, roade_...",4.622602,0.002202,Three
