### Figures
This notebook contains the code used to generate the figures in the study.

In [None]:
import datetime
import math
import os
import pickle
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import xarray as xr
from scipy.stats import pearsonr

In [None]:
# Per-basin attribute table; the figure cells below read its grouping,
# NSE and std_q columns.
attributes_csv = Path('Data') / 'attributes_lat_lon.csv'
static_df = pd.read_csv(attributes_csv)

In [None]:
def compute_nse(observed, simulated):
    """Return the Nash-Sutcliffe efficiency of ``simulated`` against ``observed``.

    Evaluates ``1 - sum((simulated - observed)**2) / sum((observed - mean(observed))**2)``.
    If the observed series has no spread (denominator exactly zero), the
    denominator is replaced by ``1e-10`` to avoid dividing by zero.
    """
    squared_error = np.sum((simulated - observed) ** 2)
    obs_spread = np.sum((observed - np.mean(observed)) ** 2)
    # Constant observed series: nudge the denominator off zero.
    obs_spread = obs_spread + 1e-10 if obs_spread == 0 else obs_spread
    return 1 - squared_error / obs_spread

def compute_rmse(observed, simulated):
    """Return the root-mean-square error between ``simulated`` and ``observed``."""
    residuals = np.subtract(simulated, observed)
    mean_squared_error = np.square(residuals).mean()
    return math.sqrt(mean_squared_error)

def compute_kge(observed, simulated):
    """Return the Kling-Gupta efficiency of ``simulated`` against ``observed``.

    Combines three components, each compared with its ideal value of 1:
    the Pearson correlation ``r``, the ratio of standard deviations
    ``alpha = std(simulated) / std(observed)`` and the ratio of means
    ``beta = mean(simulated) / mean(observed)``. The result is
    ``1 - sqrt((r-1)**2 + (alpha-1)**2 + (beta-1)**2)``.
    """
    r, _p_value = pearsonr(observed, simulated)
    alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)
    correlation_term = (r - 1) ** 2
    variability_term = (alpha - 1) ** 2
    mean_term = (beta - 1) ** 2
    distance_sq = correlation_term + variability_term + mean_term
    return 1 - np.sqrt(float(distance_sq))

def compute_bias(sim, obs):
    """Return ``mean((mean(sim) - obs)**2) - var(sim)``.

    The first term is the mean squared distance between every observation
    and the average simulated value; the population variance of ``sim`` is
    then subtracted from it.

    NOTE(review): the argument order is ``(sim, obs)``, the reverse of the
    other ``compute_*`` helpers in this notebook, which take
    ``(observed, simulated)``.
    """
    sim_variance = np.var(sim)
    mean_sq_offset = np.mean((np.mean(sim) - obs) ** 2)
    return mean_sq_offset - sim_variance

In [None]:
# Experiment A basins
# NOTE(review): a later cell labelled "Experiment A" loads
# Data/runs/f_basins_sl_90_hs_32_0702_182604 -- same suffix as the run below
# but a different prefix. Confirm which directory name is current.
run_dir = Path("Data/runs/a_basins_sl_90_hs_32_0702_182604")
results_path = run_dir / "test" / "model_epoch050" / "test_results.p"

# pickle executes arbitrary code on load: only open result files produced by
# trusted runs.
with open(results_path, "rb") as fp:
    a_results = pickle.load(fp)

# One hydrograph per basin: observed vs simulated daily discharge, with the
# skill scores in the title.
for basin_name, basin_result in a_results.items():
    daily = basin_result['1D']['xr']
    obs_frame = daily['q_cms_obs'].to_dataframe().reset_index()
    sim_frame = daily['q_cms_sim'].to_dataframe().reset_index()
    # Rows where either series is missing are dropped before scoring.
    df = pd.DataFrame({'sim': sim_frame.q_cms_sim,
                       'obs': obs_frame.q_cms_obs,
                       'date': obs_frame.date}).dropna()
    df.index = pd.to_datetime(df.date)

    nse = compute_nse(df.obs, df.sim)
    rmse = compute_rmse(df.obs, df.sim)
    kge = compute_kge(df.obs, df.sim)
    r = pearsonr(df.obs, df.sim)[0]

    fig, ax = plt.subplots(figsize=(8,4))
    series_styles = (
        ('obs', dict(label='daily glofas', linestyle='-', color='black', zorder=1)),
        ('sim', dict(label='LSTM glofas', color='mediumturquoise', zorder=2, linestyle='-')),
    )
    for column, style in series_styles:
        ax.plot(df.date, df[column], **style)
    ax.legend()
    ax.set_ylabel("Discharge ($m^3$/s)")
    ax.set_title(f"{basin_name} - KGE {kge:.2f} - R {r:.2f} - NSE {nse:.2f} - RMSE {rmse:.2f}")
    plt.show()

In [None]:
# subgroup boxplots
# One entry per box, left to right:
# (tick label, grouping column, group value, NSE column, x position, box colour, median colour)
box_specs = [
    ('All', 'f_groups', 'all', 'f_nse', 0.0, 'indigo', 'white'),

    ('Random Group 1', 'e_groups', 'A', 'e_nse', 0.4, 'steelblue', 'black'),
    ('Random Group 2', 'e_groups', 'B', 'e_nse', 0.6, 'steelblue', 'black'),
    ('Random Group 3', 'e_groups', 'C', 'e_nse', 0.8, 'steelblue', 'black'),

    ('Low NSE', 'a_groups', 'low', 'a_nse', 1.2, 'darkcyan', 'black'),
    ('Med NSE', 'a_groups', 'medium', 'a_nse', 1.4, 'darkcyan', 'black'),
    ('High NSE', 'a_groups', 'high', 'a_nse', 1.6, 'darkcyan', 'black'),

    ('Low WB', 'b_groups', 'low WB', 'b_nse', 2.0, 'yellowgreen', 'black'),
    ('Med WB', 'b_groups', 'medium WB', 'b_nse', 2.2, 'yellowgreen', 'black'),
    ('High WB', 'b_groups', 'high WB', 'b_nse', 2.4, 'yellowgreen', 'black'),

    ('Reservoir', 'd_groups', 'dam', 'd_nse', 2.8, 'gold', 'black'),
    ('Ephemeral', 'd_groups', 'flashy', 'd_nse', 3.0, 'gold', 'black'),
    ('Natural', 'd_groups', 'natural', 'd_nse', 3.2, 'gold', 'black'),
]

fig, ax = plt.subplots(figsize=(8,4))
for _label, group_col, group_value, nse_col, position, box_colour, median_colour in box_specs:
    in_group = static_df[group_col] == group_value
    # Each box is drawn by its own call so it can carry its own colours.
    ax.boxplot(static_df.loc[in_group, nse_col], positions=[position],
               patch_artist=True,
               boxprops=dict(facecolor=box_colour, color=box_colour),
               medianprops=dict(color=median_colour))

# Tick labels follow the order in which the boxes were added.
ax.set_xticklabels([spec[0] for spec in box_specs])
plt.xticks(rotation=90)
ax.set_ylabel('NSE')
# plt.savefig('Data/images/boxplot_nse.png', bbox_inches='tight')

In [None]:
# std dev nse scatterplot
# One point per subgroup: x = mean of static_df.std_q over the subgroup's
# basins, y = median of the subgroup's NSE column.
# (subgroup label, experiment, grouping column in static_df, NSE column);
# a grouping column of None means "use every row of static_df".
subgroup_specs = [
    ('All', 'A', None, 'f_nse'),
    ('A', 'B', 'e_groups', 'e_nse'),
    ('B', 'B', 'e_groups', 'e_nse'),
    ('C', 'B', 'e_groups', 'e_nse'),
    ('low', 'C', 'a_groups', 'a_nse'),
    ('medium', 'C', 'a_groups', 'a_nse'),
    ('high', 'C', 'a_groups', 'a_nse'),
    ('low WB', 'D', 'b_groups', 'b_nse'),
    ('medium WB', 'D', 'b_groups', 'b_nse'),
    ('high WB', 'D', 'b_groups', 'b_nse'),
    ('dam', 'E', 'd_groups', 'd_nse'),
    ('flashy', 'E', 'd_groups', 'd_nse'),
    ('natural', 'E', 'd_groups', 'd_nse'),
]

records = []
for subgroup, exp, group_col, nse_col in subgroup_specs:
    if group_col is None:
        rows = static_df
    else:
        rows = static_df.loc[static_df[group_col] == subgroup]
    records.append({'subgroup': subgroup,
                    'std_norm': rows['std_q'].mean(),
                    'nse': rows[nse_col].median(),
                    'exp': exp})
df_diverse = pd.DataFrame(records, columns=['subgroup', 'std_norm', 'nse', 'exp'])

exp_colours = {'A': 'indigo', 'B': 'steelblue', 'C': 'darkcyan', 'D': 'yellowgreen', 'E': 'gold'}
for exp, colour in exp_colours.items():
    exp_rows = df_diverse.loc[df_diverse.exp == exp]
    plt.scatter(exp_rows.std_norm, exp_rows.nse, c=colour, label=f'Exp {exp}')
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)

# Label each point slightly above its marker. Scalars are passed as the
# coordinates: handing matplotlib one-element Series relies on an implicit
# Series-to-float conversion that newer pandas releases deprecate.
for row in df_diverse.itertuples(index=False):
    plt.annotate(row.subgroup, (row.std_norm, row.nse + 0.02))
# plt.legend()
# plt.savefig('Data/images/diversity.png', bbox_inches='tight')

In [None]:
def _load_test_results(run_name):
    """Unpickle ``Data/runs/<run_name>/test/model_epoch050/test_results.p``.

    pickle executes arbitrary code on load, so only point this at result
    files produced by trusted runs.
    """
    results_file = Path("Data/runs") / run_name / "test" / "model_epoch050" / "test_results.p"
    with open(results_file, "rb") as fp:
        return pickle.load(fp)


# Experiment E w/ CA
d_results_a_new = _load_test_results("d_group_dam_sl_365_hs_121_1502_114133")
d_results_b_new = _load_test_results("d_group_flashy_sl_365_hs_121_2002_182807")
d_results_c_new = _load_test_results("d_group_natural_sl_90_hs_121_2002_215342")

# Experiment E
d_results_a = _load_test_results("d_group_dam_sl_90_hs_256_3101_194107")
d_results_b = _load_test_results("d_group_flashy_sl_90_hs_256_3101_213002")
d_results_c = _load_test_results("d_group_natural_sl_90_hs_32_0102_122118")

# Experiment A
exp_a_results = _load_test_results("f_basins_sl_90_hs_32_0702_182604")

In [None]:
# Experiment E v Experiment A: Sample of Reservoir Basin Predictions
i = '14763607426'
basin_name = ''
exp_e_daily = d_results_a[i]['1D']['xr']
qobs = exp_e_daily['q_cms_obs']
qsim = exp_e_daily['q_cms_sim']
qsim_all = exp_a_results[i]['1D']['xr']['q_cms_sim']

obs_frame = qobs.to_dataframe().reset_index()
# Rows where any of the three series is missing are dropped.
df = pd.DataFrame({'sim': qsim.to_dataframe().reset_index().q_cms_sim,
                   'sim_all': qsim_all.to_dataframe().reset_index().q_cms_sim,
                   'obs': obs_frame.q_cms_obs,
                   'date': obs_frame.date}).dropna()
df.index = pd.to_datetime(df.date)

# Skill scores of the Experiment E simulation; computed here but not drawn
# on this figure.
nse = compute_nse(df.obs, df.sim)
rmse = compute_rmse(df.obs, df.sim)
kge = compute_kge(df.obs, df.sim)
r = pearsonr(df.obs, df.sim)[0]
b_bias = compute_bias(df.sim, df.obs)

fig, ax = plt.subplots(figsize=(12,4))
series_styles = (
    ('obs', dict(label='obs', linestyle='-', color='silver', zorder=1, linewidth=4.5)),
    ('sim', dict(label='Exp E sim', color='gold', zorder=2, linestyle='-', linewidth=2.5)),
    ('sim_all', dict(label='Exp A sim', color='indigo', zorder=3, linestyle='-', linewidth=2.5)),
)
for column, style in series_styles:
    ax.plot(df.date, df[column], **style)
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
# plt.savefig('Data/images/dam_improvement.png', bbox_inches='tight')

In [None]:
# NSE difference histograms
dam_df = static_df.loc[static_df.d_groups == 'dam']
flashy_df = static_df.loc[static_df.d_groups == 'flashy']
natural_df = static_df.loc[static_df.d_groups == 'natural']

# (basin subset, y value at which the dashed mean marker stops).
# The marker heights are the hand-picked values from the original figures
# (presumably the tallest bin of each histogram -- confirm).
# Figures were exported as Data/images/nse_diffa.png (dam), nse_diffb.png
# (flashy) and nse_diffc.png (natural) with bbox_inches='tight'.
hist_specs = [
    (dam_df, 50),
    (flashy_df, 18.5),
    (natural_df, 8.2),
]

for group_df, marker_top in hist_specs:
    plt.hist(group_df.d_diff_nse, bins=40, color='gray')
    # vlines takes (x, ymin, ymax). The mean goes in the x slot so the dashed
    # line marks the mean NSE difference; it previously sat in the ymin slot,
    # which pinned the line to x = 0 whatever the mean was.
    plt.vlines(group_df.d_diff_nse.mean(), 0.0, marker_top,
               color='blue', linestyles='--', linewidth=2)
    plt.show()

In [None]:
# Experiment E w/ CA v Experiment E: Sample of Reservoir Basin Predictions
i = '14423617656'
basin_name = ''
ca_daily = d_results_a_new[i]['1D']['xr']
qobs = ca_daily['q_cms_obs']
qsim = ca_daily['q_cms_sim']
qsim_all = d_results_a[i]['1D']['xr']['q_cms_sim']

obs_frame = qobs.to_dataframe().reset_index()
# Rows where any of the three series is missing are dropped.
df = pd.DataFrame({'sim': qsim.to_dataframe().reset_index().q_cms_sim,
                   'sim_all': qsim_all.to_dataframe().reset_index().q_cms_sim,
                   'obs': obs_frame.q_cms_obs,
                   'date': obs_frame.date}).dropna()
df.index = pd.to_datetime(df.date)

# Skill scores of the "w/ CA" simulation; the first four go into the title.
nse = compute_nse(df.obs, df.sim)
rmse = compute_rmse(df.obs, df.sim)
kge = compute_kge(df.obs, df.sim)
r = pearsonr(df.obs, df.sim)[0]
b_bias = compute_bias(df.sim, df.obs)

fig, ax = plt.subplots(figsize=(12,4))
series_styles = (
    ('obs', dict(label='obs', linestyle='-', color='silver', zorder=1, linewidth=5.5)),
    ('sim', dict(label='Exp E w/ CA sim', color='#9A0EEA', zorder=3, alpha=0.8, linestyle='-', linewidth=2.5)),
    ('sim_all', dict(label='Exp E sim', color='gold', zorder=2, alpha=0.8, linestyle='-', linewidth=2.5)),
)
for column, style in series_styles:
    ax.plot(df.date, df[column], **style)
ax.set_title(f"{basin_name} - KGE {kge:.2f} - R {r:.2f} - NSE {nse:.2f} - RMSE {rmse:.2f}")
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
# plt.savefig('Data/images/ca_improvement.png', bbox_inches='tight')

In [None]:
# shap feature importance barplots
# NOTE(review): `features` (the ordered list of model input names, one per
# SHAP feature index) is not defined in any cell shown in this notebook and
# must exist before this cell runs.
n_features = 29


def _summarise_shap(basin_id, n_features=29):
    """Collapse one basin's SHAP table to one row per feature.

    Reads ``Data/shaps/shapley_ts_<basin_id>.csv``. A column belongs to
    feature ``f`` when the text before its first underscore equals ``str(f)``
    (the remainder presumably identifies the time step -- confirm). For every
    feature the summed absolute SHAP value of each of its columns is
    collected, in column order, and reduced to:

    * ``maxs``  -- the largest of those sums,
    * ``t``     -- the position of that largest sum among the feature's columns,
    * ``means`` -- the mean of those sums,
    * ``means_p`` -- ``means`` as a percentage of the total over all features.

    Raises:
        ValueError: if a feature index has no matching column.
    """
    shap_ts = pd.read_csv(f'Data/shaps/shapley_ts_{basin_id}.csv')
    rows = []
    for f in range(n_features):
        seq = [shap_ts[c].abs().sum()
               for c in shap_ts.columns
               if c.split('_')[0] == str(f)]
        if not seq:
            raise ValueError(f'no SHAP columns found for feature {f} of basin {basin_id}')
        rows.append({'f': f, 'maxs': max(seq), 't': np.argmax(seq), 'means': np.mean(seq)})
    summary = pd.DataFrame(rows, columns=['f', 'maxs', 't', 'means'])
    summary['means_p'] = summary.means / summary.means.sum() * 100
    return summary


def _draw_shap_bars(shaps_df, title):
    """Draw each feature's percentage share of mean |SHAP| as a bar chart.

    Bars are coloured from the reversed "Blues_d" palette according to the
    rank of the feature's ``t`` value.
    """
    pal = sns.color_palette("Blues_d", len(shaps_df))
    # Double argsort converts the t values into ranks 0..n-1.
    rank = shaps_df.t.argsort().argsort()
    sns.barplot(x=features, y=shaps_df.means_p, palette=np.array(pal[::-1])[rank])
    plt.xticks(rotation=90, fontsize=12)
    plt.title(title)
    plt.ylabel('')


dam = ['13888737139', '14286853736', '14729335179', '14740465819', '14857052821']
flashy = ['14111286463', '14163313180', '14262493006', '14446761221', '14438236386']
# NOTE(review): the first id below carries a leading '-', unlike every other
# id in this cell. It is used both in the CSV file name and, via int(), in the
# static_df lookup -- confirm it is not a typo.
natural = ['-14895675224', '14373498333', '14427967711', '14945087340', '15173376543']

# Per-basin results: for every feature its mean |SHAP| and its peak position
# (stored under '<feature>_t'), plus the basin's NSE and its group.
results = {}
for f in features:
    results[f] = []
    results[f + '_t'] = []
results['NSE'] = []
results['group'] = []

for group, basin_ids in (('dam', dam), ('flashy', flashy), ('natural', natural)):
    for i in basin_ids:
        shaps_df = _summarise_shap(i, n_features)

        for v_i, v in enumerate(features):
            feature_row = shaps_df.loc[shaps_df.f == v_i]
            results[v].append(feature_row['means'].item())
            results[v + '_t'].append(feature_row['t'].item())

        nse_i = static_df.loc[static_df['index'] == int(i), 'd_new_nse'].item()
        results['NSE'].append(nse_i)
        results['group'].append(group)

        _draw_shap_bars(shaps_df, i)
        # plt.savefig(f'Data/images/{group}_{i}.png', bbox_inches='tight')
        plt.show()

results_df = pd.DataFrame(results)
# numeric_only=True keeps the string column `group` out of the average;
# without it DataFrame.mean raises on recent pandas versions.
dam_df = pd.DataFrame(results_df.loc[results_df.group == 'dam'].mean(numeric_only=True))
flashy_df = pd.DataFrame(results_df.loc[results_df.group == 'flashy'].mean(numeric_only=True))
natural_df = pd.DataFrame(results_df.loc[results_df.group == 'natural'].mean(numeric_only=True))

# Peak-position columns, in feature order. Built from `features` so that a
# feature whose own name happens to end in '_t' is not mistaken for one.
t_cols = [f + '_t' for f in features]

In [None]:
# averaged shap feature importance barplots
# One bar chart per basin group, using the group-averaged values built in the
# previous cell. Original figure titles / export names, in plotting order:
#   dam_df     -> 'Reservoir', Data/images/reservoir-all.png
#   flashy_df  -> 'Flashy',    Data/images/flashy-all.png
#   natural_df -> 'Natural',   Data/images/natural-all.png
# (each exported with plt.savefig(..., bbox_inches='tight'); a colorbar via
# plt.colorbar(sm) was also tried and left disabled)
for group_means in (dam_df, flashy_df, natural_df):
    pal = sns.color_palette("Blues_d", len(shaps_df))
    # Double argsort turns the averaged peak positions into ranks, which pick
    # each bar's shade from the reversed palette.
    rank = group_means.loc[t_cols, 0].argsort().argsort()
    bar_heights = group_means.loc[features, 0]
    sns.barplot(x=features, y=bar_heights, palette=np.array(pal[::-1])[rank])
    plt.xticks(rotation=90, fontsize=12)
    plt.ylabel('')
    plt.show()

In [None]:
# era5 precipitation impact on the model output scatterplot
# x: each basin's value in results_df['ERA5 precip']; y: that basin's NSE.
# (group value in results_df, legend label, marker colour)
group_styles = [
    ('dam', 'Reservoir', '#f0f921'),
    ('flashy', 'Ephemeral', '#cc4778'),
    ('natural', 'Natural', '#0d0887'),
]
for group, legend_label, colour in group_styles:
    group_rows = results_df.loc[results_df.group == group]
    plt.scatter(group_rows['ERA5 precip'], group_rows.NSE, label=legend_label, c=colour)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
# plt.savefig(f'Data/images/shaps_nse_era5_precip.png', bbox_inches='tight')
plt.legend()

In [None]:
# cdf of NSEs
def _plot_nse_cdf(values, bins=100, **line_kwargs):
    """Plot the binned empirical CDF of ``values`` on the current axes.

    Missing values are dropped first (``np.histogram`` cannot derive a bin
    range from NaN). The remaining values are histogrammed into ``bins``
    equal-width bins and the cumulative fraction is drawn against each bin's
    upper edge. ``line_kwargs`` are forwarded to ``plt.plot``.
    """
    count, bin_edges = np.histogram(values.dropna(), bins=bins)
    cdf = np.cumsum(count / count.sum())
    plt.plot(bin_edges[1:], cdf, **line_kwargs)


is_dam = static_df.d_groups == 'dam'
is_flashy = static_df.d_groups == 'flashy'
is_natural = static_df.d_groups == 'natural'

# Solid lines: d_new_nse ("Exp E w/ CA"); dashed lines: f_nse ("Exp A").
# Black is every basin; the three colours are the d_groups subsets.
cdf_curves = [
    (static_df.f_nse, dict(linestyle='--', color='black', label="Exp A Overall CDF")),
    (static_df.d_new_nse, dict(color='black', label="Exp E w/ CA: Overall CDF")),
    (static_df.loc[is_dam, 'd_new_nse'], dict(color='#fde725', label="Exp E w/ CA: Reservoir CDF")),
    (static_df.loc[is_flashy, 'd_new_nse'], dict(color='#35b779', label="Exp E w/ CA: Flashy CDF")),
    (static_df.loc[is_natural, 'd_new_nse'], dict(color='#31688e', label="Exp E w/ CA: Natural CDF")),
    (static_df.loc[is_dam, 'f_nse'], dict(linestyle='--', color='#fde725', label="Exp A: Reservoir CDF")),
    (static_df.loc[is_flashy, 'f_nse'], dict(linestyle='--', color='#35b779', label="Exp A: Flashy CDF")),
    (static_df.loc[is_natural, 'f_nse'], dict(linestyle='--', color='#31688e', label="Exp A: Natural CDF")),
]
for values, line_kwargs in cdf_curves:
    _plot_nse_cdf(values, **line_kwargs)

plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
# plt.legend()
# plt.savefig('Data/images/nses_cdf.png', bbox_inches='tight')