# Figure 3: Performance evaluation [NSE] (PUR + PUB)

In [4]:
import os 
import numpy as np
import pandas as pd
import geopandas as gpd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Project locations. The original machine-specific paths stay as defaults, so
# existing runs behave exactly as before; set DEEPHYDRO_DIR / PATAGONIAMET_DIR
# to run the notebook on another machine without editing this cell.
os.chdir(os.environ.get("DEEPHYDRO_DIR", "/home/rooda/OneDrive/Projects/DeepHydro/"))

# `path_pmet` is joined to file names by plain string concatenation further
# down, so os.path.join(..., "") guarantees that it ends with a path separator.
path_pmet = os.path.join(os.environ.get("PATAGONIAMET_DIR", "/home/rooda/OneDrive/Projects/PatagoniaMet/"), "")

## Data

In [5]:
# pmet-obs
# Gauge subset, indexed by gauge_id and ordered by descending gauge_lat.
selection = (
    gpd.read_file("data/GIS/Basins_PMETobs_points_subset.gpkg")
    .set_index("gauge_id")
    .sort_values("gauge_lat", ascending=False)
)

# Metadata table restricted to (and ordered like) the selected gauges.
pmet_metadata = pd.read_csv(
    path_pmet + "data/Zenodo/v11/Q_PMETobs_v11_metadata.csv",
    index_col=0,
).loc[selection.index]

# Observed series: keep rows from 1990-01-01 to 2019-12-31 and only the
# columns of the selected gauges.
pmet_data = pd.read_csv(
    path_pmet + "data/Zenodo/v11/Q_PMETobs_1950_2020_v11d.csv",
    parse_dates=["Date"],
    index_col=0,
).loc["1990-01-01":"2019-12-31", selection.index]

# Multiply by 1000 and 86400, then divide every gauge column by its
# total_area * 1e6 (presumably m3/s -> mm/day with areas in km2; TODO confirm units).
pmet_data = pmet_data.mul(1000).mul(86400).div(pmet_metadata.total_area.mul(1e6))

In [6]:
# Shorter column names applied to the LSTM performance tables.
new_names = {"Pearson-r": "r", "Beta-KGE": "Beta", "Alpha-NSE": "Gamma"}


def _read_lstm_scores(path):
    """Read one LSTM performance table (first column as index) and apply `new_names`."""
    return pd.read_csv(path, index_col=0).rename(columns=new_names)


def _read_process_based_scores(path):
    """Read a process-based performance table once and split it by model.

    Returns a (GR4J, TUWmodel) pair of DataFrames, each holding the rows of
    that model and indexed by the "Basin" column. Reading the file a single
    time avoids the duplicated disk access of loading it once per model.
    """
    scores = pd.read_csv(path)
    return tuple(scores[scores.Model == model].set_index("Basin") for model in ("GR4J", "TUWmodel"))


# cross-validation (PUR)
pur_lstm_oggm_on  = _read_lstm_scores("results/performance/Historical_CV_PUR_LSTM_OGGM_on.csv")
pur_lstm_oggm_off = _read_lstm_scores("results/performance/Historical_CV_PUR_LSTM_OGGM_off.csv")
pur_GR4J, pur_TUWmodel = _read_process_based_scores("results/performance/Historical_CV_PUR_process_based.csv")

# cross-validation (PUB)
pub_lstm_oggm_on  = _read_lstm_scores("results/performance/Historical_CV_PUB_LSTM_OGGM_on.csv")
pub_lstm_oggm_off = _read_lstm_scores("results/performance/Historical_CV_PUB_LSTM_OGGM_off.csv")
pub_GR4J, pub_TUWmodel = _read_process_based_scores("results/performance/Historical_CV_PUB_process_based.csv")

# only one df per model: columns get a first level ('PUR' / 'PUB') and rows are
# reindexed to the selected gauges (basins absent from a table become NaN rows).
lstm_oggm_on  = pd.concat([pur_lstm_oggm_on, pub_lstm_oggm_on], keys=['PUR', 'PUB'], axis = 1).reindex(selection.index)
lstm_oggm_off = pd.concat([pur_lstm_oggm_off, pub_lstm_oggm_off], keys=['PUR', 'PUB'], axis = 1).reindex(selection.index)
GR4J          = pd.concat([pur_GR4J, pub_GR4J], keys=['PUR', 'PUB'], axis = 1).reindex(selection.index)
TUWmodel      = pd.concat([pur_TUWmodel, pub_TUWmodel], keys=['PUR', 'PUB'], axis = 1).reindex(selection.index)

## Plot

In [8]:
colorscale = [(0, "#ccebc5"), (0.3, "#4eb3d3"), (1, "#034b8a")]
parameter = "NSE"

# Panel grid: one row per cross-validation experiment and one column per
# benchmark. Every panel plots the benchmark score (x) against the score of
# the LSTM + OGGM model (y) for the metric named by `parameter`.
experiments = ["PUB", "PUR"]
benchmarks = {"Only LSTM": lstm_oggm_off, "TUWmodel + OGGM": TUWmodel, "GR4J + OGGM": GR4J}
n_rows, n_cols = len(experiments), len(benchmarks)

fig = make_subplots(rows=n_rows, cols=n_cols, horizontal_spacing=0.02, vertical_spacing=0.03,
                    shared_yaxes=True, shared_xaxes=True, row_titles=experiments)

# Dotted 1:1 line in every panel (points above it: LSTM + OGGM scores higher).
for row in range(1, n_rows + 1):
    for col in range(1, n_cols + 1):
        fig.add_trace(go.Scatter(x=np.array([-1, 1]), y=np.array([-1, 1]), mode='lines',
                                 line=dict(color='black', width=1, dash='dot'), showlegend=False),
                      row=row, col=col)

# Marker size is 2 ** log10(total_area); marker colour is glacier_cover on a 0-40 scale.
marker_size = np.exp2(np.log10(pmet_metadata.total_area))

for row, experiment in enumerate(experiments, start=1):
    for col, benchmark in enumerate(benchmarks.values(), start=1):
        marker = dict(size=marker_size, color=pmet_metadata.glacier_cover, opacity=0.9, showscale=False,
                      line_width=1, line_color="grey", cmin=0, cmax=40, colorscale=colorscale)
        if (row, col) == (1, 1):
            # A single shared colour bar, attached to the first panel only.
            marker.update(showscale=True,
                          colorbar=dict(len=1, x=1.01, title='Glacier<br>area (%)', thickness=20, tickwidth=1))
        fig.add_trace(go.Scatter(x=benchmark[experiment][parameter], y=lstm_oggm_on[experiment][parameter],
                                 text=pmet_metadata.gauge_name, marker=marker, mode='markers', showlegend=False),
                      row=row, col=col)

# Panel letters a) ... f), assigned row by row.
for panel, letter in enumerate("abcdef"[:n_rows * n_cols]):
    panel_row, panel_col = divmod(panel, n_cols)
    fig.add_annotation(text=letter + ")", font=dict(size=16), x=-0.15, y=0.95, showarrow=False,
                       row=panel_row + 1, col=panel_col + 1)

# Arrow and caption in the first panel.
fig.add_annotation(x=0.25, y=0.5, ax=65, ay=65, arrowcolor= "black", arrowwidth= 1.5, showarrow=True, arrowhead=4, row=1, col=1)
fig.add_annotation(x=0.08, y=0.54, text="Hybrid approach<br>is better",
                   font=dict(size=15),  opacity= 0.9, showarrow=False, row=1, col=1)

# Axis titles follow `parameter`, so they stay correct if another metric is plotted.
fig.update_yaxes(title="{} (LSTM + OGGM)".format(parameter), title_standoff=5, col=1)
for col, benchmark_label in enumerate(benchmarks, start=1):
    fig.update_xaxes(title="{} ({})".format(parameter, benchmark_label), title_standoff=5, row=n_rows, col=col)
fig.update_xaxes(ticks="outside", griddash = "dot", range = [-0.2, 1], dtick = 0.2)
fig.update_yaxes(ticks="outside", griddash = "dot", range = [-0.2, 1], dtick = 0.2)

fig.update_layout(font=dict(size=13), autosize = False, width = 1100, height = 700, template = "seaborn", margin = dict(l=5, r=0, b=5, t=5))
fig.update_layout(legend=dict(y=0.52, x = 1.01, orientation="v"))

fig.write_image("reports/figures/Figure3_Performance_calibration_{}.png".format(parameter), scale = 4)
#fig.show()

## Text

In [None]:
# Metric column read by the summary cells of this section
# (same value as the one assigned in the Plot cell).
parameter = "NSE"

In [None]:
# PUB: share of basins (in %) where LSTM + OGGM scores strictly higher than
# each alternative, taken one at a time. The denominator is the number of rows
# of lstm_oggm_on, so basins with a missing score count as "not better".
hybrid_pub = lstm_oggm_on["PUB"][parameter]
share_pub = [
    (hybrid_pub > alternative["PUB"][parameter]).sum() * 100 / len(lstm_oggm_on)
    for alternative in (lstm_oggm_off, TUWmodel, GR4J)
]

"OGGM on is better than OGGM off, TUWmodel and GR4J in {:.0f}%, {:.0f}% and {:.0f}%".format(*share_pub)

In [None]:
# PUR: share of basins (in %) where LSTM + OGGM scores strictly higher than
# each alternative, taken one at a time (pairwise, not "all at once"). The
# denominator is the number of rows of lstm_oggm_on, so basins with a missing
# score count as "not better".
hybrid_pur = lstm_oggm_on["PUR"][parameter]
share_pur = [
    (hybrid_pur > alternative["PUR"][parameter]).sum() * 100 / len(lstm_oggm_on)
    for alternative in (lstm_oggm_off, TUWmodel, GR4J)
]

"OGGM on is better than OGGM off, TUWmodel and GR4J in {:.0f}%, {:.0f}% and {:.0f}%".format(*share_pur)

In [None]:
# OGGM on > ALL alternatives (PUB)
exp = "PUB"

# Score of LSTM + OGGM and, per basin, the best score among the alternatives
# (max skips missing values, so it is NaN only when every alternative is missing).
hybrid_score = lstm_oggm_on[exp][parameter]
best_alternative = pd.concat(
    [lstm_oggm_off[exp][parameter], TUWmodel[exp][parameter], GR4J[exp][parameter]], axis=1
).max(axis=1)

# LSTM + OGGM is the best model of a basin when it has a score and no
# alternative exceeds it (ties count for LSTM + OGGM). This yields the same
# count as checking idxmax(axis=1) with LSTM + OGGM as first column, but it
# avoids idxmax on all-NaN rows (deprecated in pandas) and does not depend on
# telling apart columns that share the same label.
hybrid_is_best = hybrid_score.notna() & ~(best_alternative > hybrid_score)

"OGGM on is better than all alternatives in {:.0f}% of the basins".format(
    hybrid_is_best.sum()*100/len(selection))

In [None]:
# OGGM on > ALL alternatives (PUR)
exp = "PUR"

# Score of LSTM + OGGM and, per basin, the best score among the alternatives
# (max skips missing values, so it is NaN only when every alternative is missing).
hybrid_score = lstm_oggm_on[exp][parameter]
best_alternative = pd.concat(
    [lstm_oggm_off[exp][parameter], TUWmodel[exp][parameter], GR4J[exp][parameter]], axis=1
).max(axis=1)

# LSTM + OGGM is the best model of a basin when it has a score and no
# alternative exceeds it (ties count for LSTM + OGGM). This yields the same
# count as checking idxmax(axis=1) with LSTM + OGGM as first column, but it
# avoids idxmax on all-NaN rows (deprecated in pandas) and does not depend on
# telling apart columns that share the same label.
hybrid_is_best = hybrid_score.notna() & ~(best_alternative > hybrid_score)

"OGGM on is better than all alternatives in {:.0f}% of the basins".format(
    hybrid_is_best.sum()*100/len(selection))

In [None]:
# Median score per model, restricted to basins whose `glacier_a` attribute in
# `selection` exceeds `threshold` (presumably a glacier-cover measure; TODO confirm units).
threshold = 5
exp = "PUR"

# Boolean mask over the selected gauges, shared by all four models.
glacier_mask = selection.glacier_a > threshold
model_medians = [
    model[exp][parameter][glacier_mask].median()
    for model in (lstm_oggm_on, lstm_oggm_off, TUWmodel, GR4J)
]

"Median NSE for all models {:.2f}, {:.2f} and {:.2f}, {:.2f}, (in total {} basins)".format(
    *model_medians,
    len(lstm_oggm_on[glacier_mask].index))