# NB4: Analysis of bias-corrected streamflow from SUMMA+mizuRoute forced by retrospective forcing

BMORPH bias-correction of retrospective SUMMA+mizuRoute streamflow simulation forced by gmet

- training period: 1981-01-01 2000-12-31

- BC applying periods: 1970/01-2020/12

Analysis periods: control (1980/10-2004/09) and validation (2001/10-2018/09)

In [None]:
%matplotlib inline

import os, sys
import xarray as xr
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import scale as mscale
from matplotlib.ticker import NullFormatter
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.patches as patches
from matplotlib.lines import Line2D
import cartopy.crs as ccrs

from scripts.units import cms2csf
from scripts.utility import AutoVivification
from scripts.utility import PPFScale
mscale.register_scale(PPFScale)
import scripts.metrics as metrics
import scripts.colors as ccmap
from scripts.utility import base_map

# Report the interpreter and xarray versions this notebook is running with
version_triplet = sys.version_info[:3]  # (major, minor, micro)
print("\nThe Python version: %s.%s.%s" % version_triplet)
print(xr.__name__, xr.__version__)

In [None]:
# create colormap
def _white_to(color, name):
    """Return a 256-level linear colormap that runs from white to *color*.

    color: any matplotlib color specification used for the upper end (1.0)
    name:  name stored on the colormap object
    """
    return LinearSegmentedColormap.from_list(
        name, [(0.0, 'xkcd:white'), (1.0, color)], N=256)


# Each colormap gets its own name; previously 'custom4' was used for both
# cmap_gmet_bc and cmap_gmet_raw.
cmap_gmet_control = _white_to('xkcd:blue', 'custom1')
cmap_gmet_raw = _white_to('xkcd:blue', 'custom2')
cmap_obs = _white_to('xkcd:black', 'custom3')
cmap_gmet_bc = _white_to('xkcd:cyan', 'custom4')

## 1. Setup

In [None]:
# directories
main_path = '/glade/campaign/ral/hap/mizukami/archive/pnw_hydrology/final_archive_v1' # !!! This is top directory of the dataset.
# ancillary data (naturalized flow) and, below it, the geospatial layers
nrni_path = os.path.join(main_path, 'ancillary_data')
geo_path = os.path.join(nrni_path, 'geospatial_data')
# output location for figures; per-site figures go into their own sub-directory
figure_path = 'NB4_figures'
for out_dir in (figure_path, os.path.join(figure_path, 'per_site')):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Retrospective simulations: display name -> metadata
# ('case' is the directory name under main_path that holds the run outputs)
retro_runs = {'GMET': {'case': 'GMET_hist'}}

# Analysis periods: time slice plus the line color and colormap used in plots
periods = {
    'valid': dict(time=slice('2001-10-01', '2018-09-30'), lc='xkcd:blue', cmap=cmap_gmet_control),
    'control': dict(time=slice('1980-10-01', '2004-09-30'), lc='xkcd:blue', cmap=cmap_gmet_control),
}

# Flow variables to analyze: variable name -> plotting transparency and legend label
flow_names = {
    'flow_scbc_u': dict(alpha=1, legend_name='SCBC'),
    'flow_raw': dict(alpha=0.3, legend_name='raw'),
}

# xxx_names: list of simulation name
retro_names = list(retro_runs)

# True: flow in english unit (cfs); False: flow in metric unit (cms)
english_unit = False
flow_unit = 'cfs' if english_unit else 'cms'

## 2. Load data

### 2.1 geopackage data

In [None]:
def _read_gpkg(file_name):
    """Read one GeoPackage file located in geo_path."""
    return gpd.read_file(os.path.join(geo_path, file_name))


# flow sites, river reaches and HUC12 polygons
df_site = _read_gpkg('PNW_flow_site.gpkg')
df_reach = _read_gpkg('rivEndoMERITpfaf_PNW.gpkg')
df_huc12 = _read_gpkg('HUC12_MERIT_PNW.gpkg')
# simplified polygon outlines (tolerance 0.05, in the layer's coordinate units)
simplified_huc12 = df_huc12.geometry.simplify(0.05)
df_huc12['geometry'] = simplified_huc12

### 2.3. Read bmorph outputs

Read the bmorph bias-corrected flow output into xarray datasets and store them in the dictionary ds_bc_flow[retro_name][period]

In [None]:
%%time
# ds_bc_flow[retro_name][period]: bmorph output for one simulation, subset to one
# analysis period and loaded into memory
ds_bc_flow = AutoVivification()
for retro_name, run in retro_runs.items():
    case = run['case']
    ds_temp = xr.open_mfdataset(os.path.join(main_path, case, 'bmorph_site_univariate_daily.nc'))
    print(retro_name)
    for period, period_meta in periods.items():
        ds_bc_flow[retro_name][period] = ds_temp.sel(time=period_meta['time']).load()
    # Sites with bias-corrected flow. Selecting on time does not change the site
    # axis, so read it from the full dataset instead of relying on whichever
    # period the loop above happened to finish with.
    ref_site = ds_temp.site.values

### 2.2. Read Naturalized flow data

In [None]:
# Unimpaired (naturalized) flow, restricted to the sites that have bias-corrected flow
nrni_file = os.path.join(nrni_path, 'PNW_unimpaired_flow_1951-2018_latlon.nc')
ds_tmp = xr.open_dataset(nrni_file).sel(site=ref_site)
nrni_site = ds_tmp.site.values
# store one in-memory subset per analysis period under the 'obs' key
for period, period_meta in periods.items():
    ds_bc_flow['obs'][period] = ds_tmp.sel(time=period_meta['time']).load()

## 3. Computing flow metrics

In [None]:
%%time
def _dayofyear_mean(flow):
    """Average a daily flow series over time for each day of the year."""
    return flow.groupby('time.dayofyear').mean()


# ds_seasonal_flow[retro][period][flow_name] for simulations,
# ds_seasonal_flow['obs'][period] for the naturalized flow
ds_seasonal_flow = AutoVivification()
for retro in retro_runs:
    for period in ds_bc_flow[retro].keys():
        for flow_name in flow_names:
            sim_flow = ds_bc_flow[retro][period][flow_name]
            ds_seasonal_flow[retro][period][flow_name] = _dayofyear_mean(sim_flow)
for period in periods:
    obs_flow = ds_bc_flow['obs'][period]['streamflow']
    ds_seasonal_flow['obs'][period] = _dayofyear_mean(obs_flow)

In [None]:
%%time
def _flow_metrics(flow):
    """Compute all flow metrics for one daily flow series.

    flow: daily flow DataArray (simulated or naturalized)
    Returns a dict keyed by metric group name; the values are whatever the
    corresponding scripts.metrics function returns.

    Disabled metrics (kept for reference): 'BFI' (metrics.BFI),
    'high_q_freq_dur' (metrics.high_q_freq_dur), 'low_q_freq_dur' (metrics.low_q_freq_dur).
    """
    # 7-step centered moving average; computed once and shared by annual max and min
    flow_smooth = flow.rolling(time=7, center=True).mean()
    return {
        'annual_max': metrics.annual_max(flow_smooth),
        'annual_min': metrics.annual_min(flow_smooth),
        'ctr': metrics.annual_centroid(flow),
        'FMS': metrics.FMS(flow),
        'FHV': metrics.FHV(flow),
        'FLV': metrics.FLV(flow),
        'season_mean': metrics.season_mean(flow),
    }


# ds_flow_metrics[metric][retro][period][flow_name] for simulations,
# ds_flow_metrics[metric]['obs'][period] for the naturalized flow
ds_flow_metrics = AutoVivification()

for retro in retro_runs:
    for period in ds_bc_flow[retro].keys():
        for flow_name in flow_names:
            sim_metrics = _flow_metrics(ds_bc_flow[retro][period][flow_name])
            for metric_name, metric_value in sim_metrics.items():
                ds_flow_metrics[metric_name][retro][period][flow_name] = metric_value
        print(f'{retro}-{period}')

for period in periods:
    obs_metrics = _flow_metrics(ds_bc_flow['obs'][period]['streamflow'])
    for metric_name, metric_value in obs_metrics.items():
        ds_flow_metrics[metric_name]['obs'][period] = metric_value
    print(f'obs-{period}')

## 4. Error in bias-corrected flow during historical period (1980-2004)

- bias at each site
- seasonal bias at each site

In [None]:
# Names of the error metrics stored per site, grouped by what they describe.
# The flattened order below is the order of the keys in error_metric.
_metric_groups = {
    'annual timing and extremes': [
        'annual_centroid', 'annual_min_day', 'annual_max_day',
        'annual_min_flow', 'annual_max_flow',
    ],
    'flow duration curve segments': ['pbiasFHV', 'pbiasFLV'],
    'high flow events': ['mean_high_q_dur', 'freq_high_q'],
    'daily and seasonal series': ['alpha', 'beta', 'corr_seas', 'corr', 'kge'],
    'seasonal volume': ['pbias_djf', 'pbias_mam', 'pbias_jja', 'pbias_son'],
}
metric_lists = [name for names in _metric_groups.values() for name in names]

In [None]:
%%time

hist_period = 'control' # 'valid' or 'control'

# error_metric[name][site, simulation, flow type]
# Start from NaN rather than 0: metrics listed in metric_lists but not filled below
# (mean_high_q_dur, freq_high_q; their computation is disabled) must not look like
# a perfect zero error.
error_shape = (len(ref_site), len(retro_runs), len(flow_names))
error_metric = {metric: np.full(error_shape, np.nan) for metric in metric_lists}

# groups of ds_flow_metrics that are compared between obs and simulation
flow_metric_groups = ('ctr', 'annual_max', 'annual_min', 'FHV', 'FLV', 'season_mean')

# error name -> (group in ds_flow_metrics, variable in that group, error function, scale to percent?)
annual_errors = {
    'annual_centroid': ('ctr', 'ann_centroid_day', metrics.bias, False),
    'annual_max_day': ('annual_max', 'ann_max_day', metrics.bias, False),
    'annual_max_flow': ('annual_max', 'ann_max_flow', metrics.pbias, True),
    'annual_min_day': ('annual_min', 'ann_min_day', metrics.bias, False),
    'annual_min_flow': ('annual_min', 'ann_min_flow', metrics.pbias, True),
    'pbiasFHV': ('FHV', 'FHV', metrics.pbias, True),
    'pbiasFLV': ('FLV', 'FLV', metrics.pbias, True),
}
# error name -> function applied to the (obs, sim) daily flow series
daily_errors = {
    'alpha': metrics.alpha,
    'beta': metrics.beta,
    'corr': metrics.corr,
    'kge': metrics.kge,
}
# error name -> season label of the seasonal mean flow
seasonal_errors = {'pbias_djf': 'DJF', 'pbias_mam': 'MAM', 'pbias_jja': 'JJA', 'pbias_son': 'SON'}

for r, site in enumerate(ref_site):
    # naturalized (obs) daily flow; negative values are replaced by a tiny positive flow
    sr_obs = ds_bc_flow['obs'][hist_period]['streamflow'].sel(site=site).values
    sr_obs = np.where(sr_obs < 0.0, 1.0e-7, sr_obs)

    sr_seas_obs = ds_seasonal_flow['obs'][hist_period].sel(site=site).values

    # obs flow metrics at this site, selected once and reused for every simulation
    obs_metric = {group: ds_flow_metrics[group]['obs'][hist_period].sel(site=site)
                  for group in flow_metric_groups}

    for c, sim_name in enumerate(retro_runs):
        for d, flow_name in enumerate(flow_names):
            # simulated daily flow, with the same treatment of negative values
            sr_sim = ds_bc_flow[sim_name][hist_period][flow_name].sel(site=site).values
            sr_sim = np.where(sr_sim < 0.0, 1.0e-7, sr_sim)

            sr_seas_sim = ds_seasonal_flow[sim_name][hist_period][flow_name].sel(site=site).values

            sim_metric = {group: ds_flow_metrics[group][sim_name][hist_period][flow_name].sel(site=site)
                          for group in flow_metric_groups}

            # errors in annual flow metrics (timing in bias, volumes in percent bias)
            for name, (group, var, error_func, in_percent) in annual_errors.items():
                error = error_func(obs_metric[group][var].values, sim_metric[group][var].values)
                error_metric[name][r, c, d] = error * 100 if in_percent else error

            # errors computed directly from the daily flow series
            for name, error_func in daily_errors.items():
                error_metric[name][r, c, d] = error_func(sr_obs, sr_sim)

            # correlation of the day-of-year mean flow
            error_metric['corr_seas'][r, c, d] = metrics.corr(sr_seas_obs, sr_seas_sim)

            # percent bias of the seasonal mean flow
            for name, season in seasonal_errors.items():
                obs_season = obs_metric['season_mean'].sel(season=season).values
                sim_season = sim_metric['season_mean'].sel(season=season).values
                error_metric[name][r, c, d] = metrics.pbias(obs_season, sim_season) * 100

In [None]:
# Names of the error metrics stored per site, grouped by what they describe.
# The flattened order below is the order of the keys in error_metric.
_metric_groups = {
    'annual timing and extremes': [
        'annual_centroid', 'annual_min_day', 'annual_max_day',
        'annual_min_flow', 'annual_max_flow',
    ],
    'flow duration curve segments': ['pbiasFHV', 'pbiasFLV'],
    'high flow events': ['mean_high_q_dur', 'freq_high_q'],
    'daily and seasonal series': ['alpha', 'beta', 'corr_seas', 'corr', 'kge'],
    'seasonal volume': ['pbias_djf', 'pbias_mam', 'pbias_jja', 'pbias_son'],
}
metric_lists = [name for names in _metric_groups.values() for name in names]

In [None]:
%%time

hist_period = 'control' # 'valid' or 'control'

# error_metric[name][site, simulation, flow type]
# Start from NaN rather than 0: metrics listed in metric_lists but not filled below
# (mean_high_q_dur, freq_high_q; their computation is disabled) must not look like
# a perfect zero error.
error_shape = (len(ref_site), len(retro_runs), len(flow_names))
error_metric = {metric: np.full(error_shape, np.nan) for metric in metric_lists}

# groups of ds_flow_metrics that are compared between obs and simulation
flow_metric_groups = ('ctr', 'annual_max', 'annual_min', 'FHV', 'FLV', 'season_mean')

# error name -> (group in ds_flow_metrics, variable in that group, error function, scale to percent?)
annual_errors = {
    'annual_centroid': ('ctr', 'ann_centroid_day', metrics.bias, False),
    'annual_max_day': ('annual_max', 'ann_max_day', metrics.bias, False),
    'annual_max_flow': ('annual_max', 'ann_max_flow', metrics.pbias, True),
    'annual_min_day': ('annual_min', 'ann_min_day', metrics.bias, False),
    'annual_min_flow': ('annual_min', 'ann_min_flow', metrics.pbias, True),
    'pbiasFHV': ('FHV', 'FHV', metrics.pbias, True),
    'pbiasFLV': ('FLV', 'FLV', metrics.pbias, True),
}
# error name -> function applied to the (obs, sim) daily flow series
daily_errors = {
    'alpha': metrics.alpha,
    'beta': metrics.beta,
    'corr': metrics.corr,
    'kge': metrics.kge,
}
# error name -> season label of the seasonal mean flow
seasonal_errors = {'pbias_djf': 'DJF', 'pbias_mam': 'MAM', 'pbias_jja': 'JJA', 'pbias_son': 'SON'}

for r, site in enumerate(ref_site):
    # naturalized (obs) daily flow; negative values are replaced by a tiny positive flow
    sr_obs = ds_bc_flow['obs'][hist_period]['streamflow'].sel(site=site).values
    sr_obs = np.where(sr_obs < 0.0, 1.0e-7, sr_obs)

    sr_seas_obs = ds_seasonal_flow['obs'][hist_period].sel(site=site).values

    # obs flow metrics at this site, selected once and reused for every simulation
    obs_metric = {group: ds_flow_metrics[group]['obs'][hist_period].sel(site=site)
                  for group in flow_metric_groups}

    for c, sim_name in enumerate(retro_runs):
        for d, flow_name in enumerate(flow_names):
            # simulated daily flow, with the same treatment of negative values
            sr_sim = ds_bc_flow[sim_name][hist_period][flow_name].sel(site=site).values
            sr_sim = np.where(sr_sim < 0.0, 1.0e-7, sr_sim)

            sr_seas_sim = ds_seasonal_flow[sim_name][hist_period][flow_name].sel(site=site).values

            sim_metric = {group: ds_flow_metrics[group][sim_name][hist_period][flow_name].sel(site=site)
                          for group in flow_metric_groups}

            # errors in annual flow metrics (timing in bias, volumes in percent bias)
            for name, (group, var, error_func, in_percent) in annual_errors.items():
                error = error_func(obs_metric[group][var].values, sim_metric[group][var].values)
                error_metric[name][r, c, d] = error * 100 if in_percent else error

            # errors computed directly from the daily flow series
            for name, error_func in daily_errors.items():
                error_metric[name][r, c, d] = error_func(sr_obs, sr_sim)

            # correlation of the day-of-year mean flow
            error_metric['corr_seas'][r, c, d] = metrics.corr(sr_seas_obs, sr_seas_sim)

            # percent bias of the seasonal mean flow
            for name, season in seasonal_errors.items():
                obs_season = obs_metric['season_mean'].sel(season=season).values
                sim_season = sim_metric['season_mean'].sel(season=season).values
                error_metric[name][r, c, d] = metrics.pbias(obs_season, sim_season) * 100

### 4.1 CDF plots for flow error in flow metrics - all the flow sites

In [None]:
%matplotlib inline 

retro = 'GMET'

# Font sizes are set before the axes exist so that they apply to this figure
# the first time the cell is run.
mpl.rcParams['xtick.labelsize'] = 9
mpl.rcParams['ytick.labelsize'] = 9
mpl.rcParams['axes.labelsize'] = 12

# 4 panel CDFs (alpha, beta, correlation and KGE)
fig = plt.figure(figsize=(8.0, 7.0))
fig.subplots_adjust(left=0.095,right=0.975,bottom=0.095,top=0.935,wspace=0.25,hspace=0.30)

# panels are filled row by row: (0,0), (0,1), (1,0), (1,1)
ax1, ax2, ax3, ax4 = [plt.subplot2grid((2, 2), (row, col)) for row in range(2) for col in range(2)]

clr=['r','k','b','m','c']
lstyle=['-','-','-','-','-']

ix_retro = list(retro_runs.keys()).index(retro)

# one entry per panel: (axes, error metric, panel tag, x-axis limits, x-axis label)
cdf_panels = [
    (ax1, 'alpha', 'a)', (0.0, 2.0), r'$\alpha$'),    # ratio
    (ax2, 'beta',  'b)', (0.0, 2.0), r'$\beta$'),     # ratio
    (ax3, 'corr',  'c)', (0.0, 1.0), 'Correlation'),
    (ax4, 'kge',   'd)', (-0.2, 1.0), 'KGE\''),
]

for ax, metric, tag, xlim, xlabel in cdf_panels:
    # one empirical CDF across sites per flow type; the third axis of
    # error_metric follows the order of flow_names
    for c, case in enumerate(flow_names):
        xdata_sort = np.sort(error_metric[metric][:, ix_retro, c])
        # probability assigned to the i-th smallest of n values: i/(n+1)
        prob_metric = np.arange(1, float(len(xdata_sort) + 1)) / (1 + len(xdata_sort))
        ax.plot(xdata_sort, prob_metric, c=clr[c], ls=lstyle[c],
                label=flow_names[case]['legend_name'], linewidth=1.25)
    # panel tag, drawn once per panel
    ax.text(0.02, 0.92, tag, transform=ax.transAxes)
    if ax is ax1:
        # the legend is shown in the first panel only
        lgd = ax.legend(frameon=True, loc="lower right", handlelength=2, prop={'size':8})
        lgd.get_frame().set_facecolor('white')
    ax.set_xlim(*xlim)
    ax.set_ylim(0, 1)
    ax.grid(True)
    ax.set_yticks(np.arange(0, 1, 0.25))
    ax.set_xlabel(xlabel)
    ax.set_ylabel('CDF')

fig.savefig(os.path.join(figure_path,'Fig1_KGE_cdf_bmorph_%s.png'%(hist_period)), dpi=300, bbox_extra_artists=(lgd,))

### 4.2  Map of at-site error metrics for historical period

In [None]:
%matplotlib inline 

retro = 'GMET'

ix_sim = list(retro_runs.keys()).index(retro)

# metrics to map (one row each): colormap, color normalization and title prefix
metric_dic = {
    'alpha': {'cmap': ccmap.cmap2, 'norm':ccmap.norm2, 'header':r'$\alpha$'},
    'beta':  {'cmap': ccmap.cmap2, 'norm':ccmap.norm2, 'header':r'$\beta$'},
    'corr':  {'cmap': ccmap.cmap3, 'norm':ccmap.norm3, 'header':'r'},
}

# flow types to map (one column each)
flow_type = {
    'flow_raw':{'header':'orig flow'},
    'flow_scbc_u':{'header':'scbc flow'},
}

# Region outlines drawn on selected rows:
# row index -> [(patch class, anchor xy, width, height, label xy, label, extra text kwargs)]
region_marks = {
    0: [
        (patches.Rectangle, (-124.1, 43.25), 2.20, 2.45, (-122.3, 42.8), 'Willamette', {}),
        (patches.Ellipse, (-121.7, 47.2), 1.8, 1.9, (-121.1, 47.9), 'Cascade', {'rotation': 35}),  # Eastern Cascade
        (patches.Rectangle, (-117.5, 42.2), 7.25, 2.9, (-117.5, 41.7), 'Snake', {}),
    ],
    1: [
        (patches.Rectangle, (-122.1, 43.2), 1.7, 2.0, (-121.0, 42.6), 'Deschutes', {}),
    ],
}

fig, axs = plt.subplots(nrows=len(metric_dic), ncols=len(flow_type), figsize=(6.0, 7.5),
                        subplot_kw={"projection": ccrs.PlateCarree()}, dpi=100)
plt.subplots_adjust(left=0.01, bottom=0.01, right=0.9, top=0.925, wspace=0.02)

for ix, (metric, meta) in enumerate(metric_dic.items()):
    # Site-by-flow-type table of this metric joined to the site geometries.
    # It does not depend on the column, so it is built once per row.
    df_metric = pd.DataFrame(data=error_metric[metric][:,ix_sim,:], index=ref_site, columns=list(flow_names.keys()))
    df_metric = df_metric.reset_index().rename(columns={'index':'location_name'})
    df_metric_final = df_site.merge(df_metric, on="location_name", how='inner')

    for jx, (bc_type, ftyp_meta) in enumerate(flow_type.items()):
        ax = axs[ix,jx]
        base_map(ax, df_huc12)
        df_metric_final.plot(ax=ax, markersize=10, cmap=meta['cmap'], norm=meta['norm'], column=bc_type, legend=False)
        ax.set_title('%s: %s'%(meta['header'],ftyp_meta['header']))

        for patch_cls, xy, width, height, text_xy, text, text_kw in region_marks.get(ix, []):
            ax.add_patch(patch_cls(xy, width, height, edgecolor='k', facecolor='None', lw=0.7))
            ax.text(*text_xy, text, fontsize=6, **text_kw)

    # add one colorbar per row, attached to the last column
    sm = plt.cm.ScalarMappable(cmap=meta['cmap'], norm=meta['norm'])
    # the mappable carries no data; give it an empty array through the public setter
    sm.set_array([])
    fig.colorbar(sm, ax=axs[ix,-1], extend='both')

fig.savefig(os.path.join(figure_path, f'Fig2_historical_metric_error_map_{retro}_{hist_period}.png'), dpi=300)

## 5. Monthly seasonal cycle at each site <a id='seasonal_cycle'></a>

### 5.1. one or more bc_types and obs per period
bc_type: 'raw', 'ibc', 'scbc'

In [None]:
%matplotlib auto
plt.rcParams.update({'figure.max_open_warning': 0})

# --- setup
retro = 'GMET'
skip_period = []
skip_bc_type = []  # 'raw','ibc', 'scbc'
hist_period = 'valid'
# ---

# x-axis labels in the order Oct..Sep
month = ['Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep']


def _monthly_cycle(flow):
    """Mean flow per calendar month, rotated by 3 so the series starts in October."""
    return flow.groupby("time.month").mean(dim="time").roll(month=3, roll_coords=False).values


# bc type -> (flow variable, line style, extra plot keywords), in plotting order
sim_lines = {
    'raw':  ('flow_raw',    ':',  {'zorder': 0}),
    'scbc': ('flow_scbc_u', '-',  {}),
    'ibc':  ('flow_ibc_u',  '-.', {}),
}

plot_title = f'{retro}_{hist_period}'
# line color of the period that is actually plotted
sim_color = periods[hist_period]['lc']

for site in ref_site:
    plt.figure(figsize = (7, 5))

    # naturalized (obs) flow
    obs_flow = _monthly_cycle(ds_bc_flow['obs'][hist_period]['streamflow'].sel(site=site))
    plt.plot(month, obs_flow, linestyle='--', linewidth=2.0, color='black', label='obs', zorder=0)

    # simulated flow, one line per bias-correction type that is not skipped
    for bc_type, (flow_var, line_style, plot_kw) in sim_lines.items():
        if bc_type in skip_bc_type:
            continue
        sim_flow = _monthly_cycle(ds_bc_flow[retro][hist_period][flow_var].sel(site=site))
        plt.plot(month, sim_flow, ls=line_style, c=sim_color, lw=1.25, label='%s %s'%(retro, bc_type), **plot_kw)

    plt.ylabel('Discharge [m3/s]'); plt.xlabel('Month')
    plt.legend()

    plt.title(f'{plot_title} at {site}')

    plt.savefig(os.path.join(figure_path, 'per_site','Fig1_bc_monthly_cycle_%s_%s.png'%(plot_title, site)), dpi=200)
    # NOTE: only the first site is plotted; remove this break to produce a figure for every site
    break