# Analysis of VELMA runoff simulations

In [4]:
%matplotlib widget

import __init__
import scripts.config as config
import numpy as np
import pandas as pd
import tempfile
import datetime
import ipywidgets as widgets
from ipywidgets import interact
from natsort import natsorted
import os
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib.font_manager import FontProperties
import seaborn as sns
# import matplotlib as mpl
import matplotlib.pyplot as plt
import importlib
from functools import reduce

In [2]:
# Plotting parameters

# Font-size presets in points; only SMALL_SIZE is applied below.
XSMALL_SIZE = 6
SMALL_SIZE = 7
MEDIUM_SIZE = 9
BIGGER_SIZE = 12

# Use one compact size for every text element so the stacked yearly panels stay readable.
plt.rcParams.update({
    'font.size': SMALL_SIZE,         # default text size
    'axes.titlesize': SMALL_SIZE,    # axes (panel) titles
    'axes.labelsize': SMALL_SIZE,    # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,   # x tick labels
    'ytick.labelsize': SMALL_SIZE,   # y tick labels
    'legend.fontsize': SMALL_SIZE,   # legend entries
    # The original set 'axes.titlesize' a second time here although its comment
    # said "figure title"; the figure-level key is 'figure.titlesize'.
    'figure.titlesize': SMALL_SIZE,
    'figure.dpi': 140,
})

In [5]:
# Getting daily runoff results from simulations

RUNOFF_COL = 'Runoff_All(mm/day)_Delineated_Average'

results_dir = config.velma_data.parents[1] / 'results'

# Immediate sub-directories of the results folder (one per simulation), naturally sorted.
all_sims = natsorted(next(os.walk(results_dir))[1])

sims = []
runoff_sims = []
for sim in all_sims:
    sim_dir = results_dir / sim
    # Skip folders that have no annual summary file.
    # NOTE(review): presumably that file marks a finished run - confirm.
    if 'AnnualHydrologyResults.csv' not in os.listdir(sim_dir):
        continue
    velma_results = pd.read_csv(sim_dir / 'DailyResults.csv')

    # Build the date index from the file's own Year and Day (day-of-year) columns
    # instead of guessing the period from the row count, so a simulation covering
    # any period gets a correct index rather than silently keeping a RangeIndex.
    dates = pd.to_datetime(
        velma_results['Year'].astype(str) + velma_results['Day'].astype(str).str.zfill(3),
        format='%Y%j')
    if dates.duplicated().any():
        raise ValueError('Duplicate Year/Day rows in DailyResults.csv of {}'.format(sim))

    # One single-column frame per simulation, already named after the simulation.
    runoff_sim = pd.DataFrame({sim: velma_results[RUNOFF_COL].to_numpy()},
                              index=pd.DatetimeIndex(dates))
    sims.append(sim)
    runoff_sims.append(runoff_sim)

if not runoff_sims:
    raise FileNotFoundError('No simulation results found in {}'.format(results_dir))

# Outer-join all simulations on date. concat is used instead of chained pd.merge
# calls: merging frames that share one column name produces clashing _x/_y
# suffixes from the fourth frame on, which recent pandas versions reject.
runoff_sims_df = pd.concat(runoff_sims, axis=1).sort_index()
runoff_sims_df['doy'] = runoff_sims_df.index.dayofyear
runoff_sims_df['year'] = runoff_sims_df.index.year

# Get observed runoff
runoff_obs = pd.read_csv(config.velma_data / 'runoff' / 'ellsworth_Q_2003_2007_dummy.csv', names=['runoff_obs'])
# The file has no date column; rows are assigned to consecutive days of 2003-2007.
runoff_obs.index = pd.date_range('01-01-2003', '12-31-2007')
runoff_obs['doy'] = runoff_obs.index.dayofyear
runoff_obs['year'] = runoff_obs.index.year

In [23]:
# Convert runoff to pivot tables

# Shared layout: day of year down the rows, calendar year across the columns;
# dropna=False keeps columns that are entirely NaN.
pivot_layout = dict(index=['doy'], columns=['year'], dropna=False)

# Simulated runoff: columns become (simulation, year) pairs.
runoff_sim_yearly = runoff_sims_df.pivot_table(values=sims, **pivot_layout)

# Observed runoff: a single value column, so columns are just the years.
runoff_obs_yearly = runoff_obs.pivot_table(values='runoff_obs', **pivot_layout)

## Plotting

In [24]:
# Preview the simulated-runoff pivot table (rows: day of year; columns: simulation, then year).
runoff_sim_yearly

Unnamed: 0_level_0,ellsworth_baseline_03_07,ellsworth_baseline_03_07,ellsworth_baseline_03_07,ellsworth_baseline_03_07,ellsworth_baseline_03_07,ellsworth_baseline_03_07_10,ellsworth_baseline_03_07_10,ellsworth_baseline_03_07_10,ellsworth_baseline_03_07_10,ellsworth_baseline_03_07_10,...,ellsworth_baseline_04_07_9,ellsworth_baseline_04_07_9,ellsworth_baseline_04_07_9,ellsworth_baseline_04_07_9,ellsworth_baseline_04_07_9,ellsworth_baseline_04_07_9_GHCND,ellsworth_baseline_04_07_9_GHCND,ellsworth_baseline_04_07_9_GHCND,ellsworth_baseline_04_07_9_GHCND,ellsworth_baseline_04_07_9_GHCND
year,2003,2004,2005,2006,2007,2003,2004,2005,2006,2007,...,2003,2004,2005,2006,2007,2003,2004,2005,2006,2007
doy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,13.203224,12.499445,5.590395,25.715544,11.510624,103.395630,10.823229,5.587865,23.986944,11.473555,...,,6.380439,3.779232,22.902950,11.505026,,5.951293,7.634514,26.618570,10.789105
2,18.424429,11.399151,4.809629,21.901718,14.556498,91.956047,10.560692,5.140394,20.875599,13.266190,...,,4.167027,3.393301,17.661301,18.296135,,3.489241,6.177514,24.720856,12.281318
3,28.714983,9.499844,4.431491,18.435453,16.937672,58.590443,9.205021,4.754377,18.077135,15.862616,...,,2.825019,3.247038,16.882809,13.774307,,2.288531,5.428402,18.353373,22.471651
4,32.425411,8.716317,4.219032,17.236071,14.953041,47.687233,8.405148,4.485927,16.729080,14.884953,...,,3.128204,3.109210,15.575370,15.620897,,2.415826,5.089087,17.825466,15.545039
5,25.622589,7.991707,4.123698,24.438087,22.397791,35.280331,7.787065,4.329560,20.881029,18.933987,...,,3.911038,3.173443,33.967197,17.016586,,3.038912,4.786254,18.942703,32.463173
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,13.894639,5.544347,42.713436,19.053501,17.804548,12.663321,6.579423,34.132881,17.939346,15.762032,...,,4.126866,27.282475,16.258717,13.726779,,7.584746,62.947025,21.590527,22.711872
363,11.269913,5.218313,36.313129,16.483477,15.542498,10.858833,5.856955,31.354393,15.499677,14.523042,...,,4.140934,33.412300,13.969597,13.226614,,6.677496,36.036469,18.041492,17.805464
364,11.108541,5.136781,37.025509,14.251964,14.587410,9.923604,5.555395,31.167664,13.741363,13.620793,...,,3.996345,30.665348,11.839246,12.187482,,6.584325,42.945370,15.543103,16.841047
365,11.385984,4.969378,30.689274,12.426834,12.784750,9.929136,5.286563,27.898190,12.294537,12.323998,...,,3.713320,23.688158,10.765043,10.469986,,6.374254,36.599518,12.986013,14.633612


In [28]:
# Plotting all simulations together

# One panel per observed year, each showing every simulation plus the observations.
years = runoff_obs_yearly.columns.get_level_values(0)
fig, axes = plt.subplots(ncols=1, nrows=len(years), figsize=(6, 9))
for ax, year in zip(axes, years):
    ax.set_title(year)
    ax.set_ylim([0, 80])
    for sim in sims:
        # Look each column up by its (simulation, year) label. The pivot table
        # sorts its columns lexicographically while `sims` is in natural order,
        # so computing a column position from the index in `sims` can pair a
        # label with another simulation's data.
        runoff_sim_yearly[(sim, year)].plot(ax=ax, label=sim, linewidth=0.4)
    runoff_obs_yearly[year].plot(ax=ax, label='Observed', linewidth=.8, color='black')
# No legend is drawn in this overview figure.
axes[0].set_ylabel('Runoff (mm/day)')
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
# Close all open matplotlib figures.
plt.close('all')

In [29]:
# Interactively select which sims to plot

# Plotting a subset of simulations

def plot_selected_sims(runoff_sim_yearly_selected, selected):
    """Draw one panel per observed year comparing chosen simulations with observations.

    runoff_sim_yearly_selected: frame whose columns are read by position and must
        be laid out simulation-major - every year of the first entry in
        `selected`, then every year of the second, and so on.
    selected: simulation names, used as legend labels, in the same order as the
        column groups.

    The observed runoff and the list of years are taken from the notebook-level
    `runoff_obs_yearly` table.
    """
    obs_years = runoff_obs_yearly.columns.get_level_values(0)
    n_years = len(obs_years)
    fig, axes = plt.subplots(ncols=1, nrows=n_years, figsize=(6, 9))
    for year_pos, year in enumerate(obs_years):
        panel = axes[year_pos]
        panel.set_title(year)
        panel.set_ylim([0, 80])
        # The column of simulation k for this year sits k * n_years places after year_pos.
        for sim_pos, sim in enumerate(selected):
            sim_series = runoff_sim_yearly_selected.iloc[:, sim_pos * n_years + year_pos]
            sim_series.plot(ax=panel, label=sim, linewidth=0.6)
        observed = runoff_obs_yearly.iloc[:, year_pos]
        observed.plot(ax=panel, label='Observed', linewidth=1, color='black')
    top_panel = axes[0]
    leg = top_panel.legend(loc='upper left', bbox_to_anchor=(0, 1.3), fancybox=True, ncol=3)
    # Thicken the legend handles so the thin line colours can be told apart.
    for handle in leg.get_lines():
        handle.set_linewidth(4.0)
    top_panel.set_ylabel('Runoff (mm/day)')
    plt.tight_layout()


# Multi-select list of all loaded simulations; the first one is pre-selected.
sel_mul = widgets.SelectMultiple(description="Simulations", options=sims, value=[sims[0]], disabled=False)


@interact(selected=sel_mul)
def interactive_plot(selected):
    """Re-draw the yearly comparison panels for the simulations picked in the widget.

    selected: simulation names currently highlighted in the selection widget.
    """
    # Read the years from the observed table on every call rather than from the
    # global `years`: this callback runs whenever the widget changes, and other
    # cells rebind `years`, which would make the column layout built here
    # disagree with the one plot_selected_sims expects.
    obs_years = runoff_obs_yearly.columns.get_level_values(0)
    tuples = pd.MultiIndex.from_product([selected, obs_years])
    runoff_sim_yearly_sub = runoff_sim_yearly[tuples]
    plot_selected_sims(runoff_sim_yearly_sub, selected)



interactive(children=(SelectMultiple(description='Simulations', index=(0,), options=('ellsworth_baseline_03_07…

In [None]:
# Plotting a subset of simulations manually

sims_sub = ['ellsworth_baseline_04_07_9',
            'ellsworth_baseline_04_07_10',
            'ellsworth_baseline_04_07_11']

# Define the years before they are used, so this cell does not depend on which
# other cell last assigned `years`.
years = runoff_obs_yearly.columns.get_level_values(0)

tuples = pd.MultiIndex.from_product([sims_sub, years])
runoff_sim_yearly_sub = runoff_sim_yearly[tuples]

# One panel per observed year: observations first, then each chosen simulation.
fig, axes = plt.subplots(ncols=1, nrows=len(years), figsize=(6, 9))
for i, year in enumerate(years):
    runoff_obs_yearly[year].plot(ax=axes[i], label='Observed', linewidth=1, color='black')
    axes[i].set_title(year)
    axes[i].set_ylim([0, 80])
    for sim in sims_sub:
        # Select by (simulation, year) label instead of a computed column position.
        runoff_sim_yearly_sub[(sim, year)].plot(ax=axes[i], label=sim, linewidth=0.4)
axes[0].legend(loc='upper left', bbox_to_anchor=(0, 2.1), fancybox=True, ncol=2)
axes[0].set_ylabel('Runoff (mm/day)')
plt.tight_layout()

#### Ensemble: averaging simulated runoff using PRISM and gauge precip data

In [None]:
# Years covered by the two simulations being averaged.
ensemble_years = [2004, 2005, 2006, 2007]

# Yearly runoff columns of the simulation referred to as "PRISM" in this notebook.
selection = pd.MultiIndex.from_product([['ellsworth_baseline_04_07_10'], ensemble_years])
prism = runoff_sim_yearly[selection]

# Yearly runoff columns of the simulation referred to as "gauge" (GHCND run).
selection = pd.MultiIndex.from_product([['ellsworth_baseline_04_07_10_GHCND'], ensemble_years])
gauge = runoff_sim_yearly[selection]

# Element-wise mean of the two simulations, year by year. The result reuses the
# day-of-year index of the inputs; a default 0-based index would draw the
# averaged line one day to the left of the PRISM and gauge lines.
avg_runoffs_df = pd.DataFrame(
    data=np.mean([gauge.to_numpy(), prism.to_numpy()], axis=0),
    index=prism.index,
    columns=ensemble_years)

In [None]:
# Plotting PRISM, gauge, and average of PRISM and gauge runoffs

# Kept under its own name so the `years` used by the other plotting cells is not overwritten.
ensemble_plot_years = [2004, 2005, 2006, 2007]
fig, axes = plt.subplots(ncols=1, nrows=len(ensemble_plot_years), figsize=(6, 9))
for i, year in enumerate(ensemble_plot_years):
    # Pick the observed column by year label: the observed table starts in 2003,
    # so position i would show the year before the one in the panel title.
    runoff_obs_yearly[year].plot(ax=axes[i], label='Observed', linewidth=1, color='black')
    axes[i].set_title(year)
    axes[i].set_ylim([0, 80])
    # avg_runoffs_df, prism and gauge each hold exactly these four years, in this order.
    avg_runoffs_df.iloc[:, i].plot(ax=axes[i], label='Average', linewidth=0.5)
    prism.iloc[:, i].plot(ax=axes[i], label='PRISM', linewidth=0.5)
    gauge.iloc[:, i].plot(ax=axes[i], label='Gauge', linewidth=0.5)

# Build the legend once, after the top panel is complete, and thicken its handles.
leg = axes[0].legend(loc='upper left', bbox_to_anchor=(0, 2.5), fancybox=True, ncol=3)
for line in leg.get_lines():
    line.set_linewidth(4.0)
axes[0].set_ylabel('Runoff (mm/day)')
plt.tight_layout()

In [None]:
# Computing Nash-Sutcliffe
def NS(s, o):
    """Nash-Sutcliffe efficiency coefficient.

    s: simulated values
    o: observed values

    Returns 1 minus the ratio of the summed squared errors (s - o) to the
    summed squared deviations of o from its mean. No NaN filtering is done here.
    """
    residual = s - o
    deviation = o - np.mean(o)
    squared_error = np.sum(residual ** 2)
    observed_spread = np.sum(deviation ** 2)
    return 1 - squared_error / observed_spread


# Score the simulations on the years they keep once their first column is dropped,
# and take the observations for those same years by label. Applying `.iloc[:, 1:]`
# to the observed table as well would keep one more year than the simulations
# (it starts a year earlier), so different periods would be compared.
# NOTE(review): confirm that excluding the first simulated year is intended.
eval_years = list(prism.columns.get_level_values(1)[1:])

# Flatten every table in the same (day of year, year) order, keeping NaN in place
# so that equal positions always refer to the same day.
obs_all = runoff_obs_yearly[eval_years].to_numpy(dtype=float).ravel()
prism_all = prism.iloc[:, 1:].to_numpy(dtype=float).ravel()
gauge_all = gauge.iloc[:, 1:].to_numpy(dtype=float).ravel()
avg_all = avg_runoffs_df.iloc[:, 1:].to_numpy(dtype=float).ravel()

# Use only days where every series has a value (e.g. day 366 is missing in
# non-leap years), so the three scores are computed over identical days.
valid = (np.isfinite(obs_all) & np.isfinite(prism_all)
         & np.isfinite(gauge_all) & np.isfinite(avg_all))

prism_stack = prism_all[valid]
gauge_stack = gauge_all[valid]
avg_stack = avg_all[valid]
obs_stack = obs_all[valid]

display('NS (PRISM): ', NS(prism_stack, obs_stack))

display('NS (gauge): ', NS(gauge_stack, obs_stack))

display('NS (average): ', NS(avg_stack, obs_stack))

The average of the PRISM and gauge runoff has a higher NSE than either of them individually, so running VELMA with their averaged precipitation may lead to a higher NSE as well.