# Final calibration of PDM

In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
from dateutil.relativedelta import relativedelta
import hvplot
import hvplot.pandas
import warnings
import pyswarms as ps
# When the notebook is started from inside its own sub-folder, step one level up
# (presumably so that the relative 'data/...' paths and the `functions` package resolve).
pad = Path.cwd()
if pad.name == "model_training_and_calibration":
    pad_correct = pad.parent
    os.chdir(pad_correct)
from functions.PDM import (PDM, parameter_sampling, 
                           Nelder_Mead_calibration,PDM_calibration_wrapper_PSO)
from functions.performance_metrics import NSE, mNSE
#from wakepy import set_keepawake, unset_keepawake

# Switch for the Nelder-Mead calibration loop: the loop only runs when this is True.
exec_optimisation = False
append = True #Set to True if you want to continue optimisation where it halted
# Reload edited modules automatically before a cell is executed.
%load_ext autoreload 
%autoreload 2 

# Silence RuntimeWarning and UserWarning messages for the rest of the notebook.
warnings.filterwarnings(action = 'ignore', category= RuntimeWarning)
warnings.filterwarnings(action = 'ignore', category= UserWarning)

## Read in and process initial data

In [None]:
# Reference parameter set, kept as a single-row float32 frame (one column per parameter).
initial_values = {
    'cmax': 400.60999,
    'cmin': 87.67600,
    'b': 0.60000,
    'be': 3.00000,
    'k1': 8.00000,
    'k2': 0.70000,
    'kb': 5.04660,
    'kg': 9000.00000,
    'St': 0.43043,
    'bg': 1.00000,
    'tdly': 2.00000,
    'qconst': 0.00000,
    # 'rainfac': 0.00000 is deliberately left out: it is not used.
}
parameters_initial = pd.DataFrame(initial_values, index = [0], dtype = np.float32)
display(parameters_initial)

# Two catchment areas: the value attributed to Cabus and the area of the shapefile polygon.
area_zwalm_initial = np.single(109.2300034)
zwalm_shape = gpd.read_file('data/Zwalm_shape/zwalm_shapefile_emma_31370.shp')
area_zwalm_new = np.single(zwalm_shape.area[0] / 10**6)  # divided by 1e6 and reported as km^2
# print() applies str() to every argument, so the text matches plain string concatenation.
print('Area of the Zwalm by Cabus: ', area_zwalm_initial, '[km^2]', sep = '')
print('Area of the Zwalm by shapefile: ', area_zwalm_new, '[km^2]', sep = '')

For calibration, the bounds from Cabus are used, except for:
- $b_e$: the minimum is set to 1, because PDM requires at least a linear relationship; the maximum is set to 3.
- $b_g$: fixed at 1 and not calibrated further.
- $tdly$: assumed to be non-negative and not more than a day.

![image.png](cabus.png)

Info from the VMM

![image.png](VMM.png)

In [None]:
# Calibration limits, one entry per parameter in the column order of parameters_initial
# (cmax, cmin, b, be, k1, k2, kb, kg, St, bg, tdly, qconst).
lower_bound = np.array([160,0,0.1,1,0.9,0.1,0,700,0,1,0,-0.3])
upper_bound = np.array([5000,300,2,3,40,15,5000,25000,150,1.000000000000001,20,0.03])
# Pair the limits up as a list of (low, high) tuples; this list is what gets passed
# to the sampling and Nelder-Mead routines further down.
bounds_list = list(zip(lower_bound, upper_bound))
bounds_opt = bounds_list
print(bounds_opt)

In [None]:
# Forcing series (P and EP) written by the preprocessing step.
# NOTE: unpickling executes code; only load pickle files that you produced yourself.
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_zwalm, ep_zwalm = (
    pd.read_pickle(preprocess_output_folder/file_name)
    for file_name in ('zwalm_p_thiessen.pkl', 'zwalm_ep_thiessen.pkl')
)

# Observed discharge, indexed by its timestamp.
pywaterinfo_output_folder = Path("data/Zwalm_data/pywaterinfo_output")
Q_day = pd.read_pickle(pywaterinfo_output_folder/"Q_day.pkl").set_index('Timestamp')

# Quick visual check of both forcing series and of the time span they cover.
for forcing in (p_zwalm, ep_zwalm):
    forcing.set_index('Timestamp').plot()
forcing_time = p_zwalm['Timestamp']
print('First day of forcing data: ', forcing_time.iloc[0], sep = '')
print('Last day of forcing data: ', forcing_time.iloc[-1], sep = '')

Adapted to a longer calibration period after a suggestion from Hans. 
- Calibration: 2012-01-01 00:00 -> 2019-12-31 23:00 (period1)
- Validation: 2020-01-01 00:00 -> 2022-11-05 23:00 (period2)

Take 9 months warmup period 

In [None]:
warmup_months = 9
warmup = relativedelta(months = warmup_months)
forcing_time = p_zwalm['Timestamp']

# Period 1 runs from the first forcing timestamp up to the end of 2019.
start_p1 = forcing_time.iloc[0]
start_endofwarmup_p1 = start_p1 + warmup
end_p1 = pd.Timestamp(datetime(year = 2019, month = 12, day = 31, hour = 23))
print('Characteristics of period 1: start = ', start_p1, ', start of post warmup = ',
      start_endofwarmup_p1, ' and end = ', end_p1, sep = '')

# Period 2 runs from the start of 2020 up to the last forcing timestamp.
start_p2 = pd.Timestamp(datetime(year = 2020, month= 1, day = 1, hour = 0))
start_endofwarmup_p2 = start_p2 + warmup
end_p2 = forcing_time.iloc[-1]
print('Characteristics of period 2: start = ', start_p2, ', start of post warmup = ',
      start_endofwarmup_p2, ' and end = ', end_p2, sep = '')

# Hourly ranges select the forcing; the daily ranges without warm-up are used for scoring.
p1_period = pd.date_range(start_p1, end_p1, freq = 'H')
p1_period_excl_warmup = pd.date_range(start_endofwarmup_p1, end_p1, freq = 'D')
p2_period = pd.date_range(start_p2, end_p2, freq = 'H')
p2_period_excl_warmup = pd.date_range(start_endofwarmup_p2, end_p2, freq = 'D')
p_all_nowarmup = pd.date_range(start_endofwarmup_p1, end_p2)
p_all = pd.date_range(start_p1, end_p2)

# Split the forcing over both periods; the timestamp becomes the index to allow label selection.
p_by_time = p_zwalm.set_index('Timestamp')
ep_by_time = ep_zwalm.set_index('Timestamp')
p_zwalm_p1, ep_zwalm_p1 = p_by_time.loc[p1_period], ep_by_time.loc[p1_period]
p_zwalm_p2, ep_zwalm_p2 = p_by_time.loc[p2_period], ep_by_time.loc[p2_period]

## Check initial performance of the model

In [None]:
deltat = np.single(1)      # internal resolution = 1 hour
deltat_out = np.single(24) # output resolution = 24 hour
# Model run over the complete forcing series with the initial parameter set and initial area.
pd_zwalm_out_initial = PDM(
    P = p_zwalm['P_thiessen'].values,
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm_initial, deltat = deltat, deltatout = deltat_out,
    parameters = parameters_initial
).set_index(['Time'])

# (m)NSE on period 2, warm-up excluded
pd_zwalm_out_initial_p2 = pd_zwalm_out_initial[start_endofwarmup_p2:end_p2]
q_mod_p2 = pd_zwalm_out_initial_p2['qmodm3s']
q_obs_p2 = Q_day.loc[start_endofwarmup_p2:end_p2,'Value'].values
nse_initial_p2 = NSE(q_mod_p2, q_obs_p2)
mnse_initial_p2 = mNSE(q_mod_p2, q_obs_p2)
print('NSE on p2 for initial set:', nse_initial_p2, sep = '')
print('mNSE on p2 for initial set:', mnse_initial_p2, sep = '')

# (m)NSE on period 1, warm-up excluded
pd_zwalm_out_initial_p1 = pd_zwalm_out_initial[start_endofwarmup_p1:end_p1]
q_mod_p1 = pd_zwalm_out_initial_p1['qmodm3s']
q_obs_p1 = Q_day.loc[start_endofwarmup_p1:end_p1,'Value'].values
nse_initial_p1 = NSE(q_mod_p1, q_obs_p1)
mnse_initial_p1 = mNSE(q_mod_p1, q_obs_p1)
print('NSE on p1 for initial set:', nse_initial_p1, sep = '')
print('mNSE on p1 for initial set:', mnse_initial_p1, sep = '')

# (m)NSE from the end of the first warm-up until the end of period 2
q_mod_all = pd_zwalm_out_initial.loc[start_endofwarmup_p1:end_p2,'qmodm3s'].values
q_obs_all = Q_day.loc[start_endofwarmup_p1:end_p2,'Value'].values
nse_initial = NSE(q_mod_all, q_obs_all)
mnse_initial = mNSE(q_mod_all, q_obs_all)
print('NSE for initial set:', nse_initial, sep = '')
print('mNSE for initial set:', mnse_initial, sep = '')
 

In [None]:
# Show the model output obtained with the initial parameter set.
pd_zwalm_out_initial

In [None]:
# Interactive overlay of observed flow and the flow modelled with the initial parameter set.
observed_curve = Q_day['Value'].hvplot(alpha = 0.7, label = 'observed')
modelled_curve = pd_zwalm_out_initial['qmodm3s'].hvplot(
    alpha = 0.7, frame_width = 900, frame_height = 400, label = 'modelled', line_dash = 'dashed')
observed_curve * modelled_curve

## Nelder-Mead Calibration: starting from multiple initial conditions

In [None]:
names = parameters_initial.columns.to_list()
n_paramsets = 50
# Starting points for the Nelder-Mead runs, one row per run.
# NOTE(review): no random seed is visible here, so the starting points presumably differ between sessions.
pd_init_params = parameter_sampling(names, bounds_opt, n_paramsets)
# A bare `.head()` in the middle of a cell is not rendered, so display it explicitly.
display(pd_init_params.head())

col_names_perf = ['NSE_cal','NSE_val','NSE_full','mNSE_cal','mNSE_val','mNSE_full']
if append:
    # Continue from the results of an earlier run; `start` is the number of sets already done.
    pd_perf = pd.read_csv('data/Zwalm_PDM_parameters/NM_NSE_performances.csv')
    pd_opt_params = pd.read_csv('data/Zwalm_PDM_parameters/NM_NSE_parameters.csv')
    start = pd_opt_params.shape[0]
else:
    # Fresh run: empty frames for the optimised parameters and their performance.
    pd_perf = pd.DataFrame(columns = col_names_perf)
    pd_opt_params = pd.DataFrame(columns = names)
    start = 0
display(pd_perf.head())
display(pd_opt_params.head())

In [None]:
# Number of parameter sets already available, i.e. where the calibration loop resumes.
start

In [None]:
#Specifications for calibration 
performance_metric = 'NSE'
P_np = p_zwalm_p1['P_thiessen'].values
EP_np = ep_zwalm_p1['EP_thiessen'].values
deltat = np.single(1)
deltatout = np.single(24)
t_model = p1_period.values
t_calibration = p1_period_excl_warmup.values
Qobs = Q_day['Value']

In [None]:
# Make sure the folder for the calibration results exists. makedirs also creates a
# missing parent folder and, with exist_ok, does nothing when the folder is already there.
os.makedirs('data/Zwalm_PDM_parameters', exist_ok = True)

In [None]:
#set_keepawake(keep_screen_awake=True)
if exec_optimisation:
    # Windows on which both metrics are scored: label -> (first, last) timestamp.
    score_windows = {
        'cal': (start_endofwarmup_p1, end_p1),
        'val': (start_p2, end_p2),
        'full': (start_endofwarmup_p1, end_p2),
    }
    for i in np.arange(start, n_paramsets):
        # Nelder-Mead optimisation starting from the i-th initial parameter set
        pd_init_temp = pd_init_params.iloc[i,:]
        opt_out_NSE = Nelder_Mead_calibration(
            pd_init_temp.values, parameters_initial.columns, bounds_opt, performance_metric,
            P_np, EP_np, area_zwalm_new, deltat, deltatout, t_model, t_calibration, Qobs
        )
        # Append the optimised set as a new row
        pd_temp = pd.DataFrame(opt_out_NSE.x.reshape(1,-1), columns = names)
        pd_opt_params = pd.concat([pd_opt_params, pd_temp], axis = 0, ignore_index=True)

        # Run the model over the complete forcing series with the optimised set
        pd_zwalm_out = PDM(
            P = p_zwalm['P_thiessen'].values,
            EP = ep_zwalm['EP_thiessen'].values,
            t = p_zwalm['Timestamp'].values,
            area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
            parameters = pd_temp
        ).set_index('Time')

        # Score NSE and mNSE on every window.
        # NOTE: an earlier version used mNSE for the validation NSE, so result files
        # written by that version contain wrong NSE_val values.
        scores = {}
        for label, (t_first, t_last) in score_windows.items():
            q_mod = pd_zwalm_out.loc[t_first:t_last,'qmodm3s'].values
            q_obs = Q_day.loc[t_first:t_last,'Value'].values
            scores['NSE_' + label] = NSE(q_mod, q_obs)
            scores['mNSE_' + label] = mNSE(q_mod, q_obs)
        score_row = [scores[key] for key in
                     ('NSE_cal', 'NSE_val', 'NSE_full', 'mNSE_cal', 'mNSE_val', 'mNSE_full')]
        pd_temp_perf = pd.DataFrame(np.array(score_row).reshape(1,-1), columns= col_names_perf, index = [i])
        pd_perf = pd.concat([pd_perf, pd_temp_perf], axis=0, ignore_index = True)

        # Rewrite both files after every run, so an interrupted calibration can be resumed
        pd_opt_params.to_csv('data/Zwalm_PDM_parameters/NM_NSE_parameters.csv', mode = 'w', index = False)
        pd_perf.to_csv('data/Zwalm_PDM_parameters/NM_NSE_performances.csv', mode = 'w', index = False)
        print('dataset ', i, ' out of ', n_paramsets, ' has been calibrated', sep = '')
#unset_keepawake()

## Assess calibration results

In [None]:
# First rows of the optimised parameter sets.
pd_opt_params.head()

Recalculate the performance metrics for every parameter set, as there are doubts about the values saved during calibration.

In [None]:
# Check of the layout: the first optimised set reshaped into a single-row frame with the parameter names as columns.
pd.DataFrame(pd_opt_params.iloc[0,:].values.reshape(1,-1), columns = pd_opt_params.columns)

In [None]:
n_param_sets = pd_opt_params.shape[0]
# Windows on which both metrics are scored: label -> (first, last) timestamp.
# The validation window starts at start_p2.
score_windows = {
    'cal': (start_endofwarmup_p1, end_p1),
    'val': (start_p2, end_p2),
    'full': (start_endofwarmup_p1, end_p2),
}
perf_rows = []
for i in range(n_param_sets):
    # Single-row frame with the i-th optimised parameter set
    pd_temp = pd.DataFrame(pd_opt_params.iloc[i,:].values.reshape(1,-1), columns = pd_opt_params.columns)
    # Model run over the complete forcing series
    pd_zwalm_out = PDM(
        P = p_zwalm['P_thiessen'].values,
        EP = ep_zwalm['EP_thiessen'].values,
        t = p_zwalm['Timestamp'].values,
        area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
        parameters = pd_temp
    ).set_index('Time')
    scores = {}
    for label, (t_first, t_last) in score_windows.items():
        q_mod = pd_zwalm_out.loc[t_first:t_last,'qmodm3s'].values
        q_obs = Q_day.loc[t_first:t_last,'Value'].values
        scores['NSE_' + label] = NSE(q_mod, q_obs)
        scores['mNSE_' + label] = mNSE(q_mod, q_obs)
    # Same positional order as before: NSE cal/val/full followed by mNSE cal/val/full
    perf_rows.append([scores[key] for key in
                      ('NSE_cal', 'NSE_val', 'NSE_full', 'mNSE_cal', 'mNSE_val', 'mNSE_full')])

# Build the frame once, with a numeric dtype, so that sorting, arithmetic and
# plotting downstream operate on floats rather than on Python objects.
pd_perf_recalc = pd.DataFrame(perf_rows, columns = pd_perf.columns,
                              index = range(0, n_param_sets), dtype = float)

In [None]:
# One panel per calibrated parameter: optimised value against recalculated calibration NSE.
fig, axes = plt.subplots(4,3, constrained_layout = True, figsize = (7,7))
nse_cal = pd_perf_recalc['NSE_cal'].astype(float)
# axes.flat walks the grid row by row; no counter named `iter` is needed, which
# previously rebound the builtin `iter` for the rest of the notebook.
for col_nr, ax in enumerate(axes.flat):
    ax.scatter(x = pd_opt_params.iloc[:,col_nr], y = nse_cal)
    ax.set_xlabel(pd_opt_params.columns[col_nr])
    ax.set_ylabel('NSE')
fig.suptitle('NSE on calibration set');

Same Figure but now limit the axes

In [None]:
# Same panels as the unrestricted figure, with the NSE axis limited to [0.5, best value].
# The recalculated performances are plotted (not the values stored during calibration),
# so that both figures show the same data.
fig, axes = plt.subplots(4,3, constrained_layout = True, figsize = (7,7))
nse_cal = pd_perf_recalc['NSE_cal'].astype(float)
nse_cal_max = nse_cal.max()
# axes.flat walks the grid row by row; this avoids a counter named `iter`,
# which would rebind the builtin for the rest of the notebook.
for col_nr, ax in enumerate(axes.flat):
    ax.scatter(x = pd_opt_params.iloc[:,col_nr], y = nse_cal)
    ax.set_xlabel(pd_opt_params.columns[col_nr])
    ax.set_ylabel('NSE')
    ax.set_ylim(0.5, nse_cal_max)
fig.suptitle('NSE on calibration set');

In [None]:
# Name of the first parameter column.
pd_opt_params.columns[0]

### Best dataset

In [None]:
# First rows of the recalculated performance metrics.
pd_perf_recalc.head()

Idea: determine best dataset based on best NSE and mNSE value for validation set. (paper on mNSE https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/1998WR900018). Goal = highest weighted sum

In [None]:
# Rank the parameter sets on a weighted sum of NSE and mNSE on the validation window
# (equal weights); the highest score comes first.
weight_nse = weight_mnse = 1/2
sorted_NSE_val = pd_perf_recalc.sort_values(by = 'NSE_val', ascending = False)
sorted_NSE_val['score'] = (
    weight_nse*sorted_NSE_val['NSE_val'] + weight_mnse*sorted_NSE_val['mNSE_val']
)
sorted_score = sorted_NSE_val.sort_values(by = 'score', ascending = False)
sorted_score.head(10)


Plotting the best dataset according to validation data

In [None]:
# The first row of the ranking carries the original row label of the best parameter set.
index_best_param_set = sorted_score.index[0]
print('original index of best parameter set: ', index_best_param_set, sep = '')
best_param_set = pd_opt_params.iloc[index_best_param_set,:]
display(best_param_set)

# Model run over the complete forcing series with that set (as a single-row frame).
best_param_frame = pd.DataFrame(best_param_set.to_dict(), index=[0])
pd_zwalm_out_opt = PDM(
    P = p_zwalm['P_thiessen'].values,
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
    parameters = best_param_frame
).set_index('Time')

Check the NSE

In [None]:
# Cross-check: NSE of the best set over the full scoring window against the stored value.
q_mod_full = pd_zwalm_out_opt.loc[start_endofwarmup_p1:end_p2,'qmodm3s'].values
q_obs_full = Q_day.loc[start_endofwarmup_p1:end_p2,'Value'].values
nse_full_opt = NSE(q_mod_full, q_obs_full)
nse_full_stored = pd_perf.loc[index_best_param_set,'NSE_full']
print('Caculated NSE on full set: ', nse_full_opt, sep = '')
print('NSE as obtained during the calibration exercise :', nse_full_stored, sep = '')

In [None]:
# Performances as stored during the calibration run, for comparison with the recalculated ones.
pd_perf.head(10)

In [None]:
hvplot.extension('bokeh')
# Interactive overlay: observed flow, initial parameter set and the best Nelder-Mead set.
observed_curve = Q_day['Value'].hvplot(alpha = 0.7, label = 'observed', line_width = 1.5)
initial_curve = pd_zwalm_out_initial['qmodm3s'].hvplot(
    alpha = 0.7, frame_width = 900, frame_height = 400, label = 'Modelled: initial',
    line_dash = 'dashed', line_width = 1.5)
optimised_curve = pd_zwalm_out_opt['qmodm3s'].hvplot(
    alpha = 0.7, label = 'Modelled: optimised', line_dash = 'dashed', line_width = 1.5,
    color = 'green')
observed_curve * initial_curve * optimised_curve

Scatter plot

In [None]:
# Observed against modelled flow for the optimised and the initial parameter set.
# NOTE(review): the values are paired by position; this assumes Q_day and the model
# output cover exactly the same days - confirm.
fig, ax = plt.subplots()
for model_out, label in ((pd_zwalm_out_opt, 'optimised parameterset'),
                         (pd_zwalm_out_initial, 'initial parameterset')):
    ax.scatter(Q_day['Value'], model_out['qmodm3s'], label = label, alpha = 0.5)
ax.legend()

QQ-plot

In [None]:
# As many probability levels as there are model output steps.
length = len(pd_zwalm_out_initial)
quantiles = np.linspace(0, 1, length)
# Quantiles of the observations (missing values dropped) and of both model runs.
obs_quan = np.quantile(Q_day['Value'].dropna(), quantiles)
mod_opt_quan, mod_init_quan = (
    np.quantile(model_out['qmodm3s'], quantiles)
    for model_out in (pd_zwalm_out_opt, pd_zwalm_out_initial)
)

fig, ax = plt.subplots()
for modelled_quan, label in ((mod_opt_quan, 'Optimised'), (mod_init_quan, 'Initial')):
    ax.plot(obs_quan, modelled_quan, marker = 'o', label = label)
ax.plot(obs_quan, obs_quan, label = 'Ideal')  # 1:1 line
ax.legend()
ax.set_xlabel(r'$Q_{obs}$ [m$^3$/s]')
ax.set_ylabel(r'$Q_{mod}$ [m$^3$/s]')
ax.set_title('QQ-plot')

## 20 best NM parametersets

Plotting the 20 best according to validation dataset score

In [None]:
# Modelled flow for the best-ranked parameter sets, keyed by their original row label.
flow_dict = {}
# Never ask for more sets than are available (e.g. after an interrupted calibration);
# positional access beyond the last row would raise an IndexError.
top_nr = min(20, len(sorted_score))
for index_temp in sorted_score.index[:top_nr]:
    param_set_temp = pd.DataFrame(pd_opt_params.iloc[index_temp,:].to_dict(), index = [0])
    pd_zwalm_out_temp = PDM(
        P = p_zwalm['P_thiessen'].values,
        EP = ep_zwalm['EP_thiessen'].values,
        t = p_zwalm['Timestamp'].values,
        area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
        parameters = param_set_temp
    )
    flow_dict[index_temp] = pd_zwalm_out_temp['qmodm3s']


In [None]:
# QQ-plot of every selected parameter set against the observed quantiles.
fig, ax = plt.subplots()
ax.plot(obs_quan, obs_quan, label = 'Ideal')
plot_title = 'The best ' + str(top_nr) + ' parametersets'
for modelled_flow in flow_dict.values():
    ax.plot(obs_quan, np.quantile(modelled_flow, quantiles),
            linewidth = 1, marker = 'o', markersize = 2)
    ax.legend()
    ax.set_title(plot_title)


All very analogous behaviour

 # PSO calibration

In [None]:
# PSO switch: results are written after an optimisation run and read from file otherwise.
exec_optimisation = False
exec_write = exec_optimisation
exec_read = not exec_optimisation

# Silence RuntimeWarning and UserWarning messages.
for category in (RuntimeWarning, UserWarning):
    warnings.filterwarnings(action = 'ignore', category = category)

On the lower bound of $q_{const}$:

During early calibration, it was noted that a negative $q_{const}$ led to negative flows. 
To prevent this, a quite strict lower bound of -0.3 m$^3$/s was set for both the NM and the PSO calibration.

Experiment: set lower bound to 0 (analogous to previous parameterset of Cabus) to see if this improves high flow performance


In [None]:
zero_qconst = False  # set to True to execute the experiment (lower bound of qconst = 0)
# Only the last entry (qconst) differs between the two variants; the other
# limits are the same as those used for NM.
qconst_lower = -0 if zero_qconst else -0.3
lower_bound = np.array([160,0,0.1,1,0.9,0.1,0,700,0,1,0,qconst_lower])
upper_bound = np.array([5000,300,2,3,40,15,5000,25000,150,1.000000000000001,20,0.03])
# (lower, upper) pair of arrays, later handed to the PSO optimiser as `bounds`.
bounds = (lower_bound, upper_bound)
print(bounds)

In [None]:
max_iters = 100
n_particles = 70
n_param = max(parameters_initial.shape)  # single-row frame, so this is the number of parameters
deltat = np.single(1)
deltat_out = np.single(24)


def goal_function_NSE(param):
    """Cost handed to the PSO: the negated output of PDM_calibration_wrapper_PSO.

    The wrapper is called with the 'NSE' metric, the forcing of period 1, the
    shapefile-based area and the period-1 time axes; the sign is flipped because
    the swarm searches for the lowest cost.
    """
    return -PDM_calibration_wrapper_PSO(
        param, parameters_initial.columns, 'NSE', p_zwalm_p1['P_thiessen'].values,
        ep_zwalm_p1['EP_thiessen'].values, area_zwalm_new, deltat,
        deltat_out, p1_period.values, p1_period_excl_warmup.values, Q_day['Value']
    )

Code below copied from the pyswarms documentation (https://pyswarms.readthedocs.io/en/latest/examples/tutorials/options_handler.html#Customizing-ending-options )

In [None]:
from pyswarms.backend.operators import compute_pbest, compute_objective_function
def optimize(objective_func, maxiters, oh_strategy,start_opts, end_opts):
    """Run a global-best PSO whose options change over the iterations.

    The swarm size, the number of dimensions and the search bounds are read from
    the notebook-level variables ``n_particles``, ``n_param`` and ``bounds``.

    Parameters
    ----------
    objective_func : callable
        Cost function evaluated on the swarm; the lowest cost is searched for.
    maxiters : int
        Number of iterations to perform.
    oh_strategy : dict
        Passed to ``GlobalBestPSO`` as ``oh_strategy``.
    start_opts : dict
        Passed to ``GlobalBestPSO`` as the initial ``options``.
    end_opts : dict
        Passed to the options handler as ``end_opts`` on every iteration.

    Returns
    -------
    tuple
        ``(final_best_cost, final_best_pos)``: the best cost of the swarm and the
        personal-best position with the lowest personal-best cost.
    """
    opt = ps.single.GlobalBestPSO(n_particles=n_particles, dimensions=n_param,
                                  options=start_opts, oh_strategy=oh_strategy, bounds=bounds)

    swarm = opt.swarm
    opt.bh.memory = swarm.position
    opt.vh.memory = swarm.position
    swarm.pbest_cost = np.full(opt.swarm_size[0], np.inf)

    for i in range(maxiters):
        # Compute cost for current position and personal best
        swarm.current_cost = compute_objective_function(swarm, objective_func)
        swarm.pbest_pos, swarm.pbest_cost = compute_pbest(swarm)
        # Update the global best
        swarm.best_pos, swarm.best_cost = opt.top.compute_gbest(swarm)
        # Perform options update
        swarm.options = opt.oh(opt.options, iternow=i, itermax=maxiters, end_opts=end_opts)
        print("Iteration:", i," Options: ", swarm.options)    # print to see variation
        # Report the best cost *after* the global-best update; reading it before the
        # update printed the value of the previous iteration.
        print("Best cost: ", swarm.best_cost)
        # Perform velocity and position updates
        swarm.velocity = opt.top.compute_velocity(
            swarm, opt.velocity_clamp, opt.vh, opt.bounds
        )
        swarm.position = opt.top.compute_position(
            swarm, opt.bounds, opt.bh
        )
    # Obtain the final best_cost and the final best_position
    final_best_cost = swarm.best_cost.copy()
    final_best_pos = swarm.pbest_pos[
        swarm.pbest_cost.argmin()
    ].copy()
    return final_best_cost, final_best_pos

In [None]:
if exec_optimisation:
    maxiters = 100
    # Option values at the first and the last iteration (from the hydrology paper cf. Obsidian),
    # together with the way each option moves from its start to its end value.
    start_opts = dict(c1 = 2, c2 = 1.3, w = 0.9)
    end_opts = dict(c1 = 1.3, c2 = 2, w = 0.4)
    oh_strategy = dict(w = 'exp_decay', c1 = 'lin_variation', c2 = 'lin_variation')
    cos, pos = optimize(goal_function_NSE, maxiters, oh_strategy, start_opts, end_opts)

In [None]:
# One file per experiment. The file name is chosen once, so writing and reading
# always address the same file (the read branch used to be swapped: with the
# -0.3 bound it loaded the zero-bound result and vice versa).
pso_param_dir = 'data/Zwalm_PDM_parameters/'
if not zero_qconst:
    pso_param_file = pso_param_dir + 'PSO_adap_main_notebook.csv'
else:
    pso_param_file = pso_param_dir + 'PSO_adap_main_notebook_zero_q_const.csv'

if exec_write:
    # Store the best position found by the PSO as a single-row parameter frame
    PSO_opt_param = pd.DataFrame(pos.reshape(1,-1))
    PSO_opt_param.columns = parameters_initial.columns
    PSO_opt_param.to_csv(pso_param_file, index = False)
if exec_read:
    # Results of earlier experiments, kept for reference:
    #PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv')
    #PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_2.csv')
    PSO_opt_param = pd.read_csv(pso_param_file)

Check performance of PSO calibrated model

In [None]:
display(PSO_opt_param)
# Model run over the complete forcing series with the PSO parameter set.
# The shapefile-based area is used here because the PSO goal function and the
# Nelder-Mead runs use it as well; evaluating with a different area than the one
# used for calibration would bias the comparison.
pd_zwalm_out_PSO_opt = PDM(
    P = p_zwalm['P_thiessen'].values,
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
    parameters = PSO_opt_param
)
pd_zwalm_out_PSO_opt = pd_zwalm_out_PSO_opt.set_index(['Time'])

# NSE of the PSO parameter set on the calibration, validation and full window
nse_cal_PSO = NSE(
    pd_zwalm_out_PSO_opt.loc[start_endofwarmup_p1:end_p1,'qmodm3s'].values,
    Q_day.loc[start_endofwarmup_p1:end_p1,'Value'].values
)
nse_val_PSO = NSE(
    pd_zwalm_out_PSO_opt.loc[start_p2:end_p2,'qmodm3s'].values,
    Q_day.loc[start_p2:end_p2,'Value'].values
)
nse_full_PSO = NSE(
    pd_zwalm_out_PSO_opt.loc[start_endofwarmup_p1:end_p2,'qmodm3s'].values,
    Q_day.loc[start_endofwarmup_p1:end_p2,'Value'].values
)
# Matching values of the best Nelder-Mead set; the full-period value comes from
# the 'NSE_full' column (it used to be read from 'NSE_val').
nse_cal_NM = pd_perf_recalc.loc[index_best_param_set,'NSE_cal']
nse_val_NM = pd_perf_recalc.loc[index_best_param_set,'NSE_val']
nse_full_NM = pd_perf_recalc.loc[index_best_param_set,'NSE_full']
print(f'PSO NSE on calibration data: {nse_cal_PSO} vs. {nse_cal_NM} for NM')
print(f'PSO NSE on validation data: {nse_val_PSO} vs. {nse_val_NM} for NM')
print(f'PSO NSE on full data: {nse_full_PSO} vs. {nse_full_NM} for NM')

- For -0.3 as lower bound: 0.7145301364385379, 0.6784564404022595 and 0.7001747672437036 as NSE for the calibration, validation and full period respectively
- For 0 as lower bound: 0.683425753488703, 0.734903955230961 and 0.734903955230961 as NSE for the calibration, validation and full period respectively

So -0.3 as lower bound gives better performance and will be used for NM comparison

Note: the `/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv` was trained with non adaptive default parameters `{'c1': 0.5, 'c2': 0.3, 'w': 0.9}` on mNSE (not optimised in this notebook, cf temp folder). Has significantly lower NSE, but is a slightly better predictor of high flows. Will not be further considered

# Compare with the PSO calibrated result

Cf. performances above: generally a worse performer!

In [None]:
# Alternative PSO results that can be loaded instead:
#PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_2.csv')
#pd.read_csv('data/Zwalm_PDM_parameters/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv')
# Model run with the PSO parameter set. The shapefile-based area is used, i.e. the
# same area as in the PSO goal function and in the Nelder-Mead runs it is compared with.
pd_zwalm_out_PSO_opt = PDM(
    P = p_zwalm['P_thiessen'].values,
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm_new, deltat = deltat, deltatout = deltat_out,
    parameters = PSO_opt_param
)
pd_zwalm_out_PSO_opt = pd_zwalm_out_PSO_opt.set_index(['Time'])

In [None]:
# Interactive overlay: observed flow, best Nelder-Mead set and the PSO set.
observed_curve = Q_day['Value'].hvplot(alpha = 0.7, label = 'observed', line_width = 1.5)
nm_curve = pd_zwalm_out_opt['qmodm3s'].hvplot(
    frame_width = 900, frame_height = 400, alpha = 0.7, label = 'Modelled: NM optimised',
    line_dash = 'dashed', line_width = 1.5)
pso_curve = pd_zwalm_out_PSO_opt['qmodm3s'].hvplot(
    alpha = 0.7, label = 'Modelled: PSO optimised', line_dash = 'dotted', line_width = 1.5,
    color ='green')
observed_curve * nm_curve * pso_curve

Extended QQ-plot

In [None]:
# QQ-plot extended with the PSO result; the probability levels of the earlier QQ-plot are reused.
mod_opt_PSO_quan = np.quantile(pd_zwalm_out_PSO_opt['qmodm3s'], quantiles)
fig, ax = plt.subplots()
qq_curves = (
    (mod_init_quan, 'Initial'),
    (mod_opt_quan, 'NM Optimised'),
    (mod_opt_PSO_quan, 'PSO Optimised'),
)
for modelled_quan, label in qq_curves:
    ax.plot(obs_quan, modelled_quan, marker = 'o', label = label)
ax.plot(obs_quan, obs_quan, label = 'Ideal')  # 1:1 line
ax.legend()
ax.set_xlabel(r'$Q_{obs}$ [m$^3$/s]')
ax.set_ylabel(r'$Q_{mod}$ [m$^3$/s]')
ax.set_title('QQ-plot')

~~PSO calibration is better at predicting the high flows!~~ This was only valid if calibrated with different parameters
~~Note: current implementation if 70 particles on mNSE of calibration~~ 

In [None]:
# Time series of the 'Cstar' model output for the three parameter sets.
fig, ax = plt.subplots()
cstar_runs = (
    (pd_zwalm_out_initial, 'Initial'),
    (pd_zwalm_out_opt, 'Optimal NM'),
    (pd_zwalm_out_PSO_opt, 'Optimal PSO'),
)
for model_out, label in cstar_runs:
    model_out['Cstar'].plot(ax=ax, label=label)
ax.legend()
ax.set_ylabel('C* [mm]')

# Save final parameter set

The set with the best performance (based on NSE and mNSE of the validation set) is used

In [None]:
# The Nelder-Mead parameter set selected as best.
best_param_set

In [None]:
# Store the selected set as a single-row frame. The column labels are taken from the
# series itself, so they cannot get out of step with the values (they used to be
# borrowed from the unrelated frame of initial samples).
best_param_NM = pd.DataFrame(best_param_set.values.reshape(1,-1), columns = best_param_set.index)
best_param_NM.to_csv("data/Zwalm_PDM_parameters/NM_opt_param.csv", index = False)