# Testing PSO as a calibration function

In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
from dateutil.relativedelta import relativedelta
import hvplot
import hvplot.pandas
import scipy
import winsound 
import warnings
import pyswarms as ps
from pyswarms.utils.plotters import plot_cost_history
# When the notebook is started from its own subfolder, step one level up so
# that the `functions` package and the relative `data/...` paths resolve.
pad = Path.cwd()
if pad.name == "model_training_and_calibration":
    pad_correct = pad.parent
    os.chdir(pad_correct)
from functions.PDM import PDM, PDM_calibration_wrapper_PSO
from functions.performance_metrics import NSE, mNSE

%load_ext autoreload
%autoreload 2

# Master switch: True runs the PSO calibration and writes its result to disk,
# False skips the optimisation and reads a stored parameter set instead.
exec_optimisation = False
# Writing and reading are mutually exclusive; both follow the master switch.
exec_write = bool(exec_optimisation)
exec_read = not exec_optimisation

# Silence RuntimeWarning and UserWarning messages for the rest of the session.
for _muted_category in (RuntimeWarning, UserWarning):
    warnings.filterwarnings(action='ignore', category=_muted_category)
# Initial PDM parameter set: a single row (index 0) with one float32 column
# per parameter. 'rainfac' (0.00000) is left out on purpose: it is NOT used.
parameters_initial = pd.DataFrame(
    data={
        'cmax':   400.60999,
        'cmin':   87.67600,
        'b':      0.60000,
        'be':     3.00000,
        'k1':     8.00000,
        'k2':     0.70000,
        'kb':     5.04660,
        'kg':     9000.00000,
        'St':     0.43043,
        'bg':     1.00000,
        'tdly':   2.00000,
        'qconst': 0.00000,
    },
    index=[0],
    dtype=np.float32,
)
display(parameters_initial)

# Two estimates of the catchment area, both stored as float32:
# the value attributed to Cabus and the area of the first shapefile geometry
# divided by 10**6 (presumably m^2 -> km^2, matching the printed unit).
area_zwalm_initial = np.single(109.2300034)
zwalm_shape = gpd.read_file('data/Zwalm_shape/zwalm_shapefile_emma_31370.shp')
area_zwalm_new = np.single(zwalm_shape.area[0] / 10**6)
# `!s` forces str(): a plain f-string would print float32 scalars via float().
print(f'Area of the Zwalm by Cabus: {area_zwalm_initial!s}[km^2]')
print(f'Area of the Zwalm by shapefile: {area_zwalm_new!s}[km^2]')

In [None]:
# Forcing series produced by the preprocessing step
# (file names suggest Thiessen-weighted P and EP — TODO confirm).
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_zwalm = pd.read_pickle(preprocess_output_folder / 'zwalm_p_thiessen.pkl')
ep_zwalm = pd.read_pickle(preprocess_output_folder / 'zwalm_ep_thiessen.pkl')
# Negative EP values are reset to zero (advice of Niko, 21/12/2022).
ep_zwalm.loc[ep_zwalm['EP_thiessen'].lt(0), 'EP_thiessen'] = 0
# Temporary fix (disabled): also reset missing EP values to zero.
# ep_zwalm.loc[np.isnan(ep_zwalm['EP_thiessen']), 'EP_thiessen'] = 0

# Discharge series (presumably daily observations), indexed by its timestamp.
pywaterinfo_output_folder = Path("data/Zwalm_data/pywaterinfo_output")
Q_day = pd.read_pickle(pywaterinfo_output_folder / "Q_day.pkl").set_index('Timestamp')
# Number of months at the start of each period that are excluded from scoring.
warmup_months = 9

# Period 1: first forcing timestamp up to 2019-12-31 23:00.
start_p1 = p_zwalm['Timestamp'].iloc[0]
start_endofwarmup_p1 = start_p1 + relativedelta(months=warmup_months)
end_p1 = pd.Timestamp(datetime(2019, 12, 31, 23))
print(f'Characteristics of period 1: start = {start_p1!s}, start of post warmup = {start_endofwarmup_p1!s} and end = {end_p1!s}')

# Period 2: 2020-01-01 00:00 up to the last forcing timestamp.
start_p2 = pd.Timestamp(datetime(2020, 1, 1, 0))
start_endofwarmup_p2 = start_p2 + relativedelta(months=warmup_months)
end_p2 = p_zwalm['Timestamp'].iloc[-1]
print(f'Characteristics of period 2: start = {start_p2!s}, start of post warmup = {start_endofwarmup_p2!s} and end = {end_p2!s}')

# Hourly ranges span a whole period; daily ranges skip the warm-up and are
# used for scoring the model.
p1_period = pd.date_range(start_p1, end_p1, freq='H')
p1_period_excl_warmup = pd.date_range(start_endofwarmup_p1, end_p1, freq='D')
p2_period = pd.date_range(start_p2, end_p2, freq='H')
p2_period_excl_warmup = pd.date_range(start_endofwarmup_p2, end_p2, freq='D')
# Both periods combined (pandas' default daily frequency).
p_all = pd.date_range(start_p1, end_p2)
p_all_nowarmup = pd.date_range(start_endofwarmup_p1, end_p2)

# Split the P and EP forcings over period 1 and period 2.
# Each frame is indexed by timestamp once, so an hourly range selects its rows.
_p_by_time = p_zwalm.set_index('Timestamp')
_ep_by_time = ep_zwalm.set_index('Timestamp')
# forcings for period 1
p_zwalm_p1 = _p_by_time.loc[p1_period]
ep_zwalm_p1 = _ep_by_time.loc[p1_period]
# forcings for period 2
p_zwalm_p2 = _p_by_time.loc[p2_period]
ep_zwalm_p2 = _ep_by_time.loc[p2_period]


Changed calibration period from 2017 to end of 2019 on 06/03/2023

In [None]:
# Search box of the optimiser as (lower, upper) pairs, one pair per dimension.
# Labels assume the column order of parameters_initial — TODO confirm.
_search_box = [
    (160, 5000),              # cmax
    (0, 300),                 # cmin
    (0.1, 2),                 # b
    (1, 3),                   # be
    (0.9, 40),                # k1
    (0.1, 15),                # k2
    (0, 5000),                # kb
    (700, 25000),             # kg
    (0, 150),                 # St
    (1, 1.000000000000001),   # bg: lower and upper bound nearly coincide
    (0, 20),                  # tdly
    # set q const very strict to not allow negative flows!
    # (-0.03 being very strict, -0.3 moderately strict)
    (-0.03, 0.03),            # qconst
]
lower_bound = np.array([low for low, _high in _search_box])
upper_bound = np.array([high for _low, high in _search_box])
bounds = (lower_bound, upper_bound)
print(bounds)

Documentation for `pyswarms.single.GlobalBestPSO`: https://pyswarms.readthedocs.io/en/latest/api/pyswarms.single.html#module-pyswarms.single.global_best

A swarm size of 70 to 500 particles is often a good choice: https://www.sciencedirect.com/science/article/pii/S2210650220303710

In [None]:
# options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}  # the default parameter values from the documentation site

# Update 19/03/2023: use adaptive parameters as defined in Obsidian
max_iter = 100


def update_params(iteration):
    """Return the PSO coefficients to use at a given iteration.

    All three coefficients are interpolated linearly over ``max_iter``
    iterations: ``w`` goes from 0.9 down to 0.4, ``c1`` from 2.5 down to 0.5
    and ``c2`` from 0.5 up to 2.5.

    Parameters
    ----------
    iteration : int or float
        Current iteration number; 0 gives the start values and ``max_iter``
        gives the end values.

    Returns
    -------
    dict
        Dictionary with the keys ``'w'``, ``'c1'`` and ``'c2'``.
    """
    w_min = 0.4
    w_max = 0.9
    c_min = 0.5
    c_max = 2.5
    w = w_max + (w_min - w_max)*iteration/max_iter
    c1 = c_max + (c_min - c_max)*iteration/max_iter
    # c2 must RISE from c_min to c_max. The previous (c_min - c_max) slope
    # drove it from 0.5 down to -1.5 instead of up to 2.5.
    c2 = c_min + (c_max - c_min)*iteration/max_iter
    iter_dict = {'w': w, 'c1': c1, 'c2': c2}
    print(iter_dict)
    return iter_dict
# Swarm size and problem dimension (the largest axis of the parameter frame).
n_particles = 70
n_param = max(parameters_initial.shape)

# Start values of the PSO coefficients plus the per-iteration update hook.
# NOTE(review): 'update_interval' and 'update_function' are not among the
# option keys documented for GlobalBestPSO (c1, c2, w) — confirm that the
# installed pyswarms actually honours them.
options = {
    'c1': 2.5,
    'c2': 0.5,
    'w': 0.9,
    'update_interval': 1,
    'update_function': update_params,
}

# Optional: add initial positions (random perturbation from Cabus parameters),
# clipped to the search bounds.
# init_pos = np.zeros((n_particles,n_param))
# for i in range(n_particles):
#     init_pos[i,:] = parameters_initial.values * np.random.uniform(0.5,1.5,n_param)
#     init_pos[i, init_pos[i,:] < lower_bound] = lower_bound[init_pos[i,:] < lower_bound]
#     init_pos[i, init_pos[i,:] > upper_bound] = upper_bound[init_pos[i,:] > upper_bound]
optimizer = ps.single.GlobalBestPSO(
    n_particles=n_particles,
    dimensions=n_param,
    options=options,
    bounds=bounds,
    # init_pos=init_pos,
)
#init_pos

In [None]:
# Model time step and output time step as float32
# (presumably in hours: hourly forcings in, daily discharge out — TODO confirm).
deltat = np.single(1)
deltat_out = np.single(24)


def goal_function_NSE(param):
    """Cost function for the swarm, based on the 'NSE' metric over period 1.

    Forwards ``param`` to ``PDM_calibration_wrapper_PSO`` together with the
    period-1 forcings, the shapefile catchment area, both time steps, the
    period-1 time axes and the ``Q_day['Value']`` series, and returns the
    negated result (presumably so that minimising the cost maximises the NSE).
    """
    score = PDM_calibration_wrapper_PSO(
        param,
        parameters_initial.columns,
        'NSE',
        p_zwalm_p1['P_thiessen'].values,
        ep_zwalm_p1['EP_thiessen'].values,
        area_zwalm_new,
        deltat,
        deltat_out,
        p1_period.values,
        p1_period_excl_warmup.values,
        Q_day['Value'],
    )
    return -score

In [None]:
parameters_initial

Idea copied from the documentation below (20/03/2023): https://pyswarms.readthedocs.io/en/latest/examples/tutorials/options_handler.html#Customizing-ending-options. Will be saved in `p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_3.csv`

In [None]:
from pyswarms.backend.operators import compute_pbest, compute_objective_function


def optimize(objective_func, maxiters, oh_strategy, start_opts, end_opts):
    """Run a global-best PSO with a hand-written loop and per-iteration option updates.

    The swarm size, the number of dimensions and the search bounds are taken
    from the notebook-level names ``n_particles``, ``n_param`` and ``bounds``.

    Parameters
    ----------
    objective_func : callable
        Cost function evaluated on the swarm through
        ``compute_objective_function``.
    maxiters : int
        Number of iterations of the loop.
    oh_strategy : dict
        Options-handler strategy passed to ``GlobalBestPSO``.
    start_opts : dict
        Options (``c1``, ``c2``, ``w``) the optimizer is created with.
    end_opts : dict
        Options passed to the options handler as ``end_opts`` on every
        iteration.

    Returns
    -------
    tuple
        ``(final_best_cost, final_best_pos)``: a copy of the swarm's best
        cost and a copy of the personal-best position with the lowest
        personal-best cost.
    """
    opt = ps.single.GlobalBestPSO(
        n_particles=n_particles, dimensions=n_param,
        options=start_opts, oh_strategy=oh_strategy, bounds=bounds,
    )

    swarm = opt.swarm
    opt.bh.memory = swarm.position
    opt.vh.memory = swarm.position
    # No personal best exists yet: start every particle at an infinite cost.
    swarm.pbest_cost = np.full(opt.swarm_size[0], np.inf)

    for i in range(maxiters):
        # Compute cost for current position and personal best
        swarm.current_cost = compute_objective_function(swarm, objective_func)
        swarm.pbest_pos, swarm.pbest_cost = compute_pbest(swarm)

        # Refresh the global best from the updated personal bests
        swarm.best_pos, swarm.best_cost = opt.top.compute_gbest(swarm)
        # Perform options update
        swarm.options = opt.oh(opt.options, iternow=i, itermax=maxiters, end_opts=end_opts)
        print("Iteration:", i, " Options: ", swarm.options)    # print to see variation
        # Report the best cost INCLUDING this iteration. The value captured
        # before compute_gbest lagged one iteration behind and showed the
        # swarm's initial value on the first pass.
        print("Best cost: ", swarm.best_cost)
        # Perform velocity and position updates
        swarm.velocity = opt.top.compute_velocity(
            swarm, opt.velocity_clamp, opt.vh, opt.bounds
        )
        swarm.position = opt.top.compute_position(
            swarm, opt.bounds, opt.bh
        )
    # Obtain the final best_cost and the final best_position
    final_best_cost = swarm.best_cost.copy()
    final_best_pos = swarm.pbest_pos[
        swarm.pbest_cost.argmin()
    ].copy()
    return final_best_cost, final_best_pos

https://lup.lub.lu.se/luur/download?func=downloadFile&recordOId=9101091&fileOId=9101257

In [None]:
if exec_optimisation:
    maxiters = 100
    # PSO coefficients at the first and at the last iteration. Ref:[1]
    start_opts = dict(c1=2.5, c2=0.5, w=0.9)
    end_opts = dict(c1=0.5, c2=2.5, w=0.4)
    # Strategy name per coefficient, handed to the pyswarms options handler.
    oh_strategy = dict(w='exp_decay', c1='lin_variation', c2='lin_variation')
    cos, pos = optimize(
        goal_function_NSE, maxiters, oh_strategy, start_opts, end_opts
    )

In [None]:
# if exec_optimisation:
#     cos, pos = optimizer.optimize(goal_function_NSE, iters = 100) 

In [None]:
# plot_cost_history(cost_history=optimizer.cost_history)
# plt.show()

https://stackoverflow.com/questions/59637245/choosing-initial-positions-in-pyswarm-particle-swarm-optimization 

To add an initial position!

Add custom loop

In [None]:
parameters_initial

In [None]:
# import winsound
# freq = 440
# duration = 1000
# winsound.Beep(freq, duration)

In [None]:
if exec_write:
    # Store the optimum as a one-row frame that reuses the parameter names.
    PSO_opt_param = pd.DataFrame(
        pos.reshape(1, -1), columns=parameters_initial.columns
    )
    PSO_opt_param.to_csv(
        'data/Zwalm_PDM_parameters/p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_3.csv',
        index=False,
    )
if exec_read:
    # Alternative stored set:
    # PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/pall_opt_param_mNSE_PSO.csv')
    PSO_opt_param = pd.read_csv(
        'data/Zwalm_PDM_parameters/p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_2.csv'
    )

20/03/2023: `p1_opt_param_NSE_PSO_70_particles_qconst_strict_adap_param_2.csv` is a fairly good parameter set.

`data/Zwalm_PDM_parameters/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv` is also fairly good.

mNSE PSO with 70 particles and strict qconst bounds = a fairly good parameter set, IMO.

In [None]:
# Alternative parameter set:
# PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv')

# Run the PDM over the complete forcing record with the selected parameter
# set and index the output by its 'Time' column.
pd_zwalm_out_PSO_opt = PDM(
    P=p_zwalm['P_thiessen'].values,
    EP=ep_zwalm['EP_thiessen'].values,
    t=p_zwalm['Timestamp'].values,
    area=area_zwalm_new,
    deltat=deltat,
    deltatout=deltat_out,
    parameters=PSO_opt_param,
).set_index(['Time'])

# Overlay the Q_day series and the modelled 'qmodm3s' series in one plot.
_q_day_plot = Q_day['Value'].hvplot(alpha=0.7)
_q_model_plot = pd_zwalm_out_PSO_opt['qmodm3s'].hvplot(
    alpha=0.7, frame_width=900, frame_height=400, title='PSO'
)
display(_q_day_plot * _q_model_plot)

# Score from the end of the period-1 warm-up until the end of the record.
_q_model = pd_zwalm_out_PSO_opt.loc[start_endofwarmup_p1:end_p2, 'qmodm3s'].values
_q_day = Q_day.loc[start_endofwarmup_p1:end_p2, 'Value'].values
nse_PSO = NSE(_q_model, _q_day)
mnse_PSO = mNSE(_q_model, _q_day)

display(PSO_opt_param)
print(f'NSE from end of warmup till end of dataset: {nse_PSO!s}')
print(f'mNSE from end of warmup till end of dataset: {mnse_PSO!s}')


Note: when the baseflow fits well over the calibration period, it does not fit well over the validation period...

## example from site

In [None]:
import numpy as np

# Parameterized version of the classic Rosenbrock unconstrained optimization function
def rosenbrock_with_args(x, a, b, c=0):
    """Evaluate the Rosenbrock function for every row of ``x``.

    ``x`` is indexed as ``x[:, 0]`` and ``x[:, 1]``, so it needs two columns.
    The result per row is ``(a - x0)**2 + b * (x1 - x0**2)**2 + c``.
    """
    x0, x1 = x[:, 0], x[:, 1]
    offset_term = (a - x0) ** 2
    valley_term = b * (x1 - x0 ** 2) ** 2
    return offset_term + valley_term + c

In [None]:
# Rosenbrock demo settings. The names carry a `rosen_` prefix so that running
# this cell no longer overwrites the calibration's `bounds`, `options`,
# `optimizer` and `pos` defined earlier in the notebook.
x_max = 10 * np.ones(2)
x_min = -1 * x_max
rosen_bounds = (x_min, x_max)
rosen_options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
rosen_optimizer = ps.single.GlobalBestPSO(
    n_particles=10, dimensions=2, options=rosen_options, bounds=rosen_bounds
)

# Run the optimization; a, b and c are forwarded to the objective function
# as keyword arguments.
rosen_cost, rosen_pos = rosen_optimizer.optimize(
    rosenbrock_with_args, 1000, a=1, b=100, c=0
)