# Tesiting PSO as a calibration function

In [17]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
from dateutil.relativedelta import relativedelta
import hvplot
import hvplot.pandas
import scipy
import winsound 
import warnings
import pyswarms as ps
pad = Path(os.getcwd())
if pad.name != "Python":
    pad_correct = Path("../../Python")
    os.chdir(pad_correct)
from functions.PDM import PDM, PDM_calibration_wrapper_PSO
from functions.performance_metrics import NSE, mNSE

%load_ext autoreload
%autoreload 2

exec_optimisation = True
if exec_optimisation:
    exec_write = True
    exec_read = False
else:
    exec_write = False
    exec_read = True

warnings.filterwarnings(action = 'ignore', category= RuntimeWarning)
warnings.filterwarnings(action = 'ignore', category= UserWarning)
parameters_initial = pd.DataFrame({
    'cmax': 400.60999,
    'cmin':87.67600,
    'b':0.60000,
    'be':3.00000,
    'k1':8.00000,
    'k2':0.70000,
    'kb':5.04660,
    'kg':9000.00000,
    'St': 0.43043,
    'bg':1.00000,
    'tdly':2.00000,
    'qconst':0.00000,
    #'rainfac':0.00000 THIS IS NOT USED!
}, dtype = np.float32, index =[0])
display(parameters_initial)

area_zwalm_initial = np.single(109.2300034)
zwalm_shape = gpd.read_file('data/Zwalm_shape/zwalm_shapefile_emma_31370.shp')
area_zwalm_new = np.single(zwalm_shape.area[0]/10**6)
print('Area of the Zwalm by Cabus: ' + str(area_zwalm_initial) + '[km^2]')
print('Area of the Zwalm by shapefile: ' + str(area_zwalm_new) + '[km^2]')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,cmax,cmin,b,be,k1,k2,kb,kg,St,bg,tdly,qconst
0,400.609985,87.676003,0.6,3.0,8.0,0.7,5.0466,9000.0,0.43043,1.0,2.0,0.0


Area of the Zwalm by Cabus: 109.23[km^2]
Area of the Zwalm by shapefile: 115.208626[km^2]


In [2]:
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_zwalm = pd.read_pickle(preprocess_output_folder/'zwalm_p_thiessen.pkl')
ep_zwalm = pd.read_pickle(preprocess_output_folder/'zwalm_ep_thiessen.pkl')
ep_zwalm.loc[ep_zwalm['EP_thiessen'] <0, 'EP_thiessen'] = 0 #ADVICE OF NIKO 21/12/2022
#Temporary fix! 
#ep_zwalm.loc[np.isnan(ep_zwalm['EP_thiessen']),'EP_thiessen'] = 0

pywaterinfo_output_folder = Path("data/Zwalm_data/pywaterinfo_output")
Q_day = pd.read_pickle(pywaterinfo_output_folder/"Q_day.pkl")
Q_day = Q_day.set_index('Timestamp')
warmup_months = 9
start_p1 = p_zwalm['Timestamp'].iloc[0]
start_endofwarmup_p1 = start_p1 + relativedelta(months = warmup_months)
end_p1 =  pd.Timestamp(datetime(year = 2017, month = 12, day = 31, hour = 23))
print('Characteristics of period 1: start = '  + str(start_p1) + ', start of post warmup = ' + str(start_endofwarmup_p1) + ' and end = ' + str(end_p1))

start_p2 = pd.Timestamp(datetime(year = 2018, month= 1, day = 1, hour = 0))
start_endofwarmup_p2 = start_p2 + relativedelta(months = warmup_months)
end_p2 = p_zwalm['Timestamp'].iloc[-1]
print('Characteristics of period 2: start = '  + str(start_p2) + ', start of post warmup = ' + str(start_endofwarmup_p2) + ' and end = ' + str(end_p2))

p1_period_excl_warmup = pd.date_range(start_endofwarmup_p1,end_p1,
freq = 'D') #used for scoring the model 
p1_period = pd.date_range(start_p1, end_p1, freq = 'H')
p2_period_excl_warmup = pd.date_range(start_endofwarmup_p2,end_p2,
freq = 'D') #used for scoring the model 
p2_period = pd.date_range(start_p2, end_p2, freq = 'H')
p_all_nowarmup = pd.date_range(start_endofwarmup_p1, end_p2)
p_all = pd.date_range(start_p1, end_p2)

#now subdivide ep data on p1 and p2
#for ease of selecting data, set time as index!
#select forcings for p1 period
p_zwalm_p1 = p_zwalm.set_index('Timestamp').loc[p1_period]
ep_zwalm_p1 = ep_zwalm.set_index('Timestamp').loc[p1_period]
#select forcings for p2 period
p_zwalm_p2 = p_zwalm.set_index('Timestamp').loc[p2_period]
ep_zwalm_p2 = ep_zwalm.set_index('Timestamp').loc[p2_period]


Characteristics of period 1: start = 2012-01-01 00:00:00, start of post warmup = 2012-10-01 00:00:00 and end = 2017-12-31 23:00:00
Characteristics of period 2: start = 2018-01-01 00:00:00, start of post warmup = 2018-10-01 00:00:00 and end = 2022-11-05 23:00:00


In [8]:
lower_bound = np.array([160,0,0.1,1,0.9,0.1,0,700,0,1,0,-0.03])
#set q const very strict to not allow negative flows! 
upper_bound = np.array([5000,300,2,3,40,15,5000,25000,150,1.000000000000001,20,0.03])
bounds = (lower_bound, upper_bound)
print(bounds)

(array([ 1.6e+02,  0.0e+00,  1.0e-01,  1.0e+00,  9.0e-01,  1.0e-01,
        0.0e+00,  7.0e+02,  0.0e+00,  1.0e+00,  0.0e+00, -3.0e-02]), array([5.0e+03, 3.0e+02, 2.0e+00, 3.0e+00, 4.0e+01, 1.5e+01, 5.0e+03,
       2.5e+04, 1.5e+02, 1.0e+00, 2.0e+01, 3.0e-02]))


https://pyswarms.readthedocs.io/en/latest/api/pyswarms.single.html#module-pyswarms.single.global_best Documentation!

https://www.sciencedirect.com/science/article/pii/S2210650220303710 70 tot 500 vaak goede swarm size

In [9]:
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
n_particles = 70
#optional to add initial positions!
n_param = max(parameters_initial.shape)
init_pos = np.zeros((n_particles,n_param))
for i in range(n_particles):
    init_pos[i,:] = parameters_initial.values * np.random.uniform(0.5,1.5,n_param)
    init_pos[i, init_pos[i,:] < lower_bound] = lower_bound[init_pos[i,:] < lower_bound]
    init_pos[i, init_pos[i,:] > upper_bound] = upper_bound[init_pos[i,:] > upper_bound]
    #random perturbation from Cabus parameters!
optimizer = ps.single.GlobalBestPSO(
    n_particles= n_particles, dimensions = n_param,
    options = options, bounds=bounds,# init_pos=init_pos
)
init_pos

array([[5.88892557e+02, 8.45484858e+01, 8.22580767e-01, 2.72094879e+00,
        1.01747676e+01, 9.72187492e-01, 5.60730406e+00, 1.00087261e+04,
        3.73068465e-01, 1.00000000e+00, 2.21048294e+00, 0.00000000e+00],
       [3.27264247e+02, 1.18347493e+02, 6.39319426e-01, 1.55767892e+00,
        6.84408959e+00, 4.94977088e-01, 5.37209981e+00, 1.06534703e+04,
        2.16589571e-01, 1.00000000e+00, 2.38958657e+00, 0.00000000e+00],
       [2.55703222e+02, 7.77865964e+01, 6.05723140e-01, 3.00000000e+00,
        4.26534598e+00, 9.65293637e-01, 3.15171557e+00, 1.20866060e+04,
        2.16309006e-01, 1.00000000e+00, 1.61719485e+00, 0.00000000e+00],
       [2.53391102e+02, 7.56141830e+01, 7.30015054e-01, 1.59954097e+00,
        7.41320946e+00, 9.97762794e-01, 3.27099775e+00, 1.18506509e+04,
        3.45314492e-01, 1.00000000e+00, 2.88501783e+00, 0.00000000e+00],
       [4.15218028e+02, 5.72273794e+01, 3.74628775e-01, 3.00000000e+00,
        1.04361525e+01, 7.50591026e-01, 4.04277549e+00, 1.25

In [10]:
deltat = np.single(1)
deltat_out = np.single(24)
goal_function_mNSE = lambda param: -PDM_calibration_wrapper_PSO(
    param, parameters_initial.columns, 'mNSE',p_zwalm_p1['P_thiessen'].values,
    ep_zwalm_p1['EP_thiessen'].values, area_zwalm_new, deltat,
    deltat_out, p1_period.values, p1_period_excl_warmup.values, Q_day['Value']
)

In [11]:
parameters_initial

Unnamed: 0,cmax,cmin,b,be,k1,k2,kb,kg,St,bg,tdly,qconst
0,400.609985,87.676003,0.6,3.0,8.0,0.7,5.0466,9000.0,0.43043,1.0,2.0,0.0


In [12]:
if exec_optimisation:
    cos, pos = optimizer.optimize(goal_function_mNSE, iters = 100) 

2022-12-21 19:24:30,222 - pyswarms.single.global_best - INFO - Optimize for 100 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|██████████|100/100, best_cost=-.351
2022-12-21 21:00:37,660 - pyswarms.single.global_best - INFO - Optimization finished | best cost: -0.3512290488594202, best pos: [ 6.18098882e+02  2.83419991e+02  6.57736156e-01  2.76003992e+00
  2.37146549e+01  3.18322267e+00  2.75485451e+03  1.41117927e+04
  9.38255593e+01  1.00000000e+00  1.67054804e+00 -2.95873348e-02]


https://stackoverflow.com/questions/59637245/choosing-initial-positions-in-pyswarm-particle-swarm-optimization 

To add an initial position!

In [13]:
import winsound
freq = 440
duration = 1000
winsound.Beep(freq, duration)

In [18]:
if exec_write:
    PSO_opt_param = pd.DataFrame(pos.reshape(1,-1))
    PSO_opt_param.columns = parameters_initial.columns
    PSO_opt_param.to_csv('data/Zwalm_PDM_parameters/p1_opt_param_mNSE_PSO_70_particles_qconst_strict.csv')
if exec_read:
    PSO_opt_param = pd.read_csv('data/Zwalm_PDM_parameters/pall_opt_param_mNSE_PSO.csv')

mnse PSO 70 particles q const strict = vrij goede dataset IMO

In [19]:
pd_zwalm_out_PSO_opt = PDM(P = p_zwalm['P_thiessen'].values, 
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm_initial, deltat = deltat, deltatout = deltat_out ,
    parameters = PSO_opt_param)
pd_zwalm_out_PSO_opt = pd_zwalm_out_PSO_opt.set_index(['Time'])
display(pd_zwalm_out_PSO_opt['qmodm3s'].hvplot(alpha = 0.7, frame_width = 900, 
frame_height = 400, title = 'PSO') * Q_day['Value'].hvplot(alpha = 0.7))
nse_PSO = NSE(
    pd_zwalm_out_PSO_opt.loc[start_endofwarmup_p1:end_p2,'qmodm3s'].values,
    Q_day.loc[start_endofwarmup_p1:end_p2,'Value'].values
)
mnse_PSO = mNSE(
    pd_zwalm_out_PSO_opt.loc[start_endofwarmup_p1:end_p2,'qmodm3s'].values,
    Q_day.loc[start_endofwarmup_p1:end_p2,'Value'].values
)
display(PSO_opt_param)
print(nse_PSO)
print(mnse_PSO)


Unnamed: 0,cmax,cmin,b,be,k1,k2,kb,kg,St,bg,tdly,qconst
0,618.098882,283.419991,0.657736,2.76004,23.714655,3.183223,2754.854513,14111.792722,93.825559,1.0,1.670548,-0.029587


0.6677931607087904
0.5006297109074807


Bemerk: als baseflow goed zit op kalibratieperiode, dan niet op validatieperiode...

## example from site

In [None]:
import numpy as np

# create a parameterized version of the classic Rosenbrock unconstrained optimzation function
def rosenbrock_with_args(x, a, b, c=0):
    f = (a - x[:, 0]) ** 2 + b * (x[:, 1] - x[:, 0] ** 2) ** 2 + c
    import pdb; pdb.set_trace()
    return f

In [None]:
x_max = 10 * np.ones(2)
x_min = -1 * x_max
bounds = (x_min, x_max)
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=10, dimensions=2, options=options, bounds=bounds)

# now run the optimization, pass a=1 and b=100 as a tuple assigned to args

cost, pos = optimizer.optimize(rosenbrock_with_args, 1000, a=1, b=100, c=0)

2022-12-21 16:40:36,455 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/1000

> [1;32mc:\users\olivi\appdata\local\temp\ipykernel_11396\36592133.py[0m(7)[0;36mrosenbrock_with_args[1;34m()[0m

array([3.37209235e+05, 8.94189453e+05, 4.39685346e+03, 3.91787770e+05,
       7.13221558e+05, 6.45045604e+03, 6.18862956e+01, 1.61246027e+03,
       2.66994651e+05, 3.40259695e+03])
(10,)
(10, 2)


pyswarms.single.global_best:   0%|          |1/1000, best_cost=61.9

> [1;32mc:\users\olivi\appdata\local\temp\ipykernel_11396\36592133.py[0m(7)[0;36mrosenbrock_with_args[1;34m()[0m



pyswarms.single.global_best:   0%|          |1/1000, best_cost=61.9
