In [1]:
import sys
import itertools
import math
import numpy as np
import json
import warnings
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.signal import dstep, dlti

import experimental.utils as utils
from experimental.agent import PIDTuningAgent
from experimental.environment import PIDTuningEnvironment
from experimental.runner import Runner
from experimental.runner_opt import Runner_opt

# NOTE(review): with no category/module argument this silences every warning
# for the rest of the session (deprecations and numerical warnings included).
# Consider narrowing the filter to the specific warnings that are expected.
warnings.filterwarnings("ignore")

In [2]:
# Select the synthetic test case (index of the config file) and the saved
# experiment whose results are analysed in this notebook.
testcase, experiment = 6, 23

In [23]:
# Load the configuration of the selected test case.
# The context manager closes the file handle as soon as parsing is finished
# (the handle was previously left open for the lifetime of the kernel).
with open(f'config/testcase_synt_{testcase}.json') as f:
    param_dict = json.load(f)

# Simulation settings
horizon = param_dict['horizon']    # number of rounds per trial
n_trials = param_dict['n_trials']  # number of independent trials
sigma = param_dict['sigma']        # scale of the Gaussian noise drawn with np.random.normal

# Dimensions: n and m size the last axis of the two noise arrays.
# NOTE(review): presumably state (n), input (p) and output (m) dimensions - confirm.
n = param_dict['n']
p = param_dict['p']
m = param_dict['m']

# System matrices read from the config keys 'A', 'B' and 'C'
A = np.array(param_dict['A'])
b = np.array(param_dict['B'])
c = np.array(param_dict['C'])

# Step signal
y_0 = 1

In [24]:
# One (n_trials, horizon) error buffer per algorithm, keyed by algorithm name
optimal, pidtuning = "optimal", "pidtuning"
alg_list = [optimal, pidtuning]
errors = dict(
    (alg, np.zeros((n_trials, horizon)))
    for alg in alg_list
)

In [25]:
# Draw the Gaussian noise realisations with a fixed seed so that they are
# repeatable. The draw order (noise first, out_noise second) determines the
# sampled values and must not be swapped.
np.random.seed(1)
noise_shape = (n_trials, horizon, n)
out_noise_shape = (n_trials, horizon, m)
noise = np.random.normal(loc=0, scale=sigma, size=noise_shape)
out_noise = np.random.normal(loc=0, scale=sigma, size=out_noise_shape)

In [6]:
# Candidate values for each PID gain: 28 points laid out like a base-10
# log grid on [1, 10], normalised to [0, 1] and then rescaled affinely onto
# [range_start, range_end]. The points are therefore denser near range_start.
log_space = np.logspace(0, 1, num=28, base=10)
unit_grid = (log_space - log_space.min()) / (log_space.max() - log_space.min())

# Proportional gain
K_P_range_start, K_P_range_end = 0.0, 1.8
K_P_range = unit_grid * (K_P_range_end - K_P_range_start) + K_P_range_start

# Integral gain
K_I_range_start, K_I_range_end = 0.0, 2.3
K_I_range = unit_grid * (K_I_range_end - K_I_range_start) + K_I_range_start

# Derivative gain
K_D_range_start, K_D_range_end = 0.0, 0.8
K_D_range = unit_grid * (K_D_range_end - K_D_range_start) + K_D_range_start

In [None]:
# Build the list of admissible PID parameters: keep a (K_P, K_I, K_D) triple
# only when the matrix returned by utils.compute_bar_a has spectral radius
# (largest eigenvalue magnitude) below 0.4.
# NOTE(review): bar_A is presumably the closed-loop matrix - confirm in utils.
admissible_gains = []
for gains in itertools.product(K_P_range, K_I_range, K_D_range):
    closed_loop = utils.compute_bar_a(A, b, c, gains)
    spectral_radius = np.absolute(np.linalg.eigvals(closed_loop)).max()
    if spectral_radius < 0.4:
        admissible_gains.append(np.array(gains).reshape(3, 1))

# Stack into one array; each kept action is a (3, 1) column
pid_actions = np.array(admissible_gains)
n_arms = pid_actions.shape[0]

In [None]:
# Number of admissible actions, then the largest value along each of the
# three gain rows (axis 1 indices 0, 1, 2) of pid_actions.
print(n_arms)
for gain_row in range(3):
    print(np.max(pid_actions[:, gain_row, :]))

### Regret

In [None]:
# Load the saved results of the selected experiment.
# Paths use forward slashes so they resolve on every OS; the earlier
# backslash-separated literals relied on invalid escape sequences such as "\e"
# being kept verbatim and only worked on Windows.
experiment_dir = f'./experiments/experiment_{experiment}'

# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects.
# Only load files produced by trusted runs.
errors[pidtuning] = np.load(f'{experiment_dir}/pid_tuning_errors{experiment}.npy', allow_pickle=True)
if experiment != 1:
    # Later experiments are bundled in a single .npz archive; the context
    # manager closes the archive once the arrays have been read.
    with np.load(f'{experiment_dir}/experiment_{experiment}.npz', allow_pickle=True) as loaded:
        errors[optimal] = loaded['optimal_errors']
        K_opt = loaded['K_opt']
        K_opt_idx = loaded['K_opt_idx']
        pid_actions = loaded['pid_actions']
else:
    # Experiment 1 is stored as separate .npy files. pid_actions is not
    # reloaded in this branch, so the array already in the namespace is used.
    errors[optimal] = np.load(f'{experiment_dir}/optimal_errors1.npy', allow_pickle=True)
    K_opt = np.load(f'{experiment_dir}/K_opt_1.npy', allow_pickle=True)
    K_opt_idx = np.load(f'{experiment_dir}/K_opt_idx_1.npy', allow_pickle=True)

#Print shapes
print(np.shape(errors[pidtuning]))
print(np.shape(errors[optimal]))

#Print best PID action for each simulation
print(K_opt_idx)

#Print max P, I, D parameters contained in pid_actions
print("Max P parameter:", np.max(pid_actions[:,0,:]))
print("Max I parameter:", np.max(pid_actions[:,1,:]))
print("Max D parameter:", np.max(pid_actions[:,2,:]))

In [None]:
# Regret of PIDTuning with respect to the optimal controller.

# Instantaneous regret: squared error of PIDTuning minus squared error of the
# optimal action, using the first `horizon` columns of the optimal errors.
inst_regret = errors[pidtuning] ** 2 - errors[optimal][:, :horizon] ** 2

# Cumulative regret per trial: running sum along the time axis, written into
# a float (n_trials, horizon) buffer.
cum_regret = np.zeros((n_trials, horizon))
cum_regret[...] = np.cumsum(inst_regret[:n_trials], axis=1)

# Average over trials. Despite its name, cum_regret_std holds the standard
# deviation divided by sqrt(n_trials), i.e. the standard error of the mean.
cum_regret_mean = cum_regret.mean(axis=0)
cum_regret_std = cum_regret.std(axis=0) / np.sqrt(n_trials)

In [None]:
# Plot the mean cumulative regret with a shaded band of +/- cum_regret_std
band_low = cum_regret_mean - cum_regret_std
band_high = cum_regret_mean + cum_regret_std

plt.figure(figsize=(8, 6))

x_ticks = np.linspace(0, len(cum_regret_mean), 5, dtype=int)
y_ticks = np.linspace(np.floor(band_low.min()), np.ceil(band_high.max()), 5)
# The label is built from the loaded sigma instead of a hard-coded value, and
# the Greek letter is written as an escape so it cannot be garbled by a file
# re-encoding (the previous literal had turned into mojibake).
plt.plot(cum_regret_mean, label=f'\u03c3 = {sigma}')
plt.fill_between(range(len(cum_regret_mean)),
                 band_low,
                 band_high,
                 color='b', alpha=0.2)

plt.xticks(x_ticks, fontsize=20)
plt.yticks(y_ticks, fontsize=20)
plt.xlabel('Number of rounds', fontsize=20)
plt.ylabel('Cumulative Regret', fontsize=20)
plt.grid(True)
plt.show()

# Plot the cumulative regret of the first simulations (at most two), so the
# cell also runs when fewer than two trials are available.
n_shown = min(2, len(cum_regret))
x_ticks = np.linspace(0, len(cum_regret[0]), 5, dtype=int)
# Ticks start at 0 unless some cumulative regret is negative, in which case
# they are extended downwards to cover it.
y_tick_low = min(0, np.floor(np.min(cum_regret)))
y_ticks = np.linspace(y_tick_low, np.ceil(np.max(cum_regret)), 5)
plt.figure(figsize=(8, 6))
for i in range(n_shown):
    label = f'Simulation {i+1}'
    plt.plot(cum_regret[i], label=label)
plt.xticks(x_ticks, fontsize=12)
plt.yticks(y_ticks, fontsize=12)
plt.xlabel('Number of rounds', fontsize=16)
plt.ylabel('Cumulative Regret', fontsize=16)
plt.legend(fontsize=11)
plt.grid(True)
plt.show()


In [None]:
# Map every pulled PID action back to its row index in pid_actions and plot
# the index sequence of one simulation.
# The path uses forward slashes so it resolves on every OS.
# NOTE(security): allow_pickle=True unpickles arbitrary objects; trusted files only.
pulled_arms = np.load(f"./experiments/experiment_{experiment}/pulled_arms_{experiment}.npy", allow_pickle=True)
pulled_arms = pulled_arms.reshape(n_trials, horizon, 3)
pid_actions_reshaped = pid_actions.reshape(len(pid_actions), 3)

# Named pulled_arm_idx rather than `list`: rebinding `list` shadowed the
# builtin and broke every later list(...) call in the same kernel.
pulled_arm_idx = np.zeros((n_trials, horizon), dtype=int)
for sim_i in range(n_trials):
    for t in range(horizon):
        # Rows of pid_actions whose three gains all equal the pulled ones
        match = np.all(pid_actions_reshaped == pulled_arms[sim_i, t], axis=1)
        index = np.where(match)[0]
        if index.size == 0:
            raise ValueError(
                f"Pulled action {pulled_arms[sim_i, t]} "
                f"(simulation {sim_i}, round {t}) is not in pid_actions"
            )
        # Store a scalar index; assigning the whole index array to a single
        # element is deprecated in recent NumPy releases.
        pulled_arm_idx[sim_i, t] = index[0]

# Simulation to display, clamped so the cell runs with fewer than six trials
sim = min(5, n_trials - 1)
plt.figure(figsize=(15, 6))
plt.scatter(np.arange(horizon), pulled_arm_idx[sim], s=10)
plt.grid(True)
plt.yticks(range(0, len(pid_actions)))
# Tick step of at least 1 so range() does not fail when horizon < 10
plt.xticks(range(0, horizon, max(1, horizon // 10)))
plt.xlabel('Time Horizon')
plt.ylabel('PID action index')
plt.title(f"Pulled PID actions in simulation {sim}")
plt.show()