In [1]:
import numpy as np
import pandas as pd
import importlib

import ipywidgets as widgets
import optuna

In [2]:
import cpagent
import cprender
import cpenvs
import cppid

# Re-import the local project modules so edits made on disk are picked up
# without restarting the kernel. The order is kept as written: cppid first,
# cpenvs last.
for _module in (cppid, cpagent, cprender, cpenvs):
    # Bind the result to `_` so the reloaded module is not echoed as cell output.
    _ = importlib.reload(_module)

In [3]:
def objective(trial: optuna.Trial):
    """Score one set of PID gains proposed by Optuna.

    Four gains are sampled on a log scale: ``KP_pole``, ``KI_pole``,
    ``KD_pole`` and ``KP_cart``. The remaining two entries of the second
    gain tuple are pinned to 0.0 (their search is currently disabled).

    Args:
        trial: The Optuna trial used to draw the gain values.

    Returns:
        The sum over every recorded row of ``0.5 - |cart_pos|`` from a
        5-episode run, so rows where the cart is within 0.5 of the origin
        add to the score and rows further away subtract from it.
    """
    # First gain tuple: all three values are searched.
    pole_gains = (
        trial.suggest_float("KP_pole", 1e-3, 1e0, log=True),
        trial.suggest_float("KI_pole", 1e-4, 1e0, log=True),
        trial.suggest_float("KD_pole", 1e-3, 1e1, log=True),
    )
    # Second gain tuple: only the first value is searched; the other two
    # stay at zero until their search ranges are re-enabled.
    cart_gains = (
        trial.suggest_float("KP_cart", 1e-4, 1e0, log=True),
        0.0,
        0.0,
    )

    controller = cppid.PidAgent(pole_gains, cart_gains)
    rollouts = cpagent.execute_cartpole(
        controller,
        env=cpenvs.StandardCartPoleEnv(),
        num_episodes=5,
    )

    cart_offset = np.abs(rollouts["cart_pos"])
    return np.sum(0.5 - cart_offset)


In [4]:
# Seed the sampler so the sequence of suggested gains is the same on every
# Restart & Run All. TPESampler is what create_study uses by default for a
# single-objective study, so only the seeding changes.
# NOTE(review): the objective itself may still be noisy if the environment's
# initial states are not seeded — confirm in cpenvs / cpagent.
SAMPLER_SEED = 42

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)
study.optimize(objective, n_trials=300)

[I 2024-02-24 11:08:39,581] A new study created in memory with name: no-name-e2345d19-59a5-4beb-9962-7095b5d0d48b
[I 2024-02-24 11:08:39,633] Trial 0 finished with value: -73.0197713819552 and parameters: {'KP_pole': 0.280883394425156, 'KI_pole': 0.000805935725121863, 'KD_pole': 4.102937417752145, 'KP_cart': 0.00014254062705161801}. Best is trial 0 with value: -73.0197713819552.
[I 2024-02-24 11:08:39,738] Trial 1 finished with value: 407.1668750188434 and parameters: {'KP_pole': 0.05826764690676928, 'KI_pole': 0.006686164226715573, 'KD_pole': 0.06331399937409167, 'KP_cart': 0.008821247312032796}. Best is trial 1 with value: 407.1668750188434.
[I 2024-02-24 11:08:39,786] Trial 2 finished with value: 51.555114236417474 and parameters: {'KP_pole': 0.0701159922469684, 'KI_pole': 0.0010580142413271574, 'KD_pole': 0.16242512295642172, 'KP_cart': 0.017869409869723615}. Best is trial 1 with value: 407.1668750188434.
[I 2024-02-24 11:08:39,823] Trial 3 finished with value: -197.13449612600354 

In [9]:
# Summarise the highest-scoring trial of the study: its objective value
# followed by one indented line per tuned parameter.
trial = study.best_trial

print(f"  Value:  {trial.value}")

print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

  Value:  1225.3633001326089
  Params: 
    KP_pole: 0.4269690262500987
    KI_pole: 0.6819268584283704
    KD_pole: 0.016663340792730685
    KP_cart: 0.2898639337298743


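Gains noted from a different run (presumably an earlier study in which KI_cart and KD_cart were also searched; confirm):

KP_pole: 0.6247963198201079
KI_pole: 0.08105321218590597
KD_pole: 2.1302067923471855
KP_cart: 0.4098958164477601
KI_cart: 0.015858240208657504
KD_cart: 0.03466579096997909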

In [10]:
# Gains copied by hand from the best-trial printout (KP_pole, KI_pole, KD_pole
# and KP_cart); the last two entries of the second tuple are left at zero,
# matching the search configuration.
pole_gains = (0.4269690262500987, 0.6819268584283704, 0.016663340792730685)
cart_gains = (0.2898639337298743, 0.0, 0.0)

# Run the tuned agent with execute_cartpole's default episode count and keep
# the per-step log for the viewer and plots below.
agent = cppid.PidAgent(pole_gains, cart_gains)
df = cpagent.execute_cartpole(agent, env=cpenvs.StandardCartPoleEnv())
df

Unnamed: 0,ep,t,cart_pos,cart_vel,pole_ang,pole_vel,pos_deviation,reward,cart_pos_setpoint
0,0,0,-0.042162,-0.010219,-0.021819,0.030990,-0.042162,,0.0
1,0,1,-0.042366,-0.205021,-0.021199,0.316710,-0.042366,1.0,0.0
2,0,2,-0.046466,-0.399835,-0.014865,0.602632,-0.046466,1.0,0.0
3,0,3,-0.054463,-0.594746,-0.002812,0.890596,-0.054463,1.0,0.0
4,0,4,-0.066358,-0.789830,0.015000,1.182394,-0.066358,1.0,0.0
...,...,...,...,...,...,...,...,...,...
10015,19,496,0.027895,0.042963,-0.018445,-0.038710,0.027895,1.0,0.0
10016,19,497,0.028755,-0.151889,-0.019219,0.248097,0.028755,1.0,0.0
10017,19,498,0.025717,-0.346731,-0.014257,0.534656,0.025717,1.0,0.0
10018,19,499,0.018782,-0.151412,-0.003564,0.237515,0.018782,1.0,0.0


In [11]:
# Interactive frame-by-frame viewer for the recorded rollouts.
# Slider limits are taken from the DataFrame instead of being hard-coded: the
# previous `max=20` for `ep` pointed one past the last recorded episode
# (episodes are numbered from 0), so the final slider position selected an
# episode that does not exist.
# NOTE(review): `t` is bounded by the longest episode; a shorter episode may
# still have no row for a large `t` — confirm how render_cartpole_state_df
# handles that.
_ = widgets.interact(
    lambda ep, t: cprender.render_cartpole_state_df(df, t, ep),
    ep=widgets.IntSlider(min=0, max=int(df["ep"].max()), step=1, value=0),
    t=widgets.IntSlider(min=0, max=int(df["t"].max()), step=1, value=0),
)

interactive(children=(IntSlider(value=0, description='ep', max=20), IntSlider(value=0, description='t', max=50…

In [12]:
# Episode to plot; matches the `ep` column of the rollout DataFrame `df`.
EP = 0

# Build the line plot for the selected episode and render it.
# NOTE(review): the returned object is only assumed to be a figure exposing
# `.show()` — confirm the plotting backend in cprender.
fig = cprender.lineplot(df, ep=EP)
fig.show()