In [1]:
import numpy as np
import pandas as pd
import importlib

import ipywidgets as widgets
import optuna

In [2]:
import cpagent
import cprender
import cpenvs
import cppid

# Reload the project modules, in the same order as before, so that the
# kernel re-executes their current source instead of the cached import.
for _module in (cppid, cpagent, cprender, cpenvs):
    _ = importlib.reload(_module)

In [9]:
def objective(trial: optuna.Trial):
    """Score one set of PID gains for an Optuna study.

    Four gains are sampled on log scales from ``trial`` (pole KP, KI, KD and
    cart KP), a ``cppid.PidAgent`` is built from them, and the agent is run
    for 5 episodes in ``cpenvs.StandardCartPoleEnv``.

    Returns the sum, over every row of the result returned by
    ``cpagent.execute_cartpole``, of ``0.5 - |cart_pos|``.
    """
    # Keep the suggestion order fixed: pole KP, KI, KD, then cart KP.
    kp_pole = trial.suggest_float("KP_pole", 1e-3, 1e0, log=True)
    ki_pole = trial.suggest_float("KI_pole", 1e-4, 1e0, log=True)
    kd_pole = trial.suggest_float("KD_pole", 1e-3, 1e1, log=True)
    kp_cart = trial.suggest_float("KP_cart", 1e-4, 1e0, log=True)

    pole_gains = (kp_pole, ki_pole, kd_pole)
    # The cart integral and derivative gains are not tuned here; both are
    # held at 0.0. (Previously searched as "KI_cart" in [1e-5, 1e0] and
    # "KD_cart" in [1e-5, 1e1], log scale.)
    cart_gains = (kp_cart, 0.0, 0.0)

    agent = cppid.PidAgent(pole_gains, cart_gains)
    env = cpenvs.StandardCartPoleEnv()
    rollout = cpagent.execute_cartpole(agent, env=env, num_episodes=5)

    # Each recorded step contributes 0.5 minus the absolute cart position.
    return np.sum(0.5 - np.abs(rollout["cart_pos"]))


In [10]:
# Seed the sampler so the sequence of suggested gains is repeatable after a
# kernel restart. TPESampler is the sampler Optuna uses by default for a
# single-objective study, so only the seeding differs from the default.
# NOTE(review): the objective value itself may still vary between runs if the
# environment's resets are not seeded -- confirm in cpenvs / cpagent.
STUDY_SEED = 0
N_TRIALS = 300  # number of gain configurations evaluated by the study

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=STUDY_SEED),
)
study.optimize(objective, n_trials=N_TRIALS)

[I 2024-01-04 16:09:58,288] A new study created in memory with name: no-name-59f35f8c-cddc-4ea2-8c9a-bc45d37a9d7f
[I 2024-01-04 16:09:58,325] Trial 0 finished with value: 46.712527203257196 and parameters: {'KP_pole': 0.0028622059367789396, 'KI_pole': 0.0033231620275421913, 'KD_pole': 0.0010882991611844482, 'KP_cart': 0.000536277881823585}. Best is trial 0 with value: 46.712527203257196.
[I 2024-01-04 16:09:58,345] Trial 1 finished with value: 59.12211215577554 and parameters: {'KP_pole': 0.14372921919873116, 'KI_pole': 0.0031139514233801393, 'KD_pole': 0.010019717473512692, 'KP_cart': 0.06854482640640658}. Best is trial 1 with value: 59.12211215577554.


[I 2024-01-04 16:09:58,394] Trial 2 finished with value: 71.00397439469816 and parameters: {'KP_pole': 0.040681343609988034, 'KI_pole': 0.0018569382996904593, 'KD_pole': 0.0026937194622268755, 'KP_cart': 0.0003903869578975652}. Best is trial 2 with value: 71.00397439469816.
[I 2024-01-04 16:09:58,418] Trial 3 finished with value: 32.8147895923903 and parameters: {'KP_pole': 0.0012647600950263503, 'KI_pole': 0.2243782517197108, 'KD_pole': 0.035394724356658594, 'KP_cart': 0.0001611497181224312}. Best is trial 2 with value: 71.00397439469816.
[I 2024-01-04 16:09:58,523] Trial 4 finished with value: -379.11715592755354 and parameters: {'KP_pole': 0.003282112265828757, 'KI_pole': 0.00015023175463823034, 'KD_pole': 1.1994813771557535, 'KP_cart': 0.0007131709096023901}. Best is trial 2 with value: 71.00397439469816.
[I 2024-01-04 16:09:58,560] Trial 5 finished with value: 69.1663247566903 and parameters: {'KP_pole': 0.003554331010182674, 'KI_pole': 0.00020693591747322955, 'KD_pole': 0.1795660

In [6]:
# Report the best trial of the study: its objective value, followed by one
# line per sampled parameter.
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for param_name, param_value in trial.params.items():
    print("    {}: {}".format(param_name, param_value))

  Value:  1232.6343768140016
  Params: 
    KP_pole: 0.6247963198201079
    KI_pole: 0.08105321218590597
    KD_pole: 2.1302067923471855
    KP_cart: 0.4098958164477601
    KI_cart: 0.015858240208657504
    KD_cart: 0.03466579096997909


KP_pole: 0.6247963198201079
KI_pole: 0.08105321218590597
KD_pole: 2.1302067923471855
KP_cart: 0.4098958164477601
KI_cart: 0.015858240208657504
KD_cart: 0.03466579096997909

In [7]:
# Fixed gains passed to PidAgent as two (KP, KI, KD) tuples: pole first,
# then cart. These literals are hard-coded rather than read from `study`.
pole_gains = (0.6247963198201079, 0.08105321218590597, 2.1302067923471855)
cart_gains = (0.4098958164477601, 0.015858240208657504, 0.03466579096997909)

agent = cppid.PidAgent(pole_gains, cart_gains)
env = cpenvs.StandardCartPoleEnv()
df = cpagent.execute_cartpole(agent, env=env)
df

Unnamed: 0,ep,t,cart_pos,cart_vel,pole_ang,pole_vel,pos_deviation,reward,cart_pos_setpoint
0,0,0,0.020191,0.024857,-0.037751,0.028779,0.020191,,0.0
1,0,1,0.020688,-0.169704,-0.037175,0.309316,0.020688,1.0,0.0
2,0,2,0.017294,-0.364277,-0.030989,0.590047,0.017294,1.0,0.0
3,0,3,0.010008,-0.558951,-0.019188,0.872809,0.010008,1.0,0.0
4,0,4,-0.001171,-0.363574,-0.001732,0.574156,-0.001171,1.0,0.0
...,...,...,...,...,...,...,...,...,...
10015,19,496,0.001957,-0.406256,-0.002080,0.608271,0.001957,1.0,0.0
10016,19,497,-0.006168,-0.211105,0.010085,0.314934,-0.006168,1.0,0.0
10017,19,498,-0.010390,-0.016128,0.016384,0.025449,-0.010390,1.0,0.0
10018,19,499,-0.010713,0.178755,0.016893,-0.262020,-0.010713,1.0,0.0


In [None]:
# Take the slider upper bounds from the recorded data instead of hard-coding
# them. The previous fixed bound of 20 for `ep` allowed selecting an episode
# index past the last one present in `df` (episode indices start at 0).
# Note: `last_t` is the largest step over all episodes, so a shorter episode
# can still be asked for a step it does not contain.
last_ep = int(df["ep"].max())
last_t = int(df["t"].max())

_ = widgets.interact(
    lambda ep, t: cprender.render_cartpole_state_df(df, t, ep),
    ep=widgets.IntSlider(min=0, max=last_ep, step=1, value=0),
    t=widgets.IntSlider(min=0, max=last_t, step=1, value=0),
)

In [8]:
# Index of the episode to plot from `df`.
EP = 0

# Build the line plot for the selected episode and display it.
fig = cprender.lineplot(df, ep=EP)
fig.show()