# Create the environment with different reward functions

In [18]:
import numpy as np
from gym_unbalanced_disk import UnbalancedDisk
import gymnasium as gym
from gymnasium import spaces

class AC_UnbalancedDisk(UnbalancedDisk):
    def __init__(self, umax=3., dt=0.025, render_mode='human'):
        super().__init__(umax=umax, dt=dt, render_mode=render_mode)

        self.target = np.pi
        low = [-np.pi, -40]
        high = [np.pi, 40]
        self.observation_space = spaces.Box(
            low=np.array(low, dtype=np.float32),
            high=np.array(high, dtype=np.float32),
            shape=(2,)
        )

        self.recent_omegas = []

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)

        th = obs[0]
        omega = obs[1]

        # Normalize angle so π maps to 0
        theta = ((th - np.pi) % (2 * np.pi)) - np.pi

        # Update buffer of recent omega values
        self.recent_omegas.append(omega)
        if len(self.recent_omegas) > 10:
            self.recent_omegas.pop(0)

        # Base reward structure
        if abs(theta) < np.pi / 2:
            reward = min(-0.5, -5 + abs(omega))
        elif abs(theta) > np.pi / 2 and abs(theta) < 3 * np.pi / 4:
            reward = abs(theta)**2 / (1 + abs(omega))**1
        elif abs(theta) > 3 * np.pi / 4 and abs(theta) < 11 * np.pi / 12:
            reward = abs(theta)**4 / (1 + abs(omega))**2

        
        else:
            reward = abs(theta)**4 / (1 + abs(omega))**2
             

        return obs, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        obs, info = super().reset()
        self.recent_omegas = []  # Reset history
        return obs, info

class AC_UnbalancedDisk1(UnbalancedDisk):
    def __init__(self, umax=3., dt=0.025, render_mode='human', randomize_friction=True):
        super().__init__(umax=umax, dt=dt, render_mode=render_mode)

        self.target = np.pi
        self.randomize_friction = randomize_friction  

        low = [-np.pi, -40]
        high = [np.pi, 40]
        self.observation_space = spaces.Box(
            low=np.array(low, dtype=np.float32),
            high=np.array(high, dtype=np.float32),
            shape=(2,)
        )

        self.recent_omegas = []

    def step(self, action):
        # Sample a new random friction multiplier every step if enabled
        friction_scale = np.random.uniform(0.6, 1.5) if self.randomize_friction else 1.0

        # Temporarily patch gamma and Fc
        original_gamma = self.gamma
        original_Fc = self.Fc
        self.gamma *= friction_scale
        self.Fc *= friction_scale

        # Do the physics step using modified friction
        obs, reward, terminated, truncated, info = super().step(action)

        # Restore friction coefficients
        self.gamma = original_gamma
        self.Fc = original_Fc

        # Reward logic
        th = obs[0]
        omega = obs[1]
        theta = ((th - np.pi) % (2 * np.pi)) - np.pi
        # self.recent_omegas.append(omega)

        theta_abs = np.abs(((th + np.pi) % (2 * np.pi)) - np.pi)  # shortest distance to target (π)
        omega_abs = np.abs(omega)

        # Region 1: θ < 0.5π
        if theta_abs < 0.5 * np.pi:
            if omega_abs < 5:
                reward = np.maximum(-omega_abs**2, -0.5) - np.abs(2 * np.sin(0.5 * (np.pi - np.pi * (omega_abs) / 5)))
            else:
                reward = np.maximum(-omega_abs**2, -0.5)

        # Region 2: 0.5π ≤ θ < 0.75π
        elif 0.5 * np.pi <= theta_abs < 0.75 * np.pi:
            reward = 2 * (-np.cos(theta_abs)) / (0.01 + omega_abs)

        # Region 3: 0.75π ≤ θ ≤ π
        elif 0.75 * np.pi <= theta_abs <= np.pi:
            reward = (-np.cos(theta_abs - np.pi / 4))**0.5 / (0.01 + omega_abs**2)

        
        # Normalize theta around zero (distance from target)
        # theta_abs = np.abs(((th + np.pi) % (2 * np.pi)) - np.pi)  # shortest distance to target (π)
        # omega_abs = np.abs(omega)

        # # Region 1: θ < 0.5π
        # if theta_abs < 0.5 * np.pi:
        #     if omega_abs < 5:
        #         reward = np.maximum(-omega_abs**2, -0.5) - np.abs(2 * np.sin(0.5 * (np.pi - np.pi * (omega_abs) / 5)))
        #     else:
        #         reward = np.maximum(-omega_abs**2, -0.5)

        # # Region 2: 0.5π ≤ θ < 0.75π
        # elif 0.5 * np.pi <= theta_abs < 0.75 * np.pi:
        #     reward = 2 * (-np.cos(theta_abs)) / (0.01 + omega_abs)

        # # Region 3: 0.75π ≤ θ ≤ π
        # elif 0.75 * np.pi <= theta_abs <= np.pi:
        #     reward = (-np.cos(theta_abs - np.pi / 2))**0.5 / (0.0001 + omega_abs**1.5)
        
        return obs, reward, terminated, truncated, info


    def reset(self, seed=None, options=None):
        obs, info = super().reset()
        self.recent_omegas = []
        return obs, info

        
class AC_UnbalancedDisk5(UnbalancedDisk):
    def __init__(self, umax=3., dt=0.025, render_mode='human', randomize_friction=True):
        super().__init__(umax=umax, dt=dt, render_mode=render_mode)

        self.target = np.pi
        self.randomize_friction = randomize_friction

        low = [-np.pi, -40]
        high = [np.pi, 40]
        self.observation_space = spaces.Box(
            low=np.array(low, dtype=np.float32),
            high=np.array(high, dtype=np.float32),
            shape=(2,)
        )

        self.recent_omegas = []

    def step(self, action):
        # Sample a new random friction multiplier every step if enabled
        friction_scale = np.random.uniform(0.6, 1.5) if self.randomize_friction else 1.0

        # Temporarily patch gamma and Fc
        original_gamma = self.gamma
        original_Fc = self.Fc
        self.gamma *= friction_scale
        self.Fc *= friction_scale

        # Perform physics step
        obs, reward, terminated, truncated, info = super().step(action)

        # Restore original friction
        self.gamma = original_gamma
        self.Fc = original_Fc

        # Extract angle and angular velocity
        th = obs[0]
        omega = obs[1]

        # Normalize angle around π (i.e., make π → 0)
        theta = ((th - np.pi) % (2 * np.pi)) - np.pi
        theta_abs = abs(theta)
        omega_abs = abs(omega)

        # Apply reward based on region
        if theta_abs <= 0.5 * np.pi:
            reward = min(-1, -(np.pi) - 1 + abs(omega))
        elif theta_abs <= 0.75 * np.pi:
            reward = (-np.cos(theta_abs)) / (0.1 + omega_abs)
        else:
            reward = 5*(-np.cos(theta_abs))**0.5 / (0.1 + omega_abs)
            # Add anti-stall penalty
            # Stall detection: angular velocity near zero for several steps
            

        return obs, reward, terminated, truncated, info

        # if len(self.recent_omegas) > 10:
        #     self.recent_omegas.pop(0)

        # if abs(theta) < np.pi / 2:
        #     reward = min(-1, -np.pi -1 + abs(omega))
        # elif abs(theta) > np.pi / 2 and abs(theta) < 3 * np.pi / 4:
        #     reward = abs(theta)**2 / (1 + abs(omega))
        # elif abs(theta) > 3 * np.pi / 4 and abs(theta) < 11 * np.pi / 12:
        #     reward = abs(theta)**4 / (1 + abs(omega))**2
        #     if all(abs(w) < 0.05 for w in self.recent_omegas):
        #         reward = 1 / (1 + abs(omega))
        # else:
        #     reward = abs(theta)**4 / (1 + abs(omega))**2


    def reset(self, seed=None, options=None):
        obs, info = super().reset()
        self.recent_omegas = []
        return obs, info


class DQN_UnbalancedDisk(AC_UnbalancedDisk5):
    def __init__(self, umax=3., dt=0.025, n_actions=10, randomize_friction=True, render_mode='human'):
        super().__init__(umax=umax, dt=dt, randomize_friction=randomize_friction, render_mode=render_mode)


        self.actions = np.linspace(-umax, umax, n_actions)

        # Override action space to Discrete
        self.action_space = spaces.Discrete(n_actions)

    def step(self, action):
        idx = int(np.argmin(np.abs(self.actions - action)))
        obs, reward, terminated, truncated, info = super().step(self.actions[idx])

        return obs, reward, terminated, truncated, info

# Train one DQN for the 1st Reward Function

In [2]:
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import torch
import torch.nn as nn
import numpy as np
import random

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnMaxEpisodes, CallbackList
from gymnasium.wrappers import TimeLimit

n_actions = 10 

env = Monitor(TimeLimit(DQN_UnbalancedDisk(n_actions=n_actions,randomize_friction=True), max_episode_steps=500))
# Evaluation env (no random friction)
eval_env = Monitor(TimeLimit(DQN_UnbalancedDisk(n_actions=n_actions,randomize_friction=False), max_episode_steps=500))

learning_rate = 1e-3
target_update_interval = 1_000
policy_kwargs = dict(
        net_arch=[256,256,256],
        activation_fn=nn.ReLU
    )

# Callbacks
stop_cb = StopTrainingOnMaxEpisodes(max_episodes=100, verbose=1)
eval_cb = EvalCallback(
    eval_env,
    best_model_save_path=f"./best_dqn_christos/",
    log_path=None,
    eval_freq=5000,
    deterministic=True,
    render=False,
)
callback = CallbackList([stop_cb, eval_cb])

# Model
model = DQN(
    "MlpPolicy",
    env,
    learning_rate=learning_rate,
    #exploration_fraction=exploration_fraction,
    #exploration_final_eps=exploration_final_eps,
    target_update_interval=target_update_interval,
    gamma=0.95,
    batch_size=256,
    policy_kwargs=policy_kwargs,
    verbose=1,
    seed=42,
)

model.learn(total_timesteps=500_000, callback=callback)

Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 500       |
|    ep_rew_mean      | -1.05e+03 |
|    exploration_rate | 0.962     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1149      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2000      |
| train/              |           |
|    learning_rate    | 0.001     |
|    loss             | 0.00525   |
|    n_updates        | 474       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 500       |
|    ep_rew_mean      | -1.05e+03 |
|    exploration_rate | 0.924     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1208      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4000      |
| train/              |           |
|    learni

<stable_baselines3.dqn.dqn.DQN at 0x7012c53b16c0>

In [19]:
env = DQN_UnbalancedDisk(randomize_friction=False)
# best trial 11
# Trial 11 finished with value: 1425.5780135999998 and parameters: {'learning_rate': 0.002910910188933282, 'gamma': 0.9629183426032542, 'batch_size': 256, 'n_actions': 10, 'net_arch_style': 'medium', 'activation_fn': 'relu'}
#model = DQN.load('optuna_dqn_trials/optuna_best_model_trial_21/best_model.zip')
obs, _ = env.reset()
for i in range(5000):
    action, _states = model.predict(obs)  # policy
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    t = (obs[0] + np.pi)%(2*np.pi) - np.pi
    print( f'theta = {t: .4f}, omega: {obs[1]: .4f}, action: {action: .4f}')
    if terminated or truncated:
        obs, _ = env.reset()
    
env.close()

theta =  0.0251, omega:  1.9008, action:  4.0000
theta =  0.0926, omega:  3.5873, action:  6.0000
theta =  0.2010, omega:  4.9410, action:  6.0000
theta =  0.3353, omega:  5.8723, action:  6.0000
theta =  0.4901, omega:  6.3365, action:  6.0000
theta =  0.6491, omega:  6.3497, action:  7.0000
theta =  0.8046, omega:  5.9633, action:  7.0000
theta =  0.9458, omega:  5.2663, action:  7.0000
theta =  1.0667, omega:  4.3469, action:  3.0000
theta =  1.1618, omega:  3.2907, action:  6.0000
theta =  1.2298, omega:  2.1642, action:  6.0000
theta =  1.2705, omega:  1.0170, action:  6.0000
theta =  1.2598, omega: -1.7330, action:  0.0000
theta =  1.1846, omega: -4.2738, action:  0.0000
theta =  1.0482, omega: -6.5997, action:  0.0000
theta =  0.8575, omega: -8.5831, action:  0.0000
theta =  0.6228, omega: -10.0546, action:  0.0000
theta =  0.3627, omega: -10.8358, action:  0.0000
theta =  0.0895, omega: -10.8008, action:  0.0000
theta = -0.1723, omega: -9.9276, action:  0.0000
theta = -0.4014, 

KeyboardInterrupt: 

# Optuna hyperparameter optimization

In [20]:
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import torch
import torch.nn as nn
import numpy as np
import random

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnMaxEpisodes, CallbackList
from gymnasium.wrappers import TimeLimit



# def set_seed(seed):
#     np.random.seed(seed)
#     random.seed(seed)
#     torch.manual_seed(seed)

def objective(trial):
    # set_seed(42)  # Fixed seed for reproducibility

    # Sample hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    # ent_coef = trial.suggest_float("ent_coef", 1e-5, 0.1, log=True)
    exploration_fraction = trial.suggest_float("exploration_fraction", 0.1, 0.5, log=True)
    exploration_final_eps = trial.suggest_float("exploration_final_eps", 1e-3, 0.1, log=True)
    gamma = trial.suggest_float("gamma", 0.95, 0.99)
    target_update_interval = trial.suggest_categorical("target_update_interval", [500, 1000, 5000, 10_000])
    batch_size = trial.suggest_categorical("batch_size", [256, 512])
    n_actions = trial.suggest_int("n_actions", 8, 24)
    net_arch = trial.suggest_categorical("net_arch_style", ["medium", "large", "huge"])
    if net_arch == "medium":
        net_arch_cfg = [128, 128]
    elif net_arch == "large":
        net_arch_cfg = [128, 128, 128]
    elif net_arch == "huge":
        net_arch_cfg = [256, 256, 256]


    policy_kwargs = dict(
        net_arch=net_arch_cfg,
        activation_fn=nn.ReLU
    )
    #buffer_size = trial.suggest_categorical("buffer_size", [10_000, 100_000, 500_000])

    # Training env
    env = Monitor(TimeLimit(DQN_UnbalancedDisk(n_actions=n_actions,randomize_friction=True), max_episode_steps=600))

    # Evaluation env (no random friction)
    eval_env = Monitor(TimeLimit(DQN_UnbalancedDisk(n_actions=n_actions,randomize_friction=False), max_episode_steps=600))

    # Callbacks
    stop_cb = StopTrainingOnMaxEpisodes(max_episodes=150, verbose=1)
    eval_cb = EvalCallback(
        eval_env,
        best_model_save_path=f"./optuna_dqn2_trials/optuna_best_model_trial_{trial.number}",
        log_path=None,
        eval_freq=5000,
        deterministic=True,
        render=False,
    )
    callback = CallbackList([stop_cb, eval_cb])

    # Model
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=learning_rate,
        exploration_fraction=exploration_fraction,
        exploration_final_eps=exploration_final_eps,
        target_update_interval=target_update_interval,
        gamma=gamma,
        batch_size=batch_size,
        policy_kwargs=policy_kwargs,
        verbose=1,
        seed=42,
    )

    model.learn(total_timesteps=1_00_000, callback=callback)

    return eval_cb.best_mean_reward

# Optuna study
study = optuna.create_study(direction="maximize", sampler=TPESampler(), pruner=MedianPruner())
study.optimize(objective, n_trials=20)

# Show best result
print("Best trial value:", study.best_trial.value)
print("Best hyperparameters:", study.best_trial.params)


[I 2025-06-25 03:02:16,386] A new study created in memory with name: no-name-8ed675e3-11c5-4fd4-bda7-c2f4684cff05


Using cuda device
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 600      |
|    ep_rew_mean      | -1.8e+03 |
|    exploration_rate | 0.949    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1366     |
|    time_elapsed     | 1        |
|    total_timesteps  | 2400     |
| train/              |          |
|    learning_rate    | 0.000853 |
|    loss             | 0.207    |
|    n_updates        | 574      |
----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.78e+03 |
|    exploration_rate | 0.898     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1341      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learning_rate    | 0.

[I 2025-06-25 03:04:04,553] Trial 0 finished with value: 9311.878546 and parameters: {'learning_rate': 0.0008531201903112037, 'exploration_fraction': 0.46778418743876093, 'exploration_final_eps': 0.005306016110069708, 'gamma': 0.9606172326893972, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 8, 'net_arch_style': 'medium'}. Best is trial 0 with value: 9311.878546.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.81e+03 |
|    exploration_rate | 0.849     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1208      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.000757  |
|    loss             | 0.00141   |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.56e+03 |
|    exploration_rate | 0.699     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1229      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:05:52,816] Trial 1 finished with value: 3297.8401393999993 and parameters: {'learning_rate': 0.0007572839834961514, 'exploration_fraction': 0.1528418630427323, 'exploration_final_eps': 0.04043118557565367, 'gamma': 0.979833080372951, 'target_update_interval': 10000, 'batch_size': 256, 'n_actions': 22, 'net_arch_style': 'huge'}. Best is trial 0 with value: 9311.878546.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.77e+03 |
|    exploration_rate | 0.931     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1463      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.000321  |
|    loss             | 0.0148    |
|    n_updates        | 574       |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 600      |
|    ep_rew_mean      | -1.7e+03 |
|    exploration_rate | 0.861    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1377     |
|    time_elapsed     | 3        |
|    total_timesteps  | 4800     |
| train/              |          |
|    learning_rate    

[I 2025-06-25 03:07:27,814] Trial 2 finished with value: -466.873164 and parameters: {'learning_rate': 0.00032108808035123595, 'exploration_fraction': 0.3292000266958512, 'exploration_final_eps': 0.04748099669485608, 'gamma': 0.9704859371227581, 'target_update_interval': 10000, 'batch_size': 512, 'n_actions': 8, 'net_arch_style': 'medium'}. Best is trial 0 with value: 9311.878546.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.84e+03 |
|    exploration_rate | 0.888     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1272      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.000243  |
|    loss             | 0.19      |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.71e+03 |
|    exploration_rate | 0.776     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1319      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:09:10,291] Trial 3 finished with value: 1451.8645513999998 and parameters: {'learning_rate': 0.00024275674391600188, 'exploration_fraction': 0.21259207073712108, 'exploration_final_eps': 0.006375113812586001, 'gamma': 0.9579069196138921, 'target_update_interval': 500, 'batch_size': 512, 'n_actions': 20, 'net_arch_style': 'medium'}. Best is trial 0 with value: 9311.878546.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.71e+03 |
|    exploration_rate | 0.91      |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1265      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.000854  |
|    loss             | 0.00235   |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.56e+03 |
|    exploration_rate | 0.82      |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1231      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:10:59,891] Trial 4 finished with value: -207.23530879999998 and parameters: {'learning_rate': 0.0008535044676979867, 'exploration_fraction': 0.2623474295624548, 'exploration_final_eps': 0.015239257752978016, 'gamma': 0.9629114703312606, 'target_update_interval': 5000, 'batch_size': 512, 'n_actions': 14, 'net_arch_style': 'large'}. Best is trial 0 with value: 9311.878546.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.81e+03 |
|    exploration_rate | 0.937     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1411      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00685   |
|    loss             | 0.0109    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.75e+03 |
|    exploration_rate | 0.874     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1415      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:13:15,975] Trial 5 finished with value: 19683.4258638 and parameters: {'learning_rate': 0.006854834012003672, 'exploration_fraction': 0.37781539370692047, 'exploration_final_eps': 0.00898464888128019, 'gamma': 0.9869285458492697, 'target_update_interval': 1000, 'batch_size': 512, 'n_actions': 13, 'net_arch_style': 'medium'}. Best is trial 5 with value: 19683.4258638.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.52e+03 |
|    exploration_rate | 0.831     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1404      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00543   |
|    loss             | 0.0129    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.39e+03 |
|    exploration_rate | 0.661     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1362      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:16:07,025] Trial 6 finished with value: 25282.755853 and parameters: {'learning_rate': 0.005429531577496494, 'exploration_fraction': 0.13874049817687506, 'exploration_final_eps': 0.020846582329240512, 'gamma': 0.967470364234846, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 9, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.94e+03 |
|    exploration_rate | 0.932     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1212      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.0079    |
|    loss             | 0.00961   |
|    n_updates        | 574       |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 600      |
|    ep_rew_mean      | -1.9e+03 |
|    exploration_rate | 0.864    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1221     |
|    time_elapsed     | 3        |
|    total_timesteps  | 4800     |
| train/              |          |
|    learning_rate    

[I 2025-06-25 03:18:23,441] Trial 7 finished with value: -200.63647659999998 and parameters: {'learning_rate': 0.007896176318298666, 'exploration_fraction': 0.3514758903876649, 'exploration_final_eps': 0.0022551637177505917, 'gamma': 0.9859398067065549, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 24, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.71e+03 |
|    exploration_rate | 0.875     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 514       |
|    time_elapsed     | 4         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00376   |
|    loss             | 0.0366    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.59e+03 |
|    exploration_rate | 0.749     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 504       |
|    time_elapsed     | 9         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:23:10,773] Trial 8 finished with value: 3931.1481324 and parameters: {'learning_rate': 0.0037573556403088905, 'exploration_fraction': 0.188985029537983, 'exploration_final_eps': 0.012889221148000077, 'gamma': 0.9611058881129871, 'target_update_interval': 500, 'batch_size': 512, 'n_actions': 12, 'net_arch_style': 'medium'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.79e+03 |
|    exploration_rate | 0.922     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 439       |
|    time_elapsed     | 5         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00231   |
|    loss             | 0.0308    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.74e+03 |
|    exploration_rate | 0.844     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 466       |
|    time_elapsed     | 10        |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:26:56,316] Trial 9 finished with value: -226.48711599999996 and parameters: {'learning_rate': 0.002305931970239223, 'exploration_fraction': 0.2916275097319186, 'exploration_final_eps': 0.05208479810922657, 'gamma': 0.9873156890266516, 'target_update_interval': 1000, 'batch_size': 512, 'n_actions': 8, 'net_arch_style': 'medium'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.47e+03 |
|    exploration_rate | 0.783     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 698       |
|    time_elapsed     | 3         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00237   |
|    loss             | 0.00182   |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.19e+03 |
|    exploration_rate | 0.566     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 588       |
|    time_elapsed     | 8         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:31:27,467] Trial 10 finished with value: 7949.219603 and parameters: {'learning_rate': 0.002365370045814019, 'exploration_fraction': 0.11050356516065298, 'exploration_final_eps': 0.001359060904245945, 'gamma': 0.9508628348834633, 'target_update_interval': 5000, 'batch_size': 256, 'n_actions': 18, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.51e+03 |
|    exploration_rate | 0.768     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 502       |
|    time_elapsed     | 4         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00985   |
|    loss             | 0.0115    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.33e+03 |
|    exploration_rate | 0.536     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 450       |
|    time_elapsed     | 10        |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:35:42,729] Trial 11 finished with value: 20820.575138599997 and parameters: {'learning_rate': 0.009854361140511659, 'exploration_fraction': 0.10130018151633134, 'exploration_final_eps': 0.021272417165644746, 'gamma': 0.9733341620058231, 'target_update_interval': 1000, 'batch_size': 256, 'n_actions': 12, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


New best mean reward!
Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.49e+03 |
|    exploration_rate | 0.77      |
| time/               |           |
|    episodes         | 4         |
|    fps              | 589       |
|    time_elapsed     | 4         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00978   |
|    loss             | 0.0101    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.31e+03 |
|    exploration_rate | 0.541     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 607       |
|    time_elapsed     | 7         |
|    total_timesteps  | 4800      |
| train/              |  

[I 2025-06-25 03:41:10,754] Trial 12 finished with value: 20537.574299599997 and parameters: {'learning_rate': 0.009775793250269145, 'exploration_fraction': 0.10196235002044642, 'exploration_final_eps': 0.02429503092394037, 'gamma': 0.9741488951223726, 'target_update_interval': 1000, 'batch_size': 256, 'n_actions': 11, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.63e+03 |
|    exploration_rate | 0.844     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 622       |
|    time_elapsed     | 3         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.004     |
|    loss             | 0.0123    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.689     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 603       |
|    time_elapsed     | 7         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:46:51,161] Trial 13 finished with value: 24649.675007 and parameters: {'learning_rate': 0.004004079402118777, 'exploration_fraction': 0.1410198900120414, 'exploration_final_eps': 0.08562996975539182, 'gamma': 0.9764055084975232, 'target_update_interval': 1000, 'batch_size': 256, 'n_actions': 16, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.66e+03 |
|    exploration_rate | 0.849     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 673       |
|    time_elapsed     | 3         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00364   |
|    loss             | 0.0115    |
|    n_updates        | 574       |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 600      |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.699    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 661      |
|    time_elapsed     | 7        |
|    total_timesteps  | 4800     |
| train/              |          |
|    learning_rate    

[I 2025-06-25 03:51:38,930] Trial 14 finished with value: 24245.1628296 and parameters: {'learning_rate': 0.0036425595833122475, 'exploration_fraction': 0.14399982252715515, 'exploration_final_eps': 0.09612154766675504, 'gamma': 0.9782548759043739, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 16, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.62e+03 |
|    exploration_rate | 0.846     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1320      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00176   |
|    loss             | 0.0174    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.691     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1360      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:53:34,407] Trial 15 finished with value: 12.1786286 and parameters: {'learning_rate': 0.0017600377482063336, 'exploration_fraction': 0.14147710530038426, 'exploration_final_eps': 0.09002059853708337, 'gamma': 0.9651312107073359, 'target_update_interval': 1000, 'batch_size': 256, 'n_actions': 16, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.76e+03 |
|    exploration_rate | 0.866     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1236      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00011   |
|    loss             | 0.00856   |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.52e+03 |
|    exploration_rate | 0.732     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1279      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:55:21,068] Trial 16 finished with value: -235.84519559999998 and parameters: {'learning_rate': 0.00011001099382987985, 'exploration_fraction': 0.17338671031082187, 'exploration_final_eps': 0.03155663062042575, 'gamma': 0.9672522373809624, 'target_update_interval': 10000, 'batch_size': 256, 'n_actions': 19, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.52e+03 |
|    exploration_rate | 0.799     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1137      |
|    time_elapsed     | 2         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00475   |
|    loss             | 0.00161   |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.33e+03 |
|    exploration_rate | 0.598     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1235      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 03:57:17,600] Trial 17 finished with value: 7934.3034812 and parameters: {'learning_rate': 0.0047498840765754806, 'exploration_fraction': 0.11900463713171845, 'exploration_final_eps': 0.003488196930060216, 'gamma': 0.9805130596671003, 'target_update_interval': 5000, 'batch_size': 256, 'n_actions': 10, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.53e+03 |
|    exploration_rate | 0.82      |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1459      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00156   |
|    loss             | 0.0114    |
|    n_updates        | 574       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.37e+03 |
|    exploration_rate | 0.639     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 1433      |
|    time_elapsed     | 3         |
|    total_timesteps  | 4800      |
| train/              |           |
|    learni

[I 2025-06-25 04:00:41,877] Trial 18 finished with value: 24468.560498400002 and parameters: {'learning_rate': 0.0015594255051943494, 'exploration_fraction': 0.12555631975036113, 'exploration_final_eps': 0.05626555575554941, 'gamma': 0.9548530525313613, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 14, 'net_arch_style': 'huge'}. Best is trial 6 with value: 25282.755853.


Using cuda device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 600       |
|    ep_rew_mean      | -1.77e+03 |
|    exploration_rate | 0.897     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 1233      |
|    time_elapsed     | 1         |
|    total_timesteps  | 2400      |
| train/              |           |
|    learning_rate    | 0.00511   |
|    loss             | 0.0129    |
|    n_updates        | 574       |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 600      |
|    ep_rew_mean      | -1.6e+03 |
|    exploration_rate | 0.793    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1269     |
|    time_elapsed     | 3        |
|    total_timesteps  | 4800     |
| train/              |          |
|    learning_rate    

[I 2025-06-25 04:03:33,046] Trial 19 finished with value: 24281.047806199997 and parameters: {'learning_rate': 0.005105125464393112, 'exploration_fraction': 0.22859800313973272, 'exploration_final_eps': 0.016319978931880516, 'gamma': 0.9746110545342782, 'target_update_interval': 1000, 'batch_size': 256, 'n_actions': 17, 'net_arch_style': 'large'}. Best is trial 6 with value: 25282.755853.


Best trial value: 25282.755853
Best hyperparameters: {'learning_rate': 0.005429531577496494, 'exploration_fraction': 0.13874049817687506, 'exploration_final_eps': 0.020846582329240512, 'gamma': 0.967470364234846, 'target_update_interval': 500, 'batch_size': 256, 'n_actions': 9, 'net_arch_style': 'large'}


In [21]:
env = DQN_UnbalancedDisk(randomize_friction=False)
# best trial 11
# Trial 11 finished with value: 1425.5780135999998 and parameters: {'learning_rate': 0.002910910188933282, 'gamma': 0.9629183426032542, 'batch_size': 256, 'n_actions': 10, 'net_arch_style': 'medium', 'activation_fn': 'relu'}
model = DQN.load('optuna_dqn2_trials/optuna_best_model_trial_6/best_model.zip')
obs, _ = env.reset()
for i in range(5000):
    action, _states = model.predict(obs)  # policy
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    t = (obs[0] + np.pi)%(2*np.pi) - np.pi
    print( f'theta = {t: .4f}, omega: {obs[1]: .4f}, action: {action: .4f}')
    if terminated or truncated:
        obs, _ = env.reset()
    
env.close()

theta =  0.0248, omega:  1.9001, action:  7.0000
theta =  0.0932, omega:  3.5875, action:  7.0000
theta =  0.2005, omega:  4.9387, action:  7.0000
theta =  0.3354, omega:  5.8683, action:  7.0000
theta =  0.4902, omega:  6.3334, action:  5.0000
theta =  0.6515, omega:  6.3463, action:  5.0000
theta =  0.8072, omega:  5.9622, action:  6.0000
theta =  0.9444, omega:  5.2645, action:  6.0000
theta =  1.0654, omega:  4.3463, action:  6.0000
theta =  1.1611, omega:  3.2914, action:  6.0000
theta =  1.2294, omega:  2.1637, action:  6.0000
theta =  1.2481, omega: -0.6836, action:  0.0000
theta =  1.2005, omega: -3.2563, action:  0.0000
theta =  1.0866, omega: -5.6508, action:  0.0000
theta =  0.9166, omega: -7.7547, action:  0.0000
theta =  0.7014, omega: -9.4096, action:  0.0000
theta =  0.4511, omega: -10.4468, action:  0.0000
theta =  0.1853, omega: -10.7127, action:  0.0000
theta = -0.0759, omega: -10.1482, action:  0.0000
theta = -0.3158, omega: -8.8129, action:  0.0000
theta = -0.5135, 

In [None]:
from stable_baselines3 import DQN
import torch.nn as nn
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnMaxEpisodes
from gymnasium.wrappers import TimeLimit

policy_kwargs = dict(activation_fn=nn.GELU,
                     net_arch=[128, 256, 128])

actions = [10,20, 50]

# Create and wrap your env
env = DQN_UnbalancedDisk(umax=3.0, dt=0.025, n_actions=20)
env = TimeLimit(env, max_episode_steps=500)
env = Monitor(env)

eval_env = DQN_UnbalancedDisk(umax=3.0, dt=0.025, n_actions=20)
eval_env = TimeLimit(eval_env, max_episode_steps=500)
eval_env = Monitor(eval_env)

# Instantiate and train DQN
stop_cb = StopTrainingOnMaxEpisodes(max_episodes=200, verbose=1)

# Save best model based on mean reward
eval_cb = EvalCallback(
    eval_env,
    best_model_save_path="./best_dqn_model",
    log_path="./logs",
    eval_freq=5000,
    deterministic=True,
    render=False
)

# Chain both callbacks
from stable_baselines3.common.callbacks import CallbackList
callback = CallbackList([stop_cb, eval_cb])

# Model
model_dqn = DQN(
    policy='MlpPolicy',
    env=env,
    learning_rate=1e-3,
    verbose=1,
    policy_kwargs=policy_kwargs
)

# Train
model_dqn.learn(
    total_timesteps=1_000_000,
    callback=callback,
)

Using cpu device
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 500       |
|    ep_rew_mean      | -2.08e+03 |
|    exploration_rate | 0.981     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 244       |
|    time_elapsed     | 8         |
|    total_timesteps  | 2000      |
| train/              |           |
|    learning_rate    | 0.001     |
|    loss             | 0.00691   |
|    n_updates        | 474       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 500       |
|    ep_rew_mean      | -2.04e+03 |
|    exploration_rate | 0.962     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 284       |
|    time_elapsed     | 14        |
|    total_timesteps  | 4000      |
| train/              |           |
|    learnin

<stable_baselines3.dqn.dqn.DQN at 0x2b5e5c9fcd0>

In [22]:
env = DQN_UnbalancedDisk()
#model = SAC.load('sac_unbalanced_disk_v1', env=env)
obs, _ = env.reset()
for i in range(5000):
    action, _states = model_dqn.predict(obs)  # policy
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    t = (obs[0] + np.pi)%(2*np.pi) - np.pi
    print( f'theta = {t: .4f}, omega: {obs[1]: .4f}')
    if terminated or truncated:
        obs, _ = env.reset()
    
env.close()

theta =  0.0235, omega:  1.9021
theta =  0.0920, omega:  3.5932
theta =  0.2013, omega:  4.9465
theta =  0.3363, omega:  5.8763
theta =  0.4932, omega:  6.3420
theta =  0.6502, omega:  6.3529
theta =  0.8045, omega:  5.9685
theta =  0.9447, omega:  5.2712
theta =  1.0663, omega:  4.3506
theta =  1.1402, omega:  1.4606
theta =  1.1418, omega: -1.2047
theta =  1.0816, omega: -3.6230
theta =  0.9665, omega: -5.3699
theta =  0.8310, omega: -5.4438
theta =  0.6982, omega: -5.2263
theta =  0.5719, omega: -4.7044
theta =  0.4664, omega: -3.8872
theta =  0.3575, omega: -4.6456
theta =  0.2370, omega: -5.0423
theta =  0.1103, omega: -5.0387
theta = -0.0121, omega: -4.6408
theta = -0.1194, omega: -3.8952
theta = -0.1817, omega: -1.0493
theta = -0.1726, omega:  1.5729
theta = -0.1044, omega:  3.9022
theta =  0.0163, omega:  5.8521
theta =  0.1598, omega:  5.4583
theta =  0.3102, omega:  6.4755
theta =  0.4778, omega:  6.9765
theta =  0.6546, omega:  6.9776
theta =  0.8241, omega:  6.5447
theta = 