In [5]:
import gymnasium as gym
from stable_baselines3 import PPO
from config import config
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import optuna
from stable_baselines3.common.callbacks import EvalCallback,BaseCallback
from config2 import config2
env_id = "intersection-v0"#"racetrack-v0"
num_cpu = 4

# env = make_vec_env(env_id, n_envs=num_cpu)
# env = gym.make(env_id, render_mode="rgb_array", config=config
            #    )
# env.unwrapped.configure(config)

In [6]:
config

{'observation': {'type': 'GrayscaleObservation',
  'observation_shape': (64, 64),
  'stack_size': 4,
  'weights': [0.2989, 0.587, 0.114],
  'scaling': 1.75},
 'action': {'type': 'ContinuousAction', 'longitudinal': True, 'lateral': True},
 'duration': 13,
 'destination': 'o1',
 'initial_vehicle_count': 150,
 'spawn_probability': 0.7,
 'screen_width': 600,
 'screen_height': 600,
 'centering_position': [0.5, 0.5],
 'scaling': 7.15,
 'collision_reward': -5,
 'high_speed_reward': 0,
 'arrived_reward': 3,
 'normalize_reward': False,
 'simulation_frequency': 15,
 'policy_frequency': 5,
 'offroad_terminal': False}

In [7]:
model = PPO.load("models/optuna_last_chance_130000.zip")
eval_env = gym.make(env_id, render_mode="rgb_array", config=config)
# eval_env = gym.make("highway-fast-v0", render_mode="rgb_array", config=config)

In [8]:
while True:
  done = truncated = False
  obs, info = eval_env.reset()
  while not (done or truncated):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = eval_env.step(action)
    eval_env.render()

AttributeError: 'NoneType' object has no attribute 'get_image'

In [None]:
class DoneCallback(BaseCallback):
    def __init__(self, check_freq: int, save_freq: int, save_path: str, verbose=1):
        super(DoneCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_freq = save_freq
        self.save_path = save_path
        self.done_count = 0

    def _on_step(self) -> bool:
        if 'done' in self.locals.keys():
            if self.locals['done']:
                self.done_count += 1

        if self.n_calls % self.check_freq == 0:
            print(f"Step: {self.num_timesteps} Done count: {self.done_count}")

        if self.n_calls % self.save_freq == 0:
            self.model.save(self.save_path + str(self.num_timesteps))

        return True

In [3]:
def optimize_ppo(trial):
    """ Learning hyperparameters we want to optimise"""
    return {
        'n_steps': int(trial.suggest_loguniform('n_steps', 32, 2048)),
        'gamma': trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.999, 0.9999]),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-2),
        'ent_coef': trial.suggest_loguniform('ent_coef', 0.00000001, 0.1),
        'clip_range': trial.suggest_uniform('clip_range', 0.1, 0.4),
        'n_epochs': int(trial.suggest_loguniform('n_epochs', 1, 10)),
    }

def objective(trial):
    """ Objective function for optimization """
    env = make_vec_env(env_id, n_envs=num_cpu,env_kwargs={"config":config})
    log_dir = "logs"
    model = PPO('CnnPolicy', env, verbose=0,tensorboard_log=log_dir,**optimize_ppo(trial))

    callback = DoneCallback(check_freq=128, save_freq=5000, save_path="./models/hypertuning_")

    # Create an evaluation environment
    eval_env = make_vec_env(env_id, n_envs=1,env_kwargs={"config":config})

    # Create the evaluation callback
    eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                                 log_path='./logs/', eval_freq=500,
                                 deterministic=True, render=False)

    # Train the model, passing both the training and evaluation callbacks
    model.learn(total_timesteps=512*50, callback=[callback, eval_callback], progress_bar=True)

    # Retrieve the best reward
    best_reward = eval_callback.best_mean_reward
    return best_reward

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2024-04-25 13:44:12,889] Trial 25 finished with value: 27.0 and parameters: {'n_steps': 1087.4953333804424, 'gamma': 0.98, 'learning_rate': 6.598882302687722e-05, 'ent_coef': 0.01786356354334364, 'clip_range': 0.16873577386904987, 'n_epochs': 1.3576674975524343}. Best is trial 17 with value: 42.6.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=268 and n_envs=4)


Output()

[I 2024-04-25 14:18:29,601] Trial 26 finished with value: 24.2 and parameters: {'n_steps': 268.4160444968466, 'gamma': 0.98, 'learning_rate': 4.2792045832687424e-05, 'ent_coef': 0.00038792901717832427, 'clip_range': 0.2608593226901129, 'n_epochs': 2.3643554800545536}. Best is trial 17 with value: 42.6.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1399 and n_envs=4)


Output()

[I 2024-04-25 14:55:05,422] Trial 27 finished with value: 30.6 and parameters: {'n_steps': 1399.2680023410692, 'gamma': 0.9, 'learning_rate': 0.0004844749478513348, 'ent_coef': 0.0037842381393682185, 'clip_range': 0.19211280396774147, 'n_epochs': 1.0024868182827513}. Best is trial 17 with value: 42.6.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=637 and n_envs=4)


Output()

[I 2024-04-25 15:34:06,611] Trial 28 finished with value: 29.0 and parameters: {'n_steps': 637.6502617322847, 'gamma': 0.9999, 'learning_rate': 0.0001530564457810309, 'ent_coef': 0.02862055317357903, 'clip_range': 0.21957397940070242, 'n_epochs': 1.5580612834160128}. Best is trial 17 with value: 42.6.
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=417 and n_envs=4)


Output()

[I 2024-04-25 16:12:57,361] Trial 29 finished with value: 17.6 and parameters: {'n_steps': 417.10382828176597, 'gamma': 0.99, 'learning_rate': 2.111541073383337e-05, 'ent_coef': 2.827020910421531e-05, 'clip_range': 0.2751309421738334, 'n_epochs': 3.644051417068777}. Best is trial 17 with value: 42.6.


Number of finished trials:  30
Best trial:
  Value:  42.6
  Params: 
    n_steps: 1298.0336974151803
    gamma: 0.98
    learning_rate: 0.0001291858786170927
    ent_coef: 0.0016824274086785702
    clip_range: 0.19565974040447376
    n_epochs: 1.2218276926103449


In [30]:
from highway_env.envs import IntersectionEnv

IntersectionEnv.default_config()

{'observation': {'type': 'Kinematics',
  'vehicles_count': 15,
  'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'],
  'features_range': {'x': [-100, 100],
   'y': [-100, 100],
   'vx': [-20, 20],
   'vy': [-20, 20]},
  'absolute': True,
  'flatten': False,
  'observe_intentions': False},
 'action': {'type': 'DiscreteMetaAction',
  'longitudinal': True,
  'lateral': False,
  'target_speeds': [0, 4.5, 9]},
 'simulation_frequency': 15,
 'policy_frequency': 1,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'screen_width': 600,
 'screen_height': 600,
 'centering_position': [0.5, 0.6],
 'scaling': 7.15,
 'show_trajectories': False,
 'render_agent': True,
 'offscreen_rendering': False,
 'manual_control': False,
 'real_time_rendering': False,
 'duration': 13,
 'destination': 'o1',
 'controlled_vehicles': 1,
 'initial_vehicle_count': 10,
 'spawn_probability': 0.6,
 'collision_reward': -5,
 'high_speed_reward': 1,
 'arrived_reward': 1,
 'reward_speed_range': [

Best trial:
  Value:  42.6
  Params: 
    n_steps: 1298.0336974151803
    gamma: 0.98
    learning_rate: 0.0001291858786170927
    ent_coef: 0.0016824274086785702
    clip_range: 0.19565974040447376
    n_epochs: 1.2218276926103449