In [1]:
# Sanity check: report whether PyTorch can see a CUDA device.
import torch

cuda_available = torch.cuda.is_available()
print(cuda_available)

True


# REINFORCE (Policy Gradient)

In [1]:
### One-cell script to run evaluation
# Self-contained cell: builds the evaluation environment, loads the model
# config, reconstructs the agent, restores saved weights and evaluates them.
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent

SEED = 42
ENV_CONFIG = './configs/env.yaml'
MODEL_CONFIG = './configs/reinforce.yaml'
# MODEL_PATH = './models/reinforce_ep3000.pth'
MODEL_PATH = './models/reinforce_best.pth'


eval_env = create_env(
    config_filepath=ENV_CONFIG,
    render_mode='rgb_array',
)
eval_env.reset(seed=SEED)

# Set rendering parameters
# eval_env.config.update({
#     'offscreen_rendering': False,
#     'real_time_rendering': True,
#     'render_agent': True,
#     'show_trajectories': False
# })


# Display env configs
print("Environment configuration:")
for key in eval_env.config.keys():
    print(f'{key}: {eval_env.config[key]}')

# Only the parse needs the open file handle; print after it has been closed.
with open(MODEL_CONFIG, 'r') as file:
    config = yaml.safe_load(file)
print("Model configuration:")
print(config)

# np.prod returns a NumPy integer scalar; cast it so the agent receives a
# plain Python int for its input width.
state_size = int(np.prod(eval_env.observation_space.shape))
action_size = eval_env.action_space.shape[0]
print(f"State size: {state_size}, Action size: {action_size}")
agent = REINFORCEAgent(
    state_size=state_size,
    hidden_size=config['hidden_size'],
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
    model_path=MODEL_PATH,
)

agent.load_model(
    model_path=MODEL_PATH,
)

# Always release the environment, even when the run is interrupted or an
# episode raises, so rendering resources do not outlive the cell.
# NOTE(review): assumes create_env returns a Gymnasium-style env with close().
try:
    eval_results = agent.evaluate(
        env=eval_env,
        num_episodes=config['num_episodes_eval'],
        top_k=config['top_k'],
    )
finally:
    eval_env.close()

# Keep the cell's last-expression display of whatever evaluate() returned.
eval_results

  from pkg_resources import resource_stream, resource_exists


Environment configuration:
observation: {'type': 'Kinematics', 'vehicles_count': 10, 'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 20], 'vy': [-20, 20]}, 'absolute': False, 'sorted': True, 'normalize': True, 'include_road_info': True, 'include_vehicle_info': True, 'include_goal_info': True, 'history_length': 5}
action: {'type': 'ContinuousAction', 'continuous': True, 'normalize': True, 'clip_actions': True}
simulation_frequency: 10
policy_frequency: 10
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 100
destination: o1
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -20.0
high_speed_reward: 0.1
arrived_reward: 50.0
reward_speed_range: [0.

Evaluating REINFORCE Agent:   1%|          | 1/100 [00:01<03:16,  1.99s/it]

Episode 0 completed with reward 1.00 and 58 frames


Evaluating REINFORCE Agent:   2%|▏         | 2/100 [00:07<06:19,  3.87s/it]

Episode 1 completed with reward 92.00 and 170 frames


Evaluating REINFORCE Agent:   3%|▎         | 3/100 [00:09<05:20,  3.30s/it]

Episode 2 completed with reward 20.00 and 94 frames


Evaluating REINFORCE Agent:   4%|▍         | 4/100 [00:12<04:36,  2.88s/it]

Episode 3 completed with reward 1.00 and 78 frames


Evaluating REINFORCE Agent:   5%|▌         | 5/100 [00:18<06:19,  4.00s/it]

Episode 4 completed with reward 96.00 and 172 frames


Evaluating REINFORCE Agent:   6%|▌         | 6/100 [00:19<05:00,  3.20s/it]

Episode 5 completed with reward 1.00 and 60 frames


Evaluating REINFORCE Agent:   7%|▋         | 7/100 [00:22<04:39,  3.01s/it]

Episode 6 completed with reward 3.00 and 85 frames


Evaluating REINFORCE Agent:   8%|▊         | 8/100 [00:23<03:41,  2.41s/it]

Episode 7 completed with reward 1.00 and 43 frames


Evaluating REINFORCE Agent:   9%|▉         | 9/100 [00:28<04:53,  3.23s/it]

Episode 8 completed with reward 77.00 and 158 frames


Evaluating REINFORCE Agent:  10%|█         | 10/100 [00:29<03:50,  2.56s/it]

Episode 9 completed with reward 1.00 and 45 frames


Evaluating REINFORCE Agent:  11%|█         | 11/100 [00:31<03:22,  2.27s/it]

Episode 10 completed with reward 1.00 and 62 frames


Evaluating REINFORCE Agent:  12%|█▏        | 12/100 [00:32<02:52,  1.96s/it]

Episode 11 completed with reward 1.00 and 57 frames


Evaluating REINFORCE Agent:  13%|█▎        | 13/100 [00:34<02:55,  2.02s/it]

Episode 12 completed with reward 1.00 and 75 frames


Evaluating REINFORCE Agent:  14%|█▍        | 14/100 [00:38<03:39,  2.56s/it]

Episode 13 completed with reward 63.00 and 128 frames


Evaluating REINFORCE Agent:  15%|█▌        | 15/100 [00:39<02:58,  2.10s/it]

Episode 14 completed with reward 1.00 and 38 frames


Evaluating REINFORCE Agent:  16%|█▌        | 16/100 [00:40<02:30,  1.80s/it]

Episode 15 completed with reward 1.00 and 46 frames


Evaluating REINFORCE Agent:  17%|█▋        | 17/100 [00:42<02:25,  1.75s/it]

Episode 16 completed with reward 1.00 and 56 frames


Evaluating REINFORCE Agent:  18%|█▊        | 18/100 [00:43<02:08,  1.57s/it]

Episode 17 completed with reward 1.00 and 50 frames


Evaluating REINFORCE Agent:  19%|█▉        | 19/100 [00:48<03:44,  2.77s/it]

Episode 18 completed with reward 71.00 and 159 frames


Evaluating REINFORCE Agent:  20%|██        | 20/100 [00:50<03:11,  2.39s/it]

Episode 19 completed with reward 1.00 and 56 frames


Evaluating REINFORCE Agent:  21%|██        | 21/100 [00:52<03:05,  2.35s/it]

Episode 20 completed with reward 1.00 and 76 frames


Evaluating REINFORCE Agent:  22%|██▏       | 22/100 [00:58<04:22,  3.36s/it]

Episode 21 completed with reward 72.00 and 152 frames


Evaluating REINFORCE Agent:  23%|██▎       | 23/100 [01:02<04:48,  3.74s/it]

Episode 22 completed with reward 68.00 and 147 frames


Evaluating REINFORCE Agent:  24%|██▍       | 24/100 [01:04<04:05,  3.24s/it]

Episode 23 completed with reward 1.00 and 69 frames


Evaluating REINFORCE Agent:  25%|██▌       | 25/100 [01:05<03:06,  2.48s/it]

Episode 24 completed with reward 1.00 and 31 frames


Evaluating REINFORCE Agent:  26%|██▌       | 26/100 [01:06<02:35,  2.10s/it]

Episode 25 completed with reward 1.00 and 48 frames


Evaluating REINFORCE Agent:  27%|██▋       | 27/100 [01:08<02:16,  1.87s/it]

Episode 26 completed with reward 1.00 and 48 frames


Evaluating REINFORCE Agent:  28%|██▊       | 28/100 [01:09<01:56,  1.61s/it]

Episode 27 completed with reward 1.00 and 45 frames


Evaluating REINFORCE Agent:  29%|██▉       | 29/100 [01:10<01:46,  1.50s/it]

Episode 28 completed with reward 1.00 and 45 frames


Evaluating REINFORCE Agent:  30%|███       | 30/100 [01:12<01:53,  1.61s/it]

Episode 29 completed with reward 1.00 and 69 frames


Evaluating REINFORCE Agent:  31%|███       | 31/100 [01:15<02:24,  2.10s/it]

Episode 30 completed with reward 28.00 and 103 frames


Evaluating REINFORCE Agent:  32%|███▏      | 32/100 [01:17<02:12,  1.95s/it]

Episode 31 completed with reward 1.00 and 49 frames


Evaluating REINFORCE Agent:  33%|███▎      | 33/100 [01:19<02:17,  2.06s/it]

Episode 32 completed with reward 1.00 and 72 frames


Evaluating REINFORCE Agent:  34%|███▍      | 34/100 [01:20<01:59,  1.82s/it]

Episode 33 completed with reward 1.00 and 47 frames


Evaluating REINFORCE Agent:  35%|███▌      | 35/100 [01:22<01:58,  1.82s/it]

Episode 34 completed with reward 1.00 and 56 frames


Evaluating REINFORCE Agent:  36%|███▌      | 36/100 [01:24<02:00,  1.88s/it]

Episode 35 completed with reward 1.00 and 70 frames


Evaluating REINFORCE Agent:  37%|███▋      | 37/100 [01:27<02:08,  2.04s/it]

Episode 36 completed with reward 3.00 and 80 frames


Evaluating REINFORCE Agent:  38%|███▊      | 38/100 [01:28<01:50,  1.78s/it]

Episode 37 completed with reward 1.00 and 42 frames


Evaluating REINFORCE Agent:  39%|███▉      | 39/100 [01:29<01:35,  1.57s/it]

Episode 38 completed with reward 1.00 and 37 frames


Evaluating REINFORCE Agent:  40%|████      | 40/100 [01:30<01:25,  1.43s/it]

Episode 39 completed with reward 1.00 and 34 frames


Evaluating REINFORCE Agent:  41%|████      | 41/100 [01:37<03:01,  3.07s/it]

Episode 40 completed with reward 78.00 and 160 frames


Evaluating REINFORCE Agent:  42%|████▏     | 42/100 [01:38<02:32,  2.62s/it]

Episode 41 completed with reward 1.00 and 55 frames


Evaluating REINFORCE Agent:  43%|████▎     | 43/100 [01:41<02:25,  2.55s/it]

Episode 42 completed with reward 1.00 and 79 frames


Evaluating REINFORCE Agent:  44%|████▍     | 44/100 [01:47<03:18,  3.54s/it]

Episode 43 completed with reward 70.00 and 155 frames


Evaluating REINFORCE Agent:  45%|████▌     | 45/100 [01:49<02:48,  3.06s/it]

Episode 44 completed with reward 1.00 and 65 frames


Evaluating REINFORCE Agent:  46%|████▌     | 46/100 [01:49<02:09,  2.39s/it]

Episode 45 completed with reward 13.00 and 30 frames


Evaluating REINFORCE Agent:  47%|████▋     | 47/100 [01:51<01:49,  2.06s/it]

Episode 46 completed with reward 1.00 and 42 frames


Evaluating REINFORCE Agent:  48%|████▊     | 48/100 [01:51<01:27,  1.68s/it]

Episode 47 completed with reward 5.00 and 30 frames


Evaluating REINFORCE Agent:  49%|████▉     | 49/100 [01:54<01:34,  1.86s/it]

Episode 48 completed with reward 1.00 and 77 frames


Evaluating REINFORCE Agent:  50%|█████     | 50/100 [01:55<01:20,  1.61s/it]

Episode 49 completed with reward 1.00 and 36 frames


Evaluating REINFORCE Agent:  51%|█████     | 51/100 [01:56<01:11,  1.45s/it]

Episode 50 completed with reward 1.00 and 35 frames


Evaluating REINFORCE Agent:  52%|█████▏    | 52/100 [02:01<02:05,  2.62s/it]

Episode 51 completed with reward 54.00 and 144 frames


Evaluating REINFORCE Agent:  53%|█████▎    | 53/100 [02:02<01:43,  2.19s/it]

Episode 52 completed with reward 1.00 and 38 frames


Evaluating REINFORCE Agent:  54%|█████▍    | 54/100 [02:04<01:27,  1.91s/it]

Episode 53 completed with reward 1.00 and 42 frames


Evaluating REINFORCE Agent:  55%|█████▌    | 55/100 [02:06<01:34,  2.09s/it]

Episode 54 completed with reward 1.00 and 85 frames


Evaluating REINFORCE Agent:  56%|█████▌    | 56/100 [02:07<01:22,  1.87s/it]

Episode 55 completed with reward 1.00 and 54 frames


Evaluating REINFORCE Agent:  57%|█████▋    | 57/100 [02:13<02:07,  2.97s/it]

Episode 56 completed with reward 65.00 and 144 frames


Evaluating REINFORCE Agent:  58%|█████▊    | 58/100 [02:14<01:43,  2.47s/it]

Episode 57 completed with reward 1.00 and 48 frames


Evaluating REINFORCE Agent:  59%|█████▉    | 59/100 [02:17<01:42,  2.49s/it]

Episode 58 completed with reward 10.00 and 93 frames


Evaluating REINFORCE Agent:  60%|██████    | 60/100 [02:18<01:21,  2.03s/it]

Episode 59 completed with reward 1.00 and 34 frames


Evaluating REINFORCE Agent:  61%|██████    | 61/100 [02:20<01:15,  1.93s/it]

Episode 60 completed with reward 1.00 and 54 frames


Evaluating REINFORCE Agent:  62%|██████▏   | 62/100 [02:22<01:23,  2.19s/it]

Episode 61 completed with reward 1.00 and 82 frames


Evaluating REINFORCE Agent:  63%|██████▎   | 63/100 [02:23<01:07,  1.83s/it]

Episode 62 completed with reward 1.00 and 35 frames


Evaluating REINFORCE Agent:  64%|██████▍   | 64/100 [02:26<01:14,  2.08s/it]

Episode 63 completed with reward 10.00 and 91 frames


Evaluating REINFORCE Agent:  65%|██████▌   | 65/100 [02:27<01:04,  1.85s/it]

Episode 64 completed with reward 1.00 and 55 frames


Evaluating REINFORCE Agent:  66%|██████▌   | 66/100 [02:31<01:23,  2.46s/it]

Episode 65 completed with reward 45.00 and 127 frames


Evaluating REINFORCE Agent:  67%|██████▋   | 67/100 [02:33<01:15,  2.28s/it]

Episode 66 completed with reward 1.00 and 73 frames


Evaluating REINFORCE Agent:  68%|██████▊   | 68/100 [02:36<01:23,  2.61s/it]

Episode 67 completed with reward 13.00 and 101 frames


Evaluating REINFORCE Agent:  69%|██████▉   | 69/100 [02:38<01:08,  2.20s/it]

Episode 68 completed with reward 1.00 and 47 frames


Evaluating REINFORCE Agent:  70%|███████   | 70/100 [02:38<00:53,  1.79s/it]

Episode 69 completed with reward 1.00 and 38 frames


Evaluating REINFORCE Agent:  71%|███████   | 71/100 [02:39<00:43,  1.49s/it]

Episode 70 completed with reward 1.00 and 34 frames


Evaluating REINFORCE Agent:  72%|███████▏  | 72/100 [02:42<00:54,  1.96s/it]

Episode 71 completed with reward 10.00 and 103 frames


Evaluating REINFORCE Agent:  73%|███████▎  | 73/100 [02:44<00:51,  1.90s/it]

Episode 72 completed with reward 1.00 and 64 frames


: 

In [1]:
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent
%reload_ext autoreload
%autoreload 2

SEED = 42
ENV_CONFIG = './configs/env.yaml'
MODEL_CONFIG = './configs/reinforce.yaml'
MODEL_PATH = './models/reinforce.pth'

  from pkg_resources import resource_stream, resource_exists


In [2]:
# Build the training environment with rendering disabled and seed its first reset.
env = create_env(config_filepath=ENV_CONFIG, render_mode=None)
env.reset(seed=SEED)

# Display env configs, one "name: value" line per entry.
# NOTE(review): relies on env.config being a dict-like mapping.
for name, setting in env.config.items():
    print(f'{name}: {setting}')

observation: {'type': 'Kinematics', 'vehicles_count': 10, 'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 20], 'vy': [-20, 20]}, 'absolute': False, 'sorted': True, 'normalize': True, 'include_road_info': True, 'include_vehicle_info': True, 'include_goal_info': True, 'history_length': 5}
action: {'type': 'ContinuousAction', 'continuous': True, 'normalize': True, 'clip_actions': True}
simulation_frequency: 10
policy_frequency: 10
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 100
destination: o1
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -20.0
high_speed_reward: 0.1
arrived_reward: 50.0
reward_speed_range: [0.0, 3.0]
normalize_reward: T

## Load Model Configs

In [3]:
# Parse the agent hyperparameters from the YAML file at MODEL_CONFIG.
with open(MODEL_CONFIG, 'r') as config_file:
    config = yaml.safe_load(config_file)

# Echo every setting as "name: value".
for name, value in config.items():
    print(f'{name}: {value}')

hidden_size: 64
learning_rate: 0.001
gamma: 0.8
num_episodes_train: 3000
print_freq: 100
save_freq: 1000
num_episodes_eval: 100
top_k: 5


## Create Agent

In [4]:
# Flatten the observation shape into a single input width for the agent.
# np.prod returns a NumPy integer scalar; cast it so the agent receives a
# plain Python int rather than a NumPy type.
state_size = int(np.prod(env.observation_space.shape))
# The action width is the first (only) dimension of the action space shape.
action_size = env.action_space.shape[0]
print(f"State size: {state_size}, Action size: {action_size}")
agent = REINFORCEAgent(
    state_size=state_size,
    hidden_size=config['hidden_size'],
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
    model_path=MODEL_PATH,
)

State size: 70, Action size: 2
Using device: cuda


## Train Agent

In [5]:
# Episode count and the logging / checkpoint cadence all come from the model config.
train_settings = {
    'num_episodes': config['num_episodes_train'],
    'print_freq': config['print_freq'],
    'save_freq': config['save_freq'],
}
agent.train(env=env, **train_settings)

Training REINFORCE Agent:   0%|          | 1/3000 [00:00<27:23,  1.82it/s]

Max reward: 1.00 at episode 1
Model saved to ./models/reinforce_best.pth


Training REINFORCE Agent:   2%|▏         | 65/3000 [00:28<1:07:21,  1.38s/it]

Max reward: 4.00 at episode 65
Model saved to ./models/reinforce_best.pth
Max reward: 64.00 at episode 66
Model saved to ./models/reinforce_best.pth


Training REINFORCE Agent:   3%|▎         | 100/3000 [00:52<27:06,  1.78it/s] 

New best recent average: 1.66 (consistency ratio: 0.03)
Episode 100/3000 | Max reward: 64.00 | Avg reward: 1.66 | Recent avg: 1.66 | Consistency ratio: 0.03 | Entropy coef: 0.050


Training REINFORCE Agent:   6%|▌         | 184/3000 [01:45<1:29:13,  1.90s/it]

New best recent average: 1.79 (consistency ratio: 0.02)
Max reward: 74.00 at episode 184
Model saved to ./models/reinforce_best.pth


Training REINFORCE Agent:   6%|▋         | 194/3000 [01:53<39:23,  1.19it/s]  

New best recent average: 1.91 (consistency ratio: 0.03)


Training REINFORCE Agent:   7%|▋         | 197/3000 [01:57<1:04:42,  1.39s/it]

New best recent average: 2.46 (consistency ratio: 0.03)


Training REINFORCE Agent:   7%|▋         | 200/3000 [02:01<1:06:40,  1.43s/it]

New best recent average: 2.54 (consistency ratio: 0.03)
Episode 200/3000 | Max reward: 74.00 | Avg reward: 2.10 | Recent avg: 2.54 | Consistency ratio: 0.03 | Entropy coef: 0.050


Training REINFORCE Agent:   7%|▋         | 213/3000 [02:11<53:45,  1.16s/it]  

New best recent average: 2.67 (consistency ratio: 0.04)


Training REINFORCE Agent:   7%|▋         | 214/3000 [02:13<1:04:42,  1.39s/it]

New best recent average: 2.71 (consistency ratio: 0.04)


Training REINFORCE Agent:   7%|▋         | 215/3000 [02:15<1:12:54,  1.57s/it]

New best recent average: 2.74 (consistency ratio: 0.04)


Training REINFORCE Agent:   8%|▊         | 231/3000 [02:30<1:21:44,  1.77s/it]

New best recent average: 3.41 (consistency ratio: 0.05)


Training REINFORCE Agent:  10%|█         | 300/3000 [03:27<32:38,  1.38it/s]  

Episode 300/3000 | Max reward: 74.00 | Avg reward: 2.11 | Recent avg: 2.12 | Consistency ratio: 0.03 | Entropy coef: 0.050


Training REINFORCE Agent:  12%|█▏        | 364/3000 [04:39<1:16:25,  1.74s/it]

New best recent average: 3.62 (consistency ratio: 0.05)


Training REINFORCE Agent:  12%|█▏        | 365/3000 [04:42<1:31:45,  2.09s/it]

New best recent average: 3.89 (consistency ratio: 0.05)


Training REINFORCE Agent:  13%|█▎        | 378/3000 [04:55<53:14,  1.22s/it]  

New best recent average: 4.92 (consistency ratio: 0.05)
Max reward: 104.00 at episode 379
Model saved to ./models/reinforce_best.pth


Training REINFORCE Agent:  13%|█▎        | 386/3000 [05:08<1:01:19,  1.41s/it]

New best recent average: 5.04 (consistency ratio: 0.05)


Training REINFORCE Agent:  13%|█▎        | 393/3000 [05:16<57:35,  1.33s/it]  

New best recent average: 5.20 (consistency ratio: 0.05)


Training REINFORCE Agent:  13%|█▎        | 398/3000 [05:24<1:28:13,  2.03s/it]

New best recent average: 6.02 (consistency ratio: 0.06)


Training REINFORCE Agent:  13%|█▎        | 400/3000 [05:26<1:07:27,  1.56s/it]

Episode 400/3000 | Max reward: 104.00 | Avg reward: 3.08 | Recent avg: 6.02 | Consistency ratio: 0.06 | Entropy coef: 0.050


Training REINFORCE Agent:  13%|█▎        | 403/3000 [05:31<1:10:43,  1.63s/it]

New best recent average: 6.43 (consistency ratio: 0.06)


Training REINFORCE Agent:  14%|█▍        | 423/3000 [05:53<1:03:33,  1.48s/it]

New best recent average: 6.52 (consistency ratio: 0.06)


Training REINFORCE Agent:  14%|█▍        | 426/3000 [05:58<1:14:24,  1.73s/it]

New best recent average: 6.95 (consistency ratio: 0.07)


Training REINFORCE Agent:  14%|█▍        | 431/3000 [06:03<1:02:47,  1.47s/it]

New best recent average: 6.98 (consistency ratio: 0.07)


Training REINFORCE Agent:  15%|█▍        | 438/3000 [06:12<1:14:28,  1.74s/it]

New best recent average: 7.49 (consistency ratio: 0.07)


Training REINFORCE Agent:  15%|█▍        | 442/3000 [06:19<1:26:56,  2.04s/it]

New best recent average: 8.20 (consistency ratio: 0.08)


Training REINFORCE Agent:  15%|█▍        | 443/3000 [06:22<1:46:09,  2.49s/it]

New best recent average: 8.80 (consistency ratio: 0.08)


Training REINFORCE Agent:  15%|█▌        | 450/3000 [06:31<1:05:24,  1.54s/it]

New best recent average: 9.16 (consistency ratio: 0.09)


Training REINFORCE Agent:  17%|█▋        | 500/3000 [07:30<46:34,  1.12s/it]  

Episode 500/3000 | Max reward: 104.00 | Avg reward: 4.08 | Recent avg: 8.07 | Consistency ratio: 0.10 | Entropy coef: 0.050


Training REINFORCE Agent:  17%|█▋        | 508/3000 [07:44<1:29:14,  2.15s/it]

New best recent average: 9.71 (consistency ratio: 0.11)


Training REINFORCE Agent:  17%|█▋        | 511/3000 [07:51<1:55:00,  2.77s/it]

New best recent average: 10.64 (consistency ratio: 0.11)


Training REINFORCE Agent:  17%|█▋        | 512/3000 [07:53<1:40:33,  2.43s/it]

New best recent average: 10.65 (consistency ratio: 0.11)


Training REINFORCE Agent:  17%|█▋        | 513/3000 [07:57<1:57:10,  2.83s/it]

New best recent average: 11.12 (consistency ratio: 0.12)


Training REINFORCE Agent:  17%|█▋        | 518/3000 [08:04<1:32:01,  2.22s/it]

New best recent average: 11.59 (consistency ratio: 0.12)


Training REINFORCE Agent:  17%|█▋        | 520/3000 [08:09<1:41:00,  2.44s/it]

New best recent average: 12.01 (consistency ratio: 0.13)


Training REINFORCE Agent:  18%|█▊        | 525/3000 [08:16<1:21:00,  1.96s/it]

New best recent average: 12.57 (consistency ratio: 0.13)


Training REINFORCE Agent:  18%|█▊        | 531/3000 [08:26<1:27:52,  2.14s/it]

New best recent average: 13.05 (consistency ratio: 0.14)


Training REINFORCE Agent:  18%|█▊        | 533/3000 [08:29<1:16:04,  1.85s/it]

New best recent average: 13.22 (consistency ratio: 0.14)


Training REINFORCE Agent:  18%|█▊        | 537/3000 [08:36<1:33:01,  2.27s/it]

New best recent average: 14.01 (consistency ratio: 0.15)


Training REINFORCE Agent:  19%|█▊        | 561/3000 [09:15<1:16:32,  1.88s/it]

New best recent average: 14.41 (consistency ratio: 0.15)


Training REINFORCE Agent:  19%|█▊        | 562/3000 [09:18<1:36:00,  2.36s/it]

New best recent average: 14.79 (consistency ratio: 0.16)


Training REINFORCE Agent:  19%|█▉        | 580/3000 [09:42<50:23,  1.25s/it]  

Increasing exploration due to no improvement


Training REINFORCE Agent:  20%|██        | 600/3000 [10:07<34:05,  1.17it/s]  

Episode 600/3000 | Max reward: 104.00 | Avg reward: 5.52 | Recent avg: 12.70 | Consistency ratio: 0.13 | Entropy coef: 0.055


Training REINFORCE Agent:  22%|██▏       | 660/3000 [11:26<40:33,  1.04s/it]  

Performance degradation detected! Loading previous best model...
Model loaded from ./models/reinforce_best.pth





RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [64, 4]], which is output 0 of AsStridedBackward0, is at version 663; expected version 662 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

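## Save Model Weights if Desired
#### The highest-reward model seen during training is saved automatically to `./models/reinforce_best.pth`; run this cell only to also save the current weights to `MODEL_PATH`.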

In [None]:
# Write the agent's current weights to the path configured in MODEL_PATH.
agent.save_model(model_path=MODEL_PATH)

## Evaluate Agent Performance

In [8]:
# Build a separate evaluation environment that renders to RGB arrays,
# seeded the same way as the training environment.
eval_env = create_env(config_filepath=ENV_CONFIG, render_mode='rgb_array')
eval_env.reset(seed=SEED)

# Display env configs, one "name: value" line per entry.
# NOTE(review): relies on eval_env.config being a dict-like mapping.
for name, setting in eval_env.config.items():
    print(f'{name}: {setting}')

observation: {'type': 'Kinematics', 'vehicles_count': 10, 'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 20], 'vy': [-20, 20]}, 'absolute': False, 'sorted': True, 'normalize': True, 'include_road_info': True, 'include_vehicle_info': True, 'include_goal_info': True, 'history_length': 5}
action: {'type': 'ContinuousAction', 'continuous': True, 'normalize': True, 'clip_actions': True}
simulation_frequency: 10
policy_frequency: 10
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 100
destination: o1
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -20.0
high_speed_reward: 0.1
arrived_reward: 50.0
reward_speed_range: [0.0, 3.0]
normalize_reward: T

In [9]:
# Restore the checkpoint that training writes whenever a new max reward is reached.
agent.load_model(model_path='./models/reinforce_best.pth')

Model loaded from ./models/reinforce_best.pth


In [None]:
# Evaluation length and the top-k setting both come from the model config.
eval_settings = {
    'num_episodes': config['num_episodes_eval'],
    'top_k': config['top_k'],
}
agent.evaluate(env=eval_env, **eval_settings)

Evaluating REINFORCE Agent:   1%|          | 1/100 [00:00<01:23,  1.19it/s]

Episode 0 completed with reward 18.00 and 30 frames


Evaluating REINFORCE Agent:   2%|▏         | 2/100 [00:01<01:16,  1.27it/s]

Episode 1 completed with reward 19.00 and 30 frames


Evaluating REINFORCE Agent:   3%|▎         | 3/100 [00:03<01:49,  1.13s/it]

Episode 2 completed with reward 1.00 and 59 frames


Evaluating REINFORCE Agent:   4%|▍         | 4/100 [00:04<01:46,  1.11s/it]

Episode 3 completed with reward 13.00 and 43 frames


Evaluating REINFORCE Agent:   5%|▌         | 5/100 [00:05<01:35,  1.01s/it]

Episode 4 completed with reward 30.00 and 30 frames


Evaluating REINFORCE Agent:   6%|▌         | 6/100 [00:05<01:24,  1.11it/s]

Episode 5 completed with reward 14.00 and 30 frames


Evaluating REINFORCE Agent:   7%|▋         | 7/100 [00:06<01:24,  1.10it/s]

Episode 6 completed with reward 14.00 and 40 frames


Evaluating REINFORCE Agent:   8%|▊         | 8/100 [00:07<01:34,  1.03s/it]

Episode 7 completed with reward 1.00 and 52 frames


Evaluating REINFORCE Agent:   9%|▉         | 9/100 [00:08<01:21,  1.11it/s]

Episode 8 completed with reward 21.00 and 30 frames


Evaluating REINFORCE Agent:  10%|█         | 10/100 [00:09<01:16,  1.18it/s]

Episode 9 completed with reward 1.00 and 34 frames


Evaluating REINFORCE Agent:  11%|█         | 11/100 [00:10<01:18,  1.13it/s]

Episode 10 completed with reward 1.00 and 41 frames


Evaluating REINFORCE Agent:  12%|█▏        | 12/100 [00:10<01:11,  1.22it/s]

Episode 11 completed with reward 17.00 and 30 frames


Evaluating REINFORCE Agent:  13%|█▎        | 13/100 [00:11<01:05,  1.33it/s]

Episode 12 completed with reward 10.00 and 30 frames


Evaluating REINFORCE Agent:  14%|█▍        | 14/100 [00:12<01:07,  1.27it/s]

Episode 13 completed with reward 8.00 and 40 frames


Evaluating REINFORCE Agent:  15%|█▌        | 15/100 [00:12<01:02,  1.37it/s]

Episode 14 completed with reward 13.00 and 30 frames


Evaluating REINFORCE Agent:  16%|█▌        | 16/100 [00:14<01:16,  1.10it/s]

Episode 15 completed with reward 1.00 and 62 frames


Evaluating REINFORCE Agent:  17%|█▋        | 17/100 [00:14<01:09,  1.19it/s]

Episode 16 completed with reward 9.00 and 33 frames


Evaluating REINFORCE Agent:  18%|█▊        | 18/100 [00:17<01:51,  1.36s/it]

Episode 17 completed with reward 19.00 and 88 frames


Evaluating REINFORCE Agent:  19%|█▉        | 19/100 [00:18<01:31,  1.14s/it]

Episode 18 completed with reward 24.00 and 30 frames


Evaluating REINFORCE Agent:  20%|██        | 20/100 [00:18<01:19,  1.01it/s]

Episode 19 completed with reward 11.00 and 30 frames


Evaluating REINFORCE Agent:  21%|██        | 21/100 [00:19<01:11,  1.10it/s]

Episode 20 completed with reward 8.00 and 30 frames


Evaluating REINFORCE Agent:  22%|██▏       | 22/100 [00:20<01:06,  1.17it/s]

Episode 21 completed with reward 12.00 and 33 frames


Evaluating REINFORCE Agent:  23%|██▎       | 23/100 [00:20<01:01,  1.25it/s]

Episode 22 completed with reward 21.00 and 30 frames


Evaluating REINFORCE Agent:  24%|██▍       | 24/100 [00:21<01:02,  1.23it/s]

Episode 23 completed with reward 1.00 and 39 frames


Evaluating REINFORCE Agent:  25%|██▌       | 25/100 [00:22<01:05,  1.15it/s]

Episode 24 completed with reward 1.00 and 42 frames


Evaluating REINFORCE Agent:  26%|██▌       | 26/100 [00:23<01:03,  1.17it/s]

Episode 25 completed with reward 17.00 and 30 frames


Evaluating REINFORCE Agent:  27%|██▋       | 27/100 [00:24<01:01,  1.18it/s]

Episode 26 completed with reward 1.00 and 38 frames


Evaluating REINFORCE Agent:  28%|██▊       | 28/100 [00:27<01:43,  1.44s/it]

Episode 27 completed with reward 8.00 and 109 frames


Evaluating REINFORCE Agent:  29%|██▉       | 29/100 [00:28<01:30,  1.27s/it]

Episode 28 completed with reward 1.00 and 37 frames


Evaluating REINFORCE Agent:  30%|███       | 30/100 [00:28<01:15,  1.08s/it]

Episode 29 completed with reward 14.00 and 30 frames


Evaluating REINFORCE Agent:  31%|███       | 31/100 [00:29<01:06,  1.04it/s]

Episode 30 completed with reward 11.00 and 30 frames


Evaluating REINFORCE Agent:  32%|███▏      | 32/100 [00:30<00:59,  1.15it/s]

Episode 31 completed with reward 3.00 and 30 frames


Evaluating REINFORCE Agent:  33%|███▎      | 33/100 [00:30<00:54,  1.23it/s]

Episode 32 completed with reward 14.00 and 30 frames


Evaluating REINFORCE Agent:  34%|███▍      | 34/100 [00:31<00:52,  1.27it/s]

Episode 33 completed with reward 1.00 and 30 frames


Evaluating REINFORCE Agent:  35%|███▌      | 35/100 [00:32<00:49,  1.33it/s]

Episode 34 completed with reward 2.00 and 30 frames


Evaluating REINFORCE Agent:  36%|███▌      | 36/100 [00:32<00:47,  1.35it/s]

Episode 35 completed with reward 11.00 and 30 frames


Evaluating REINFORCE Agent:  37%|███▋      | 37/100 [00:33<00:46,  1.36it/s]

Episode 36 completed with reward 10.00 and 30 frames


Evaluating REINFORCE Agent:  38%|███▊      | 38/100 [00:34<00:44,  1.41it/s]

Episode 37 completed with reward 6.00 and 30 frames


Evaluating REINFORCE Agent:  39%|███▉      | 39/100 [00:36<01:15,  1.24s/it]

Episode 38 completed with reward 10.00 and 87 frames


Evaluating REINFORCE Agent:  40%|████      | 40/100 [00:37<01:04,  1.08s/it]

Episode 39 completed with reward 13.00 and 30 frames


Evaluating REINFORCE Agent:  41%|████      | 41/100 [00:38<01:02,  1.06s/it]

Episode 40 completed with reward 1.00 and 45 frames


Evaluating REINFORCE Agent:  42%|████▏     | 42/100 [00:39<00:55,  1.05it/s]

Episode 41 completed with reward 19.00 and 30 frames


Evaluating REINFORCE Agent:  43%|████▎     | 43/100 [00:39<00:49,  1.16it/s]

Episode 42 completed with reward 23.00 and 30 frames


Evaluating REINFORCE Agent:  44%|████▍     | 44/100 [00:41<01:01,  1.09s/it]

Error during episode 43: 'NoneType' object has no attribute 'get_image'


Evaluating REINFORCE Agent:  45%|████▌     | 45/100 [00:42<01:05,  1.19s/it]

Episode 44 completed with reward 1.00 and 45 frames


Evaluating REINFORCE Agent:  46%|████▌     | 46/100 [00:43<01:02,  1.15s/it]

Episode 45 completed with reward 1.00 and 43 frames


Evaluating REINFORCE Agent:  47%|████▋     | 47/100 [00:45<00:59,  1.13s/it]

Episode 46 completed with reward 1.00 and 40 frames


Evaluating REINFORCE Agent:  47%|████▋     | 47/100 [00:46<00:52,  1.01it/s]


KeyboardInterrupt: 

: 