# REINFORCE (Policy Gradient)

In [None]:
### One-cell script to run evaluation
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent

# Seed handed to eval_env.reset() further down in this cell.
SEED = 42
# Environment settings file passed to create_env().
ENV_CONFIG = './configs/env.yaml'
# YAML file providing the agent hyperparameters and evaluation settings.
MODEL_CONFIG = './configs/reinforce.yaml'
# Checkpoint file passed to agent.load_model().
MODEL_PATH = './models/reinforce_ep3000.pth'

# Build the evaluation environment (created with render_mode='rgb_array')
# and perform the initial reset with the fixed seed.
eval_env = create_env(config_filepath=ENV_CONFIG, render_mode='rgb_array')
eval_env.reset(seed=SEED)

# Print every environment setting as "name: value".
print("Environment configuration:")
for name, value in eval_env.config.items():
    print(f'{name}: {value}')

# Parse the model/evaluation settings from the YAML file, then echo them.
with open(MODEL_CONFIG, 'r') as config_file:
    config = yaml.safe_load(config_file)

print("Model configuration:")
print(config)

# Flatten the observation shape into a single input dimension.
# np.prod returns a NumPy integer scalar, so both sizes are converted to
# built-in ints before being handed to the agent.
state_size = int(np.prod(eval_env.observation_space.shape))
action_size = int(eval_env.action_space.shape[0])
print(f"State size: {state_size}, Action size: {action_size}")

# Network width, learning rate and discount factor come from the YAML config.
agent = REINFORCEAgent(
    state_size=state_size,
    hidden_size=config['hidden_size'],
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
)

# Restore the weights stored at MODEL_PATH.
agent.load_model(model_path=MODEL_PATH)

# Run the evaluation and always close the environment afterwards, so whatever
# resources it holds are released even if evaluation raises.
try:
    eval_results = agent.evaluate(
        env=eval_env,
        num_episodes=config['num_episodes_eval'],
        top_k=config['top_k'],
    )
finally:
    eval_env.close()

# Keep the return value as the cell's last expression so it is still displayed.
eval_results

  interval_1 = [(a - r) @ u / rqu, (b - r) @ u / rqu]


Environment configuration:
observation: {'type': 'LidarObservation', 'cells': 72, 'angle_range': 3.14, 'maximum_range': 100, 'normalize': True}
action: {'type': 'ContinuousAction'}
simulation_frequency: 15
policy_frequency: 2
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 50
destination: o1
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -100.0
high_speed_reward: 0.0
arrived_reward: 50.0
reward_speed_range: [0.0, 3.0]
normalize_reward: True
offroad_terminal: False
vehicles_count: 10
vehicle: {'acceleration': 3.0, 'steering': 0.4}
Model configuration:
{'hidden_size': 64, 'learning_rate': 0.001, 'gamma': 0.8, 'num_episodes_train': 3000, 'print_freq': 100, 'save_freq': 1000, 'num_episodes_eval': 100, 'top_k': 5}
State size: 1

Evaluating REINFORCE Agent: 100%|██████████| 100/100 [01:00<00:00,  1.64it/s]


: 

In [2]:
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent
%reload_ext autoreload
%autoreload 2

# Seed used for the initial reset of both the training and evaluation environments.
SEED = 42
# Environment settings file passed to create_env().
ENV_CONFIG = './configs/env.yaml'
# YAML file providing the agent hyperparameters and train/eval settings.
MODEL_CONFIG = './configs/reinforce.yaml'
# Weights file given to the agent constructor and to agent.save_model().
MODEL_PATH = './models/reinforce_left.pth'

In [3]:
# Training environment, created with render_mode=None and reset once with the fixed seed.
env = create_env(config_filepath=ENV_CONFIG, render_mode=None)
env.reset(seed=SEED)

# Print every environment setting as "name: value".
for name, value in env.config.items():
    print(f'{name}: {value}')

observation: {'type': 'LidarObservation', 'cells': 72, 'angle_range': 3.14, 'maximum_range': 100, 'normalize': True}
action: {'type': 'ContinuousAction'}
simulation_frequency: 15
policy_frequency: 2
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 50
destination: o2
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -100.0
high_speed_reward: 0.0
arrived_reward: 50.0
reward_speed_range: [0.0, 3.0]
normalize_reward: True
offroad_terminal: False
vehicles_count: 10
vehicle: {'acceleration': 3.0, 'steering': 0.4}


## Load Model Configs

In [4]:
# Read the hyperparameters and train/eval settings from the YAML file.
with open(MODEL_CONFIG, 'r') as config_file:
    config = yaml.safe_load(config_file)

# Show each setting as "name: value".
for name, value in config.items():
    print(f'{name}: {value}')

hidden_size: 64
learning_rate: 0.001
gamma: 0.8
num_episodes_train: 3000
print_freq: 100
save_freq: 1000
num_episodes_eval: 100
top_k: 5


## Create Agent

In [6]:
# Flatten the observation shape into a single input dimension.
# np.prod returns a NumPy integer scalar, so both sizes are converted to
# built-in ints before being handed to the agent.
state_size = int(np.prod(env.observation_space.shape))
action_size = int(env.action_space.shape[0])
print(f"State size: {state_size}, Action size: {action_size}")

# Network width, learning rate and discount factor come from the YAML config;
# MODEL_PATH is forwarded to the agent as its model path.
agent = REINFORCEAgent(
    state_size=state_size,
    hidden_size=config['hidden_size'],
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
    model_path=MODEL_PATH,
)

State size: 144, Action size: 2


## Train Agent or Load Weights

In [7]:
# Optional step, left disabled: uncomment to load previously saved weights
# from MODEL_PATH into the agent.
# agent.load_model(
#     model_path = MODEL_PATH,
# )

In [None]:
# Episode count and print/save frequencies are taken from the YAML config.
train_settings = {
    'num_episodes': config['num_episodes_train'],
    'print_freq': config['print_freq'],
    'save_freq': config['save_freq'],
}
agent.train(env=env, **train_settings)

  interval_1 = [(a - r) @ u / rqu, (b - r) @ u / rqu]
  interval_2 = [(a - r) @ v / rqv, (d - r) @ v / rqv]
Training REINFORCE Agent:   3%|▎         | 83/3000 [00:07<03:48, 12.74it/s]

## Save Model Weights

In [None]:
# Write the agent's current weights to MODEL_PATH.
agent.save_model(model_path=MODEL_PATH)

## Evaluate Agent Performance

In [17]:
# Separate environment for evaluation, created with render_mode='rgb_array'
# and reset once with the fixed seed.
eval_env = create_env(config_filepath=ENV_CONFIG, render_mode='rgb_array')
eval_env.reset(seed=SEED)

# Print every environment setting as "name: value".
for name, value in eval_env.config.items():
    print(f'{name}: {value}')

observation: {'type': 'LidarObservation', 'cells': 72, 'angle_range': 3.14, 'maximum_range': 100, 'normalize': True}
action: {'type': 'ContinuousAction'}
simulation_frequency: 15
policy_frequency: 2
other_vehicles_type: highway_env.vehicle.behavior.IDMVehicle
screen_width: 600
screen_height: 600
centering_position: [0.5, 0.6]
scaling: 7.15
show_trajectories: False
render_agent: True
offscreen_rendering: False
manual_control: False
real_time_rendering: False
duration: 50
destination: o1
controlled_vehicles: 1
initial_vehicle_count: 10
spawn_probability: 0.6
collision_reward: -100.0
high_speed_reward: 0.0
arrived_reward: 50.0
reward_speed_range: [0.0, 3.0]
normalize_reward: True
offroad_terminal: False
vehicles_count: 10
vehicle: {'acceleration': 3.0, 'steering': 0.4}


In [1]:
# Needs `agent`, `eval_env` and `config` to exist already in this kernel
# session; run the cells that define them first.
eval_settings = {
    'num_episodes': config['num_episodes_eval'],
    'top_k': config['top_k'],
}
agent.evaluate(env=eval_env, **eval_settings)

NameError: name 'agent' is not defined