In [1]:
# Working directory: put the location of the `road_env` package on the
# import path -- the current directory if `road_env` is found there,
# otherwise the parent directory.
import os
import sys
sys.path.append('.' if os.path.exists('road_env') else '..')

# Register environment
from road_env import register_road_envs
register_road_envs()

# Make environment and read the sizes the agent needs from its spaces
import gymnasium as gym
env = gym.make('urban-road-v0', render_mode='rgb_array')
state_dim, action_dim = env.observation_space.shape[0], env.action_space.shape[0]
# Only the first component of the upper bound is read; the printed range
# assumes the action space is symmetric around zero.
max_action = env.action_space.high[0]
print('State dim:', state_dim, 'Action dim:', action_dim, 'Action range:', (-max_action, max_action))

State dim: 35 Action dim: 2 Action range: (-1.0, 1.0)


In [2]:
# Make DRL Agent
from rl_algorithms2.sac_v2 import SAC_Trainer, replay_buffer

# Hidden layer size handed to SAC_Trainer
hidden_dim = 512

# state_dim, action_dim and max_action come from the environment cell above
agent = SAC_Trainer(replay_buffer=replay_buffer,
                    state_dim=state_dim,
                    action_dim=action_dim,
                    action_range=max_action,
                    hidden_dim=hidden_dim)

cpu
Soft Q Network (1,2):  SoftQNetwork(
  (linear1): Linear(in_features=37, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=512, bias=True)
  (linear3): Linear(in_features=512, out_features=512, bias=True)
  (linear4): Linear(in_features=512, out_features=1, bias=True)
)
Policy Network:  PolicyNetwork(
  (linear1): Linear(in_features=35, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=512, bias=True)
  (linear3): Linear(in_features=512, out_features=512, bias=True)
  (linear4): Linear(in_features=512, out_features=512, bias=True)
  (mean_linear): Linear(in_features=512, out_features=2, bias=True)
  (log_std_linear): Linear(in_features=512, out_features=2, bias=True)
)


In [3]:
# Load trained model
# The checkpoint location is assembled as
#   ../../data/models/<model_type>-<train_id>/<episode>
model_type, train_id, episode = 'sac_v2', '230704203226', '9999'
model_dir = f'../../data/models/{model_type}-{train_id}/{episode}'
agent.load_model(model_dir)
print('Loaded from', model_dir)

Loaded from ../../data/models/sac_v2-230704203226/9999


In [9]:
from enum import Enum
# NOTE(review): the original line was the truncated statement `from timeit`,
# which is a SyntaxError. `default_timer` is the most likely intended name --
# confirm against whatever timing code uses it.
from timeit import default_timer


class OCCLUSION(Enum):
    """Occlusion levels accepted by `run_test`.

    The integer value of a member is what `run_test` writes into the
    environment's 'obstacle_preset' configuration entry.
    """
    Low = 1
    Medium = 2
    High = 3

def run_test(env,
             occlusion_level,
             agent,
             num_episode=20,
             num_step=999,
             auto_randseed=True,
             render=False):
    """Roll out the agent's deterministic policy and print a per-episode summary.

    Args:
        env: Environment exposing `reset()`, `step(action)`, `render()` and,
            on the environment itself or on its `unwrapped` attribute, a
            `configure(dict)` method.
        occlusion_level: An `OCCLUSION` member or its plain numeric value;
            written to the 'obstacle_preset' configuration entry.
        agent: Object whose `policy_net.get_action(obs, deterministic=True)`
            returns the action to take.
        num_episode: Number of episodes to run.
        num_step: Value written to the 'duration' configuration entry.
        auto_randseed: When true, 'random_seed' is set to
            `int(episode * occlusion_level)` before each reset.
        render: When true, `env.render()` is called after every step.

    Prints one line per episode with the step count and the summed reward.
    """
    if isinstance(occlusion_level, OCCLUSION):
        occlusion_level = occlusion_level.value

    # `gym.make` returns a chain of wrappers. Gymnasium deprecated forwarding
    # unknown attributes (such as `configure`) through wrappers and removed it
    # in 1.0, so configure the innermost environment directly. Objects without
    # an `unwrapped` attribute are configured as-is.
    base_env = getattr(env, 'unwrapped', env)
    base_env.configure({
        'duration': num_step,
        'obstacle_preset': occlusion_level
    })

    for episode in range(num_episode):
        step = 0
        episode_reward = 0

        if auto_randseed:
            # Seed depends on both the episode index and the occlusion level.
            base_env.configure({
                'random_seed': int(episode * occlusion_level)
            })
        obs, info = env.reset()
        while True:  # Episode length is controlled by env.config['duration']
            action = agent.policy_net.get_action(obs, deterministic=True)
            obs, reward, done, truncated, info = env.step(action)
            if render:
                env.render()

            step += 1
            episode_reward += reward
            if done or truncated:
                break
        print(f'Episode: {episode+1}, Steps: {step}, Reward: {episode_reward:.2f}')