# Demonstration of Vectorisation of OR_Gym Environments

In [15]:
import time

from typing import Callable

import or_gym
from or_gym.utils import create_env
import gym
import numpy as np

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

In the cell below. Set the number of cores of your CPU (google colab has 2).

In [16]:
env_id = "InvManagement-v1"
num_cpu = 2  # Number of processes to use

# Create the vectorized environment (DummyVecEnv)
vec_env = make_vec_env(env_id, n_envs=num_cpu)

model = PPO('MlpPolicy', vec_env, verbose=0)

In [17]:
n_timesteps = 400000

# Timing block for Multiprocessed RL Training
start_time = time.time()
model.learn(n_timesteps)
total_time_multi = time.time() - start_time
##############################################

print(f"Took {total_time_multi:.2f}s for multiprocessed version - {n_timesteps / total_time_multi:.2f} FPS")

single_process_model = PPO('MlpPolicy', env_id, verbose=0)

# Timing block for Single Process RL Training
start_time = time.time()
single_process_model.learn(n_timesteps)
total_time_single = time.time() - start_time
##############################################

print(f"Took {total_time_single:.2f}s for single process version - {n_timesteps / total_time_single:.2f} FPS")

print("Multiprocessed training is {:.2f}x faster!".format(total_time_single / total_time_multi))

Took 726.28s for multiprocessed version - 550.75 FPS
Took 982.81s for single process version - 407.00 FPS
Multiprocessed training is 1.35x faster!


In [18]:
# Evaluate the trained agent
eval_env = gym.make(env_id)
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')

Mean reward: -1.991901731491089 +/- 4.18
