In [1]:
import gymnasium as gym
import gymnasium_robotics as gymr
from stable_baselines3 import A2C, SAC, PPO, TD3, DDPG, HerReplayBuffer
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib import TQC
from stable_baselines3.common.noise import NormalActionNoise
from sb3_contrib.common.wrappers.time_feature import TimeFeatureWrapper
import psutil
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import numpy as np
import multiprocessing

In [2]:
num_cpu_cores = multiprocessing.cpu_count()
num_cpu_cores

26

In [5]:
env = gym.make('FetchPickAndPlace-v2')

## n_actions

In [4]:
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

In [28]:
model = TQC(
    'MultiInputPolicy',
    env,
    replay_buffer_class=HerReplayBuffer,
    replay_buffer_kwargs=dict(
        n_sampled_goal=4,
        goal_selection_strategy="future"
    ),
    action_noise=action_noise,
    learning_rate=3e-4,
    batch_size=256,
    buffer_size=1000000,
    learning_starts=10000,
    train_freq=1000,
    gradient_steps=1000,
    verbose=1
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [29]:
model.learn(total_timesteps=500000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -50      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 838      |
|    time_elapsed    | 0        |
|    total_timesteps | 200      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -50      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 8        |
|    fps             | 876      |
|    time_elapsed    | 0        |
|    total_timesteps | 400      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -50      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12       |
|    fps      

KeyboardInterrupt: 

In [48]:
import gymnasium as gym
import psutil
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv

# Define a function to create the environment
def make_env(env_id, rank, seed=0):
    def _init():
        env = gym.make(env_id)
        # Check if the environment has a `seed` method
        if hasattr(env.unwrapped, 'seed'):
            env.unwrapped.seed(seed + rank)
        return env
    return _init

def check_resources():
    # Check CPU and memory usage
    cpu_usage = psutil.cpu_percent(interval=1)
    memory_info = psutil.virtual_memory()
    return cpu_usage, memory_info

def test_parallel_envs(env_id, max_envs):
    for num_envs in range(1, max_envs + 1):
        try:
            print(f"Testing with {num_envs} parallel environments...")
            envs = SubprocVecEnv([make_env(env_id, i) for i in range(num_envs)])
            model = DDPG(
                'MultiInputPolicy',
                envs,
                replay_buffer_class=HerReplayBuffer,
                replay_buffer_kwargs=dict(
                    n_sampled_goal=4,
                    goal_selection_strategy="future"
                ),
                action_noise=action_noise,
                learning_rate=1e-3,
                batch_size=1024,
                buffer_size=1000000,
                learning_starts=10000,
                train_freq=1000,
                gamma=0.95,
                tau=0.05,
                verbose=1
            )
            model.learn(total_timesteps=1000)

            # Check resources after running the environments
            cpu_usage, memory_info = check_resources()
            print(f"CPU usage: {cpu_usage}%")
            print(f"Memory usage: {memory_info.percent}%")
            
            envs.close()

            if memory_info.percent > 90:  # Memory usage threshold
                print("Memory usage too high. Stopping the test.")
                break
        except Exception as e:
            print(f"Failed with {num_envs} environments: {e}")
            break

env_id = 'FetchPickAndPlace-v2'
max_envs = 20  # Adjust based on your initial estimation
test_parallel_envs(env_id, max_envs)

Testing with 1 parallel environments...


Process ForkServerProcess-480:
Process ForkServerProcess-510:
Process ForkServerProcess-466:
Process ForkServerProcess-473:
Process ForkServerProcess-503:
Process ForkServerProcess-476:
Process ForkServerProcess-474:
Process ForkServerProcess-467:
Process ForkServerProcess-505:
Process ForkServerProcess-464:
Process ForkServerProcess-479:
Process ForkServerProcess-497:
Process ForkServerProcess-504:
Process ForkServerProcess-495:
Process ForkServerProcess-472:
Process ForkServerProcess-506:
Process ForkServerProcess-468:
Process ForkServerProcess-502:
Process ForkServerProcess-509:
Process ForkServerProcess-477:
Process ForkServerProcess-513:
Process ForkServerProcess-514:
Process ForkServerProcess-481:
Process ForkServerProcess-515:
Process ForkServerProcess-470:
Process ForkServerProcess-463:
Process ForkServerProcess-508:
Process ForkServerProcess-486:
Process ForkServerProcess-507:
Process ForkServerProcess-498:
Process ForkServerProcess-499:
Process ForkServerProcess-512:
Process 

KeyboardInterrupt: 

Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):


## Parallel training

In [5]:
#n_actions = 4
#action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

In [3]:
def linear_schedule(initial_value: float):
    """
    Linear learning rate schedule.

    :param initial_value: Initial learning rate.
    :return: schedule that computes
      current learning rate depending on remaining progress
    """
    def func(progress_remaining: float) -> float:
        """
        Progress will decrease from 1 (beginning) to 0.

        :param progress_remaining:
        :return: current learning rate
        """
        return progress_remaining * initial_value

    return func

In [4]:
def make_env(env_id, rank, seed=0):
    def _init():
        env = gym.make(env_id, max_episode_steps=100)
        # Check if the environment has a `seed` method
        if hasattr(env.unwrapped, 'seed'):
            env.unwrapped.seed(seed + rank)
        return env
    return _init

In [5]:
env_id = 'FetchPickAndPlace-v2'
num_envs = 20  # Number of parallel environments

In [6]:
#envs = SubprocVecEnv([make_env(env_id, i) for i in range(num_envs)])
envs = DummyVecEnv([make_env(env_id, i) for i in range(num_envs)])

In [7]:
eval_env = DummyVecEnv([make_env(env_id, 20)])

In [9]:
model = TQC(
                'MultiInputPolicy',
                envs,
                replay_buffer_class=HerReplayBuffer,
                replay_buffer_kwargs=dict(
                    n_sampled_goal=4,
                    goal_selection_strategy="future"
                ),
                learning_rate=linear_schedule(0.001),
                batch_size=512,
                buffer_size=1000000,
                learning_starts=2100,
                train_freq=1,
                gradient_steps=-1,
                gamma=0.98,
                tau=0.005,
                policy_kwargs=dict(n_critics=2, net_arch=[128, 256, 64]),
                verbose=1,
            )

Using cuda device


In [10]:
eval_callback = EvalCallback(eval_env, 
                             best_model_save_path='./logs/',
                             log_path='./logs/', 
                             eval_freq=10000,
                             deterministic=True, 
                             render=False)

In [None]:
model.learn(total_timesteps=1000000, callback=eval_callback)

---------------------------------
| rollout/           |          |
|    success_rate    | 0.1      |
| time/              |          |
|    episodes        | 4        |
|    fps             | 483      |
|    time_elapsed    | 4        |
|    total_timesteps | 2000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.1      |
| time/              |          |
|    episodes        | 8        |
|    fps             | 483      |
|    time_elapsed    | 4        |
|    total_timesteps | 2000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.1      |
| time/              |          |
|    episodes        | 12       |
|    fps             | 483      |
|    time_elapsed    | 4        |
|    total_timesteps | 2000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_r

  logger.warn(


---------------------------------
| rollout/           |          |
|    success_rate    | 0.05     |
| time/              |          |
|    episodes        | 24       |
|    fps             | 104      |
|    time_elapsed    | 38       |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | -6.86    |
|    critic_loss     | 0.0177   |
|    ent_coef        | 0.156    |
|    ent_coef_loss   | -12.3    |
|    learning_rate   | 0.000996 |
|    n_updates       | 1880     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.05     |
| time/              |          |
|    episodes        | 28       |
|    fps             | 104      |
|    time_elapsed    | 38       |
|    total_timesteps | 4000     |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.05     |
| time/              |          |
|    episodes 

---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 488      |
|    fps             | 20       |
|    time_elapsed    | 2442     |
|    total_timesteps | 50000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 492      |
|    fps             | 20       |
|    time_elapsed    | 2442     |
|    total_timesteps | 50000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 496      |
|    fps             | 20       |
|    time_elapsed    | 2442     |
|    total_timesteps | 50000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_r

----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 584       |
|    fps             | 19        |
|    time_elapsed    | 3001      |
|    total_timesteps | 60000     |
| train/             |           |
|    actor_loss      | -5.67e+03 |
|    critic_loss     | 239       |
|    ent_coef        | 6.03      |
|    ent_coef_loss   | 0.0949    |
|    learning_rate   | 0.00094   |
|    n_updates       | 57880     |
----------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 588      |
|    fps             | 19       |
|    time_elapsed    | 3001     |
|    total_timesteps | 60000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 676      |
|    fps             | 19       |
|    time_elapsed    | 3460     |
|    total_timesteps | 68000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 680      |
|    fps             | 19       |
|    time_elapsed    | 3460     |
|    total_timesteps | 68000    |
---------------------------------
----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 684       |
|    fps             | 19        |
|    time_elapsed    | 3575      |
|    total_timesteps | 70000     |
| train/             |           |
|    actor_loss      | -1.13e+04 |
|    critic_loss     | 542       |
|  

---------------------------------
| rollout/           |          |
|    success_rate    | 0.04     |
| time/              |          |
|    episodes        | 768      |
|    fps             | 19       |
|    time_elapsed    | 4029     |
|    total_timesteps | 78000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.04     |
| time/              |          |
|    episodes        | 772      |
|    fps             | 19       |
|    time_elapsed    | 4029     |
|    total_timesteps | 78000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.04     |
| time/              |          |
|    episodes        | 776      |
|    fps             | 19       |
|    time_elapsed    | 4029     |
|    total_timesteps | 78000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_r

----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 864       |
|    fps             | 19        |
|    time_elapsed    | 4566      |
|    total_timesteps | 88000     |
| train/             |           |
|    actor_loss      | -4.68e+04 |
|    critic_loss     | 2.15e+03  |
|    ent_coef        | 37.3      |
|    ent_coef_loss   | 1.39      |
|    learning_rate   | 0.000912  |
|    n_updates       | 85880     |
----------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.02     |
| time/              |          |
|    episodes        | 868      |
|    fps             | 19       |
|    time_elapsed    | 4566     |
|    total_timesteps | 88000    |
---------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.02     |
| time/              |          

# Try 2

In [4]:
gym.__version__

'0.29.1'

In [5]:
registered_envs = set(gym.envs.registry.keys())

In [6]:
len(registered_envs)

1200

In [7]:
[s for s in registered_envs if s.startswith("F")]

['FreewayNoFrameskip-v4',
 'FetchSlideDense-v2',
 'FetchPickAndPlace-v2',
 'FetchSlide-v2',
 'FishingDerby-ramDeterministic-v0',
 'Frostbite-ramNoFrameskip-v4',
 'FrozenLake-v1',
 'FetchPickAndPlaceDense-v1',
 'Freeway-ramNoFrameskip-v4',
 'FishingDerbyDeterministic-v0',
 'Frostbite-ramDeterministic-v4',
 'FetchSlideDense-v1',
 'FetchReach-v1',
 'FetchPushDense-v2',
 'FetchPickAndPlace-v1',
 'FetchPushDense-v1',
 'FreewayDeterministic-v0',
 'FishingDerbyNoFrameskip-v4',
 'Freeway-ramDeterministic-v4',
 'FishingDerbyDeterministic-v4',
 'FetchSlide-v1',
 'FishingDerby-ramDeterministic-v4',
 'FishingDerby-ram-v4',
 'Frostbite-ramDeterministic-v0',
 'Freeway-v4',
 'FishingDerbyNoFrameskip-v0',
 'FishingDerby-ramNoFrameskip-v0',
 'Freeway-ram-v0',
 'FishingDerby-v0',
 'FishingDerby-ram-v0',
 'Freeway-ram-v4',
 'FetchReachDense-v1',
 'Freeway-ramDeterministic-v0',
 'FishingDerby-ramNoFrameskip-v4',
 'FetchPush-v2',
 'FrostbiteDeterministic-v4',
 'FrostbiteNoFrameskip-v0',
 'FetchPickAndPlace