In [2]:
# 🚀 torch-ac minimal training example
import gymnasium as gym
import torch
import torch_ac
from torch_ac.model import ACModel
from torch_ac.algos.ppo import PPOAlgo
from torch_ac.utils.penv import ParallelEnv
from minigrid.wrappers import ImgObsWrapper
import numpy as np
import multiprocessing as mp
from torch_ac.utils.penv import ParallelEnv



# ----- 1. Set config parameters -----
ENV_NAME = "MiniGrid-DoorKey-6x6-v0"
NUM_ENVS = 4
FRAMES_PER_PROC = 128
TOTAL_FRAMES = 100_000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----- 2. Define environment factory -----
def make_env():
    env = gym.make(ENV_NAME)
    env = ImgObsWrapper(env)  # Use RGB-only observations
    return env

# ----- 3. Create parallel environments -----
# mp_context = mp.get_context("spawn")
# envs = ParallelEnv(4, make_env, multiprocessing_context=mp_context)
envs = ParallelEnv(4, make_env)

# ----- 4. Define the actor-critic model -----
obs_shape = envs.observation_space.shape  # (3, 64, 64)
n_actions = envs.action_space.n           # e.g., 7 discrete actions

model = ACModel(obs_shape, n_actions)
model.to(DEVICE)

# ----- 5. Initialize PPO algorithm -----
ppo = PPOAlgo(
    envs=envs,
    model=model,
    device=DEVICE,
    frames_per_proc=FRAMES_PER_PROC,
    discount=0.99,
    lr=0.00025,
    gae_lambda=0.95,
    entropy_coef=0.01,
    value_loss_coef=0.5,
    max_grad_norm=0.5,
    recurrence=1,  # Set >1 for recurrent policy (LSTM)
    clip_eps=0.2,
    epochs=4,
    batch_size=256,
    preprocess_obss=None  # No preprocessing wrapper needed here
)

# ----- 6. Training loop -----
num_frames = 0
update = 0

while num_frames < TOTAL_FRAMES:
    # 6.1 Collect rollouts from envs
    exps, logs1 = ppo.collect_experiences()

    # 6.2 Update policy and value networks
    logs2 = ppo.update_parameters(exps)
    logs = {**logs1, **logs2}

    num_frames += logs["num_frames"]
    update += 1

    # 6.3 Print summary every few updates
    if update % 1 == 0:
        ret = logs["return_per_episode"]
        print(f"Update {update} | Frames {num_frames} | Mean Return: {np.mean(ret):.2f} | Episodes: {len(ret)}")

print("✅ Training complete!")


TypeError: ParallelEnv.__init__() takes 2 positional arguments but 3 were given

In [3]:
from torch_ac.utils.penv import ParallelEnv
import inspect

print("✅ ParallelEnv signature:", inspect.signature(ParallelEnv))


✅ ParallelEnv signature: (envs)


In [1]:
import gymnasium as gym
from minigrid.wrappers import ImgObsWrapper
from torch_ac.model import ACModel
from torch_ac.algos.ppo import PPOAlgo
import torch
import numpy as np

# ----- 1. Set config parameters -----
ENV_NAME = "MiniGrid-DoorKey-6x6-v0"
FRAMES_PER_PROC = 128       # How many steps per update
TOTAL_FRAMES = 50000        # How long to train
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----- 2. Create a single wrapped environment -----
def make_env():
    env = gym.make(ENV_NAME)
    env = ImgObsWrapper(env)  # Use RGB image observations
    return env

env = make_env()

# ----- 3. Wrap single env in a dummy list -----
envs = [env]  # Acts like ParallelEnv but with just one env

# ----- 4. Define the model -----
obs_shape = env.observation_space.shape  # e.g., (3, 64, 64)
n_actions = env.action_space.n

model = ACModel(obs_shape, n_actions)
model.to(DEVICE)

# ----- 5. Create PPO algorithm -----
ppo = PPOAlgo(
    envs=envs,
    model=model,
    device=DEVICE,
    frames_per_proc=FRAMES_PER_PROC,
    discount=0.99,
    lr=0.00025,
    gae_lambda=0.95,
    entropy_coef=0.01,
    value_loss_coef=0.5,
    max_grad_norm=0.5,
    recurrence=1,
    clip_eps=0.2,
    epochs=4,
    batch_size=64,  # Smaller batch since only 1 env
    preprocess_obss=None
)

# ----- 6. Training loop -----
num_frames = 0
update = 0

while num_frames < TOTAL_FRAMES:
    exps, logs1 = ppo.collect_experiences()
    logs2 = ppo.update_parameters(exps)
    logs = {**logs1, **logs2}

    num_frames += logs["num_frames"]
    update += 1

    print(f"Update {update} | Frames {num_frames} | Mean Return: {np.mean(logs['return_per_episode']):.2f}")


AttributeError: 'ACModel' object has no attribute 'to'

In [2]:
import minigrid 
import gymnasium as gym
env = gym.make("MiniGrid-DoorKey-5x5-v0")
