In [9]:
from torchrl.collectors import SyncDataCollector
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.modules import MLP, AdditiveGaussianWrapper
from torchrl.envs import GymEnv

# Environment
env = GymEnv('Pendulum-v1')

# Model: Actor and value
mlp_actor = MLP(
    num_cells=64,
    depth=3,
    in_features=3,
    out_features=1
)

actor = TensorDictModule(
    mlp_actor,
    in_keys=['observation'],
    out_keys=['action']
)

mlp_value = MLP(
    num_cells=64,
    depth=2,
    in_features=4,
    out_features=1
)

critic = TensorDictSequential(
    actor,
    TensorDictModule(
        mlp_value,
        in_keys = [
            'observation',
            'action'
        ],
        out_keys = ['state_action_value']
    )
)

  logger.warn(
  logger.warn(


In [10]:
# Data Collector
collector = SyncDataCollector(
    env,
    actor,    # Removed AdaptiveGaussianWrapper for now.
    frames_per_batch=1000,
    total_frames=1000000,
)

In [11]:
from torchrl.data import TensorDictReplayBuffer, LazyTensorStorage

# Replay Buffer
buffer = TensorDictReplayBuffer(
    storage=LazyTensorStorage(
        max_size=100000,
    ),
)

In [12]:
import torch
from torchrl.objectives import DDPGLoss

# Loss Module
loss_fn = DDPGLoss(
    actor, critic
)

optim = torch.optim.Adam(
    loss_fn.parameters(),
    lr=2e-4,
)

In [13]:
from torchrl.trainers import Trainer

"""
Alternatively with custom train loop

for data in collector:
    buffer.extend(data)
    sample = buffer.sample(50)
    loss = loss_fn(sample)
    loss = loss['loss_actor'] + loss['loss_value']
    loss.backward()
    optim.step()
    optim.zero_grad()
"""


# Trainer
trainer = Trainer(
    collector=collector,
    total_frames=10000,
    frame_skip=1,
    optim_steps_per_batch=1,
    loss_module=loss_fn,
    optimizer=optim,
)

trainer.train()



  0%|          | 1000/1000000 [00:05<1:30:54, 183.17it/s][A
  0%|          | 1000/1000000 [00:05<1:30:54, 183.17it/s][A
  0%|          | 2000/1000000 [00:06<45:22, 366.60it/s]  [A
  0%|          | 2000/1000000 [00:06<45:22, 366.60it/s][A
  0%|          | 3000/1000000 [00:06<30:05, 552.31it/s][A
  0%|          | 0/1000000 [01:20<?, ?it/s] 552.31it/s][A

  0%|          | 4000/1000000 [00:07<23:41, 700.79it/s][A
  0%|          | 4000/1000000 [00:07<23:41, 700.79it/s][A
  0%|          | 5000/1000000 [00:08<19:49, 836.64it/s][A
  0%|          | 5000/1000000 [00:08<19:49, 836.64it/s][A
  1%|          | 6000/1000000 [00:09<17:06, 968.33it/s][A
  1%|          | 6000/1000000 [00:09<17:06, 968.33it/s][A
  1%|          | 7000/1000000 [00:10<15:33, 1064.25it/s][A
  1%|          | 7000/1000000 [00:10<15:33, 1064.25it/s][A
  1%|          | 8000/1000000 [00:10<14:34, 1134.75it/s][A
  1%|          | 8000/1000000 [00:10<14:34, 1134.75it/s][A
  1%|          | 9000/1000000 [00:11<13:52,

In [17]:
from torchrl.envs import TransformedEnv
# Rendering and recording
from torchrl._utils import logger as torchrl_logger
from torchrl.record import CSVLogger, VideoRecorder

path = "./training_loop"
logger = CSVLogger(exp_name="dqn", log_dir=path, video_format="mp4")
video_recorder = VideoRecorder(logger, tag="video")
record_env = TransformedEnv(
    GymEnv("Pendulum-v1", from_pixels=True, pixels_only=False), video_recorder
)

record_env.rollout(max_steps=1000, policy=actor)
video_recorder.dump()

  logger.warn(
  logger.warn(


ImportError: PyAV is not installed, and is necessary for the video operations in torchvision.
See https://github.com/mikeboers/PyAV#installation for instructions on how to
install PyAV on your system.
