In [1]:
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqnpy
import os
import random
import time
from dataclasses import dataclass
from typing import Optional

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import tyro
from torch.utils.tensorboard import SummaryWriter

from cleanrl_utils.buffers import ReplayBuffer


@dataclass
class Args:
    # exp_name: str = os.path.basename(__file__)[: -len(".py")]
    exp_name: str = "dqn"
    """the name of this experiment"""
    seed: int = 1
    """seed of the experiment"""
    torch_deterministic: bool = True
    """if toggled, `torch.backends.cudnn.deterministic=False`"""
    cuda: bool = True
    """if toggled, cuda will be enabled by default"""
    track: bool = False
    """if toggled, this experiment will be tracked with Weights and Biases"""
    wandb_project_name: str = "cleanRL"
    """the wandb's project name"""
    wandb_entity: str = None
    """the entity (team) of wandb's project"""
    capture_video: bool = False
    """whether to capture videos of the agent performances (check out `videos` folder)"""
    save_model: bool = False
    """whether to save model into the `runs/{run_name}` folder"""
    upload_model: bool = False
    """whether to upload the saved model to huggingface"""
    hf_entity: str = ""
    """the user or org name of the model repository from the Hugging Face Hub"""

    # Algorithm specific arguments
    env_id: str = "CartPole-v1"
    """the id of the environment"""
    total_timesteps: int = 500000
    """total timesteps of the experiments"""
    learning_rate: float = 2.5e-4
    """the learning rate of the optimizer"""
    num_envs: int = 1
    """the number of parallel game environments"""
    buffer_size: int = 10000
    """the replay memory buffer size"""
    gamma: float = 0.99
    """the discount factor gamma"""
    tau: float = 1.0
    """the target network update rate"""
    target_network_frequency: int = 500
    """the timesteps it takes to update the target network"""
    batch_size: int = 128
    """the batch size of sample from the reply memory"""
    start_e: float = 1
    """the starting epsilon for exploration"""
    end_e: float = 0.05
    """the ending epsilon for exploration"""
    exploration_fraction: float = 0.5
    """the fraction of `total-timesteps` it takes from start-e to go end-e"""
    learning_starts: int = 10000
    """timestep to start learning"""
    train_frequency: int = 10
    """the frequency of training"""


def make_env(env_id, seed, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env.action_space.seed(seed)

        return env

    return thunk


# ALGO LOGIC: initialize agent here:
class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(np.array(env.single_observation_space.shape).prod(), 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, env.single_action_space.n),
        )

    def forward(self, x):
        return self.network(x)


def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
    slope = (end_e - start_e) / duration
    return max(slope * t + start_e, end_e)


if __name__ == "__main__":
    # args = tyro.cli(Args)
    args = tyro.cli(Args, args=["--env-id", "CartPole-v1", "--capture_video"])
    assert args.num_envs == 1, "vectorized envs are not supported at the moment"
    run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
    if args.track:
        import wandb

        wandb.init(
            project=args.wandb_project_name,
            entity=args.wandb_entity,
            sync_tensorboard=True,
            config=vars(args),
            name=run_name,
            monitor_gym=True,
            save_code=True,
        )
    writer = SummaryWriter(f"runs/{run_name}")
    writer.add_text(
        "hyperparameters",
        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
    )

    # TRY NOT TO MODIFY: seeding
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = args.torch_deterministic

    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")

    # env setup
    envs = gym.vector.SyncVectorEnv(
        [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
    )
    assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"

    q_network = QNetwork(envs).to(device)
    optimizer = optim.Adam(q_network.parameters(), lr=args.learning_rate)
    target_network = QNetwork(envs).to(device)
    target_network.load_state_dict(q_network.state_dict())

    rb = ReplayBuffer(
        args.buffer_size,
        envs.single_observation_space,
        envs.single_action_space,
        device,
        handle_timeout_termination=False,
    )
    start_time = time.time()

    # TRY NOT TO MODIFY: start the game
    obs, _ = envs.reset(seed=args.seed)
    for global_step in range(args.total_timesteps):
        # ALGO LOGIC: put action logic here
        epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction * args.total_timesteps, global_step)
        if random.random() < epsilon:
            actions = np.array([envs.single_action_space.sample() for _ in range(envs.num_envs)])
        else:
            q_values = q_network(torch.Tensor(obs).to(device))
            actions = torch.argmax(q_values, dim=1).cpu().numpy()

        # TRY NOT TO MODIFY: execute the game and log data.
        next_obs, rewards, terminations, truncations, infos = envs.step(actions)

        # TRY NOT TO MODIFY: record rewards for plotting purposes
        if "final_info" in infos:
            for info in infos["final_info"]:
                if info and "episode" in info:
                    print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                    writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                    writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)

        # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
        real_next_obs = next_obs.copy()
        for idx, trunc in enumerate(truncations):
            if trunc:
                real_next_obs[idx] = infos["final_observation"][idx]
        rb.add(obs, real_next_obs, actions, rewards, terminations, infos)

        # TRY NOT TO MODIFY: CRUCIAL step easy to overlook
        obs = next_obs

        # ALGO LOGIC: training.
        if global_step > args.learning_starts:
            if global_step % args.train_frequency == 0:
                data = rb.sample(args.batch_size)
                with torch.no_grad():
                    target_max, _ = target_network(data.next_observations).max(dim=1)
                    td_target = data.rewards.flatten() + args.gamma * target_max * (1 - data.dones.flatten())
                old_val = q_network(data.observations).gather(1, data.actions).squeeze()
                loss = F.mse_loss(td_target, old_val)

                if global_step % 100 == 0:
                    writer.add_scalar("losses/td_loss", loss, global_step)
                    writer.add_scalar("losses/q_values", old_val.mean().item(), global_step)
                    print("SPS:", int(global_step / (time.time() - start_time)))
                    writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

                # optimize the model
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # update target network
            if global_step % args.target_network_frequency == 0:
                for target_network_param, q_network_param in zip(target_network.parameters(), q_network.parameters()):
                    target_network_param.data.copy_(
                        args.tau * q_network_param.data + (1.0 - args.tau) * target_network_param.data
                    )

    if args.save_model:
        model_path = f"runs/{run_name}/{args.exp_name}.cleanrl_model"
        torch.save(q_network.state_dict(), model_path)
        print(f"model saved to {model_path}")
        from cleanrl_utils.evals.dqn_eval import evaluate

        episodic_returns = evaluate(
            model_path,
            make_env,
            args.env_id,
            eval_episodes=10,
            run_name=f"{run_name}-eval",
            Model=QNetwork,
            device=device,
            epsilon=args.end_e,
        )
        for idx, episodic_return in enumerate(episodic_returns):
            writer.add_scalar("eval/episodic_return", episodic_return, idx)

        if args.upload_model:
            from cleanrl_utils.huggingface import push_to_hub

            repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
            repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
            push_to_hub(args, episodic_returns, repo_id, "DQN", f"runs/{run_name}", f"videos/{run_name}-eval")

    envs.close()
    writer.close()

  DESCRIPTOR = _descriptor.FileDescriptor(
  _descriptor.FieldDescriptor(
  _HISTOGRAMPROTO = _descriptor.Descriptor(
  DESCRIPTOR = _descriptor.FileDescriptor(
  _descriptor.FieldDescriptor(
  _TENSORSHAPEPROTO_DIM = _descriptor.Descriptor(
  DESCRIPTOR = _descriptor.FileDescriptor(
  _descriptor.EnumValueDescriptor(
  _DATATYPE = _descriptor.EnumDescriptor(
  _descriptor.FieldDescriptor(
  _SERIALIZEDDTYPE = _descriptor.Descriptor(
  DESCRIPTOR = _descriptor.FileDescriptor(
  _descriptor.FieldDescriptor(
  _RESOURCEHANDLEPROTO_DTYPEANDSHAPE = _descriptor.Descriptor(
  DESCRIPTOR = _descriptor.FileDescriptor(
  _descriptor.FieldDescriptor(
  _TENSORPROTO = _descriptor.Descriptor(
objc[77457]: Class SDLApplication is implemented in both /Users/shibo/Workspace/cleanrl/.venv/lib/python3.9/site-packages/cv2/.dylibs/libSDL2-2.0.0.dylib (0x130b80890) and /Users/shibo/Workspace/cleanrl/.venv/lib/python3.9/site-packages/pygame/.dylibs/libSDL2-2.0.0.dylib (0x1386606e0). This may cause spurious

Moviepy - Building video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-0.mp4.
Moviepy - Writing video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-0.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-0.mp4
global_step=28, episodic_return=[29.]
Moviepy - Building video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-1.mp4.
Moviepy - Writing video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-1.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-1.mp4
global_step=38, episodic_return=[10.]
global_step=49, episodic_return=[11.]
global_step=85, episodic_return=[36.]
global_step=98, episodic_return=[13.]
global_step=114, episodic_return=[16.]
global_step=131, episodic_return=[17.]
global_step=150, episodic_return=[19.]




Moviepy - Building video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-8.mp4.
Moviepy - Writing video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-8.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-8.mp4
global_step=187, episodic_return=[37.]
global_step=214, episodic_return=[27.]
global_step=229, episodic_return=[15.]
global_step=242, episodic_return=[13.]
global_step=265, episodic_return=[23.]
global_step=275, episodic_return=[10.]
global_step=304, episodic_return=[29.]
global_step=322, episodic_return=[18.]
global_step=333, episodic_return=[11.]
global_step=344, episodic_return=[11.]
global_step=390, episodic_return=[46.]
global_step=401, episodic_return=[11.]
global_step=433, episodic_return=[32.]
global_step=480, episodic_return=[47.]
global_step=496, episodic_return=[16.]
global_step=522, episodic_return=[26.]
global_step=556, episodic_return=[34.]
global_step=572, episodic_return=[16.]
global_step=591, episodic_return=[19.]
Moviepy - Building video /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-27.mp4.
Moviepy

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-27.mp4
global_step=613, episodic_return=[22.]
global_step=637, episodic_return=[24.]
global_step=652, episodic_return=[15.]
global_step=663, episodic_return=[11.]
global_step=680, episodic_return=[17.]
global_step=693, episodic_return=[13.]
global_step=723, episodic_return=[30.]
global_step=739, episodic_return=[16.]
global_step=805, episodic_return=[66.]
global_step=836, episodic_return=[31.]
global_step=855, episodic_return=[19.]
global_step=880, episodic_return=[25.]
global_step=899, episodic_return=[19.]
global_step=909, episodic_return=[10.]
global_step=952, episodic_return=[43.]
global_step=973, episodic_return=[21.]
global_step=1005, episodic_return=[32.]
global_step=1016, episodic_return=[11.]
global_step=1064, episodic_return=[48.]
global_step=1088, episodic_return=[24.]
global_step=1100, episodic_return=[12.]
global_step=1113, episodic_return=[13.]
glo

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-64.mp4
global_step=1414, episodic_return=[23.]
global_step=1442, episodic_return=[28.]
global_step=1456, episodic_return=[14.]
global_step=1498, episodic_return=[42.]
global_step=1531, episodic_return=[33.]
global_step=1550, episodic_return=[19.]
global_step=1565, episodic_return=[15.]
global_step=1577, episodic_return=[12.]
global_step=1593, episodic_return=[16.]
global_step=1615, episodic_return=[22.]
global_step=1646, episodic_return=[31.]
global_step=1668, episodic_return=[22.]
global_step=1686, episodic_return=[18.]
global_step=1698, episodic_return=[12.]
global_step=1726, episodic_return=[28.]
global_step=1738, episodic_return=[12.]
global_step=1755, episodic_return=[17.]
global_step=1773, episodic_return=[18.]
global_step=1786, episodic_return=[13.]
global_step=1797, episodic_return=[11.]
global_step=1819, episodic_return=[22.]
global_step=1828, episodic_

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-125.mp4
global_step=2720, episodic_return=[11.]
global_step=2734, episodic_return=[14.]
global_step=2754, episodic_return=[20.]
global_step=2766, episodic_return=[12.]
global_step=2794, episodic_return=[28.]
global_step=2879, episodic_return=[85.]
global_step=2900, episodic_return=[21.]
global_step=2915, episodic_return=[15.]
global_step=2939, episodic_return=[24.]
global_step=2958, episodic_return=[19.]
global_step=2978, episodic_return=[20.]
global_step=2995, episodic_return=[17.]
global_step=3016, episodic_return=[21.]
global_step=3028, episodic_return=[12.]
global_step=3041, episodic_return=[13.]
global_step=3074, episodic_return=[33.]
global_step=3118, episodic_return=[44.]
global_step=3133, episodic_return=[15.]
global_step=3149, episodic_return=[16.]
global_step=3176, episodic_return=[27.]
global_step=3189, episodic_return=[13.]
global_step=3227, episodic

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-216.mp4
global_step=4747, episodic_return=[21.]
global_step=4765, episodic_return=[18.]
global_step=4792, episodic_return=[27.]
global_step=4818, episodic_return=[26.]
global_step=4833, episodic_return=[15.]
global_step=4869, episodic_return=[36.]
global_step=4882, episodic_return=[13.]
global_step=4915, episodic_return=[33.]
global_step=4939, episodic_return=[24.]
global_step=4955, episodic_return=[16.]
global_step=4970, episodic_return=[15.]
global_step=4990, episodic_return=[20.]
global_step=5025, episodic_return=[35.]
global_step=5054, episodic_return=[29.]
global_step=5081, episodic_return=[27.]
global_step=5094, episodic_return=[13.]
global_step=5127, episodic_return=[33.]
global_step=5140, episodic_return=[13.]
global_step=5156, episodic_return=[16.]
global_step=5185, episodic_return=[29.]
global_step=5204, episodic_return=[19.]
global_step=5227, episodic



global_step=5487, episodic_return=[28.]
global_step=5536, episodic_return=[49.]
global_step=5546, episodic_return=[10.]
global_step=5590, episodic_return=[44.]
global_step=5605, episodic_return=[15.]
global_step=5620, episodic_return=[15.]
global_step=5631, episodic_return=[11.]
global_step=5666, episodic_return=[35.]
global_step=5686, episodic_return=[20.]
global_step=5701, episodic_return=[15.]
global_step=5722, episodic_return=[21.]
global_step=5731, episodic_return=[9.]
global_step=5740, episodic_return=[9.]
global_step=5770, episodic_return=[30.]
global_step=5843, episodic_return=[73.]
global_step=5876, episodic_return=[33.]
global_step=5901, episodic_return=[25.]
global_step=5912, episodic_return=[11.]
global_step=5926, episodic_return=[14.]
global_step=5944, episodic_return=[18.]
global_step=5961, episodic_return=[17.]
global_step=5987, episodic_return=[26.]
global_step=6000, episodic_return=[13.]
global_step=6023, episodic_return=[23.]
global_step=6046, episodic_return=[23.]
gl

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-343.mp4
global_step=7698, episodic_return=[28.]
global_step=7709, episodic_return=[11.]
global_step=7723, episodic_return=[14.]
global_step=7742, episodic_return=[19.]
global_step=7756, episodic_return=[14.]
global_step=7798, episodic_return=[42.]
global_step=7817, episodic_return=[19.]
global_step=7835, episodic_return=[18.]
global_step=7860, episodic_return=[25.]
global_step=7879, episodic_return=[19.]
global_step=7918, episodic_return=[39.]
global_step=7933, episodic_return=[15.]
global_step=7967, episodic_return=[34.]
global_step=8023, episodic_return=[56.]
global_step=8051, episodic_return=[28.]
global_step=8064, episodic_return=[13.]
global_step=8107, episodic_return=[43.]
global_step=8144, episodic_return=[37.]
global_step=8179, episodic_return=[35.]
global_step=8189, episodic_return=[10.]
global_step=8208, episodic_return=[19.]
global_step=8243, episodic



global_step=9711, episodic_return=[21.]
global_step=9742, episodic_return=[31.]
global_step=9757, episodic_return=[15.]
global_step=9782, episodic_return=[25.]
global_step=9796, episodic_return=[14.]
global_step=9874, episodic_return=[78.]
global_step=9903, episodic_return=[29.]
global_step=9917, episodic_return=[14.]
global_step=9933, episodic_return=[16.]
global_step=9952, episodic_return=[19.]
global_step=9966, episodic_return=[14.]
global_step=9994, episodic_return=[28.]
global_step=10009, episodic_return=[15.]
global_step=10045, episodic_return=[36.]
global_step=10057, episodic_return=[12.]
global_step=10079, episodic_return=[22.]
global_step=10092, episodic_return=[13.]
SPS: 5117
global_step=10113, episodic_return=[21.]
global_step=10143, episodic_return=[30.]
global_step=10155, episodic_return=[12.]
SPS: 5139
global_step=10201, episodic_return=[46.]
global_step=10227, episodic_return=[26.]
global_step=10257, episodic_return=[30.]
SPS: 5162
global_step=10312, episodic_return=[55.

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-512.mp4
global_step=11569, episodic_return=[19.]
global_step=11592, episodic_return=[23.]
SPS: 5149
global_step=11609, episodic_return=[17.]
global_step=11627, episodic_return=[18.]
global_step=11639, episodic_return=[12.]
global_step=11657, episodic_return=[18.]
global_step=11669, episodic_return=[12.]
global_step=11681, episodic_return=[12.]
SPS: 5165
global_step=11701, episodic_return=[20.]
global_step=11716, episodic_return=[15.]
global_step=11744, episodic_return=[28.]
global_step=11775, episodic_return=[31.]
global_step=11793, episodic_return=[18.]
SPS: 5183
global_step=11819, episodic_return=[26.]
global_step=11846, episodic_return=[27.]
global_step=11866, episodic_return=[20.]
global_step=11886, episodic_return=[20.]
global_step=11898, episodic_return=[12.]
SPS: 5203
global_step=11908, episodic_return=[10.]
global_step=11916, episodic_return=[8.]
global_



global_step=12294, episodic_return=[23.]
SPS: 5264
global_step=12324, episodic_return=[30.]
global_step=12353, episodic_return=[29.]
global_step=12366, episodic_return=[13.]
global_step=12396, episodic_return=[30.]
SPS: 5282
global_step=12425, episodic_return=[29.]
global_step=12440, episodic_return=[15.]
global_step=12452, episodic_return=[12.]
global_step=12481, episodic_return=[29.]
global_step=12494, episodic_return=[13.]
SPS: 5298
global_step=12504, episodic_return=[10.]
global_step=12524, episodic_return=[20.]
global_step=12534, episodic_return=[10.]
global_step=12549, episodic_return=[15.]
global_step=12576, episodic_return=[27.]
global_step=12592, episodic_return=[16.]
SPS: 5309
global_step=12609, episodic_return=[17.]
global_step=12634, episodic_return=[25.]
global_step=12647, episodic_return=[13.]
global_step=12661, episodic_return=[14.]
global_step=12682, episodic_return=[21.]
global_step=12694, episodic_return=[12.]
SPS: 5324
global_step=12713, episodic_return=[19.]
global_

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-729.mp4
global_step=16021, episodic_return=[17.]
global_step=16035, episodic_return=[14.]
global_step=16091, episodic_return=[56.]
SPS: 5586
global_step=16101, episodic_return=[10.]
global_step=16111, episodic_return=[10.]
global_step=16133, episodic_return=[22.]
global_step=16149, episodic_return=[16.]
global_step=16195, episodic_return=[46.]
SPS: 5600
global_step=16250, episodic_return=[55.]
global_step=16265, episodic_return=[15.]
global_step=16273, episodic_return=[8.]
SPS: 5614
global_step=16306, episodic_return=[33.]
global_step=16319, episodic_return=[13.]
global_step=16333, episodic_return=[14.]
global_step=16368, episodic_return=[35.]
global_step=16378, episodic_return=[10.]
global_step=16391, episodic_return=[13.]
SPS: 5626
global_step=16403, episodic_return=[12.]
global_step=16415, episodic_return=[12.]
global_step=16431, episodic_return=[16.]
global_



global_step=16664, episodic_return=[15.]
global_step=16677, episodic_return=[13.]
global_step=16692, episodic_return=[15.]
SPS: 5663
global_step=16709, episodic_return=[17.]
global_step=16737, episodic_return=[28.]
global_step=16761, episodic_return=[24.]
global_step=16785, episodic_return=[24.]
global_step=16796, episodic_return=[11.]
SPS: 5675
global_step=16829, episodic_return=[33.]
global_step=16858, episodic_return=[29.]
global_step=16893, episodic_return=[35.]
SPS: 5687
global_step=16911, episodic_return=[18.]
global_step=16927, episodic_return=[16.]
global_step=16952, episodic_return=[25.]
global_step=16993, episodic_return=[41.]
SPS: 5700
global_step=17013, episodic_return=[20.]
global_step=17038, episodic_return=[25.]
global_step=17055, episodic_return=[17.]
global_step=17067, episodic_return=[12.]
global_step=17080, episodic_return=[13.]
SPS: 5711
global_step=17129, episodic_return=[49.]
global_step=17140, episodic_return=[11.]
global_step=17153, episodic_return=[13.]
global_

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-1000.mp4
global_step=22787, episodic_return=[14.]
SPS: 6111
global_step=22847, episodic_return=[60.]
SPS: 6121
global_step=22907, episodic_return=[60.]
global_step=22932, episodic_return=[25.]
global_step=22964, episodic_return=[32.]
global_step=22992, episodic_return=[28.]
SPS: 6129
global_step=23005, episodic_return=[13.]
global_step=23032, episodic_return=[27.]
global_step=23052, episodic_return=[20.]




global_step=23068, episodic_return=[16.]
SPS: 6134
global_step=23111, episodic_return=[43.]
global_step=23121, episodic_return=[10.]
global_step=23148, episodic_return=[27.]
global_step=23170, episodic_return=[22.]
global_step=23181, episodic_return=[11.]
SPS: 6140
global_step=23208, episodic_return=[27.]
global_step=23233, episodic_return=[25.]
global_step=23254, episodic_return=[21.]
SPS: 6148
global_step=23345, episodic_return=[91.]
global_step=23379, episodic_return=[34.]
SPS: 6159
global_step=23423, episodic_return=[44.]
global_step=23479, episodic_return=[56.]
SPS: 6169
global_step=23515, episodic_return=[36.]
global_step=23537, episodic_return=[22.]
global_step=23572, episodic_return=[35.]
global_step=23598, episodic_return=[26.]
SPS: 6178
global_step=23634, episodic_return=[36.]
global_step=23646, episodic_return=[12.]
global_step=23689, episodic_return=[43.]
SPS: 6187
global_step=23716, episodic_return=[27.]
global_step=23742, episodic_return=[26.]
SPS: 6197
global_step=23815,

                                                   

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-2000.mp4
global_step=54623, episodic_return=[13.]
global_step=54692, episodic_return=[69.]
SPS: 7405
global_step=54707, episodic_return=[15.]
global_step=54771, episodic_return=[64.]
SPS: 7408
global_step=54816, episodic_return=[45.]
global_step=54847, episodic_return=[31.]
global_step=54868, episodic_return=[21.]
SPS: 7410
global_step=54936, episodic_return=[68.]
global_step=54959, episodic_return=[23.]
SPS: 7412
global_step=55011, episodic_return=[52.]
global_step=55026, episodic_return=[15.]
global_step=55038, episodic_return=[12.]
SPS: 7415




global_step=55186, episodic_return=[148.]
global_step=55197, episodic_return=[11.]
SPS: 7417
global_step=55254, episodic_return=[57.]
global_step=55268, episodic_return=[14.]
SPS: 7419
global_step=55315, episodic_return=[47.]
global_step=55348, episodic_return=[33.]
global_step=55396, episodic_return=[48.]
SPS: 7421
global_step=55425, episodic_return=[29.]
global_step=55456, episodic_return=[31.]
global_step=55485, episodic_return=[29.]
SPS: 7423
global_step=55531, episodic_return=[46.]
global_step=55546, episodic_return=[15.]
global_step=55561, episodic_return=[15.]
SPS: 7425
global_step=55609, episodic_return=[48.]
global_step=55628, episodic_return=[19.]
global_step=55648, episodic_return=[20.]
global_step=55683, episodic_return=[35.]
SPS: 7426
global_step=55724, episodic_return=[41.]
global_step=55762, episodic_return=[38.]
SPS: 7429
global_step=55837, episodic_return=[75.]
global_step=55851, episodic_return=[14.]
global_step=55892, episodic_return=[41.]
SPS: 7431
global_step=55944

                                                                

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-3000.mp4
global_step=106344, episodic_return=[105.]
global_step=106366, episodic_return=[22.]
SPS: 8008
global_step=106446, episodic_return=[80.]
global_step=106497, episodic_return=[51.]
SPS: 8009
global_step=106558, episodic_return=[61.]
SPS: 8009
SPS: 8011
SPS: 8011
global_step=106801, episodic_return=[243.]




global_step=106862, episodic_return=[61.]
SPS: 8012
global_step=106987, episodic_return=[125.]
SPS: 8012
global_step=107004, episodic_return=[17.]
SPS: 8013
global_step=107193, episodic_return=[189.]
SPS: 8013
SPS: 8014
global_step=107301, episodic_return=[108.]
SPS: 8015
global_step=107428, episodic_return=[127.]
global_step=107498, episodic_return=[70.]
SPS: 8016
global_step=107531, episodic_return=[33.]
SPS: 8016
global_step=107669, episodic_return=[138.]
global_step=107680, episodic_return=[11.]
SPS: 8018
global_step=107718, episodic_return=[38.]
global_step=107735, episodic_return=[17.]
global_step=107771, episodic_return=[36.]
SPS: 8018
global_step=107836, episodic_return=[65.]
global_step=107866, episodic_return=[30.]
SPS: 8019
global_step=107970, episodic_return=[104.]
global_step=107988, episodic_return=[18.]
SPS: 8021
global_step=108028, episodic_return=[40.]
SPS: 8022
global_step=108133, episodic_return=[105.]
SPS: 8023
global_step=108214, episodic_return=[81.]
SPS: 8024
SPS

                                                                

Moviepy - Done !
Moviepy - video ready /Users/shibo/Workspace/cleanrl/videos/CartPole-v1__dqn__1__1771880544/rl-video-episode-4000.mp4
global_step=410622, episodic_return=[500.]
SPS: 8276
SPS: 8276
SPS: 8276
SPS: 8276
SPS: 8276
global_step=411122, episodic_return=[500.]
SPS: 8276
SPS: 8276
SPS: 8276
SPS: 8276
SPS: 8277
global_step=411622, episodic_return=[500.]
SPS: 8277
SPS: 8277
SPS: 8277
SPS: 8277
SPS: 8277
global_step=412122, episodic_return=[500.]
SPS: 8277
SPS: 8277
SPS: 8277
SPS: 8277
SPS: 8277
global_step=412622, episodic_return=[500.]
SPS: 8278
SPS: 8278
SPS: 8278
SPS: 8278
SPS: 8278
global_step=413122, episodic_return=[500.]
SPS: 8278
SPS: 8278
SPS: 8278
SPS: 8278
SPS: 8279
global_step=413622, episodic_return=[500.]
SPS: 8279
SPS: 8279
SPS: 8279
SPS: 8279
SPS: 8279
global_step=414122, episodic_return=[500.]
SPS: 8279
SPS: 8279
SPS: 8280
SPS: 8280
SPS: 8280
global_step=414622, episodic_return=[500.]
SPS: 8280
SPS: 8280
SPS: 8280
SPS: 8280
SPS: 8280
global_step=415122, episodic