In [22]:
import gym
import random
import copy
import numpy as np
from collections import deque
from gym.spaces.box import Box
from gym import wrappers
from gym.wrappers import TransformObservation
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, IterableDataset

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

from gym.wrappers import RecordVideo, RecordEpisodeStatistics, TimeLimit, AtariPreprocessing


device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
num_gpus = torch.cuda.device_count()

In [23]:
class DRQN(nn.Module):
    def __init__(self, state_size , n_actions):
        super(DRQN, self).__init__()
        
        self.state_size = state_size
        self.seq = nn.Sequential(
                     nn.Linear(state_size[0], 128),
                     nn.ReLU(),
                     nn.Linear(128, 128),
                     nn.ReLU(),
                     nn.Linear(128, 128)    
                    )
        self.lstm = nn.LSTMCell(128, 256)
        self.linear = nn.Linear(256, n_actions)
        
   
    def forward(self, x, prev_state=None):        
        o = self.seq(x.float())
        if prev_state is not None:
            hs, ct = self.lstm(o, prev_state)
        else:
            hs = torch.zeros(x.shape[0], 256).to(device)
            ct = torch.zeros(x.shape[0], 256).to(device)
            hs, ct = self.lstm(o, (hs,ct))    
        q_values = self.linear(hs)
        return q_values, (hs, ct)

In [24]:
def epsilon_greedy(state, env, net, hidden=None, epsilon=0.0):
    if np.random.random() < epsilon:
        action = env.action_space.sample()
    else:
        state = state.to(device)
        q_values, hidden = net(state, hidden)
        _, action = torch.max(q_values, dim=1)
        action = int(action.item())
    return action, hidden

In [25]:
def slice_deque(buffer, start, stop, step):
    buffer.rotate(-start)
    slice = list(itertools.islice(buffer, 0, stop-start, step))
    buffer.rotate(start)
    return slice

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
        
    def __len__(self):
        return len(self.buffer)
    
    def append(self, experience):
        self.buffer.append(experience)
        
    def sample(self, sample_size, sequence_len):
        batch = []
        while len(batch) < sample_size :
            start = random.randint(0, len(self.buffer) - sequence_len)
            sequenze_sample = slice_deque(self.buffer, start, start + sequence_len, 1)
            batch += sequenze_sample
            
        return batch

In [26]:
class RLDataset(IterableDataset):
    def __init__(self, buffer, sample_size=400, sequence_len=5):
        self.buffer = buffer
        self.sample_size = sample_size
        self.sequence_len = sequence_len
        
    def __iter__(self):
        for experience in self.buffer.sample(self.sample_size,self.sequence_len):
            yield experience

In [27]:
def create_environment(name):
    env = gym.make(name, render_mode="rgb_array")
    env = TimeLimit(env, max_episode_steps=400)
    env = RecordVideo(env, video_folder='./videos/DRQN_LunarLander', episode_trigger=lambda x: x % 100 == 0)
    env = RecordEpisodeStatistics(env)
    return env



In [28]:
class DeepQLearning(LightningModule):
    def __init__(self, env_name, policy=epsilon_greedy, capacity=100_000, 
               batch_size=256, lr=1e-3, hidden_size=128, gamma=0.99, 
               loss_fn=F.smooth_l1_loss, optim=AdamW, eps_start=1.0, eps_end=0.15, 
               eps_last_episode=100, samples_per_epoch=1024, sync_rate=10,
               sequence_length = 4):
    
        super().__init__()
        self.env = create_environment(env_name)

        obs_size = self.env.observation_space.shape
        n_actions = self.env.action_space.n

        self.q_net = DRQN(obs_size, n_actions)

        self.target_q_net = copy.deepcopy(self.q_net)

        self.policy = policy
        self.buffer = ReplayBuffer(capacity=capacity)
        self.save_hyperparameters()

        while len(self.buffer) < self.hparams.samples_per_epoch:
            print(f"{len(self.buffer)} samples in experience buffer. Filling...")
            self.play_episode(epsilon=self.hparams.eps_start)
            
    @torch.no_grad()
    def play_episode(self, policy=None, epsilon=0.):
        state  = self.env.reset()
        state  = torch.from_numpy(state[0])
        hidden = None
        done = False
        step = 0
        while not done:
            if policy:
                action, hidden = policy(state.unsqueeze(dim=0), self.env, self.q_net, hidden, epsilon=epsilon)
            else:
                action = self.env.action_space.sample()
            next_state, reward, done, tru , _ = self.env.step(action)
            if tru:
                done = tru
            step += 1 
            next_state = torch.from_numpy(next_state)   
            exp = (step, state, action, reward, done, next_state)
            self.buffer.append(exp)
            state = next_state
            
        self.env.close()
        
        
    def forward(self, x):
        return self.q_net(x)

    
    def configure_optimizers(self):
        q_net_optimizer = self.hparams.optim(self.q_net.parameters(), lr=self.hparams.lr)
        return [q_net_optimizer]

     # Create dataloader.
    def train_dataloader(self):
        dataset = RLDataset(self.buffer, self.hparams.samples_per_epoch, self.hparams.sequence_length)
       
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=self.hparams.batch_size * self.hparams.sequence_length
        )
        return dataloader
    
    def training_step(self, batch, batch_idx):
        steps, states, actions, rewards, dones, next_states = batch
        actions = actions.unsqueeze(1)
        rewards = rewards.unsqueeze(1)
        dones = dones.unsqueeze(1)
        
        stack_q_values = []
        stack_actions = []
        stack_rewards = []
        stack_dones = []
        hidden = None
        for i in range(self.hparams.sequence_length):
            stack_actions.append(actions[i::self.hparams.sequence_length])
            stack_dones.append(dones[i::self.hparams.sequence_length])
            stack_rewards.append(rewards[i::self.hparams.sequence_length])
            q_values, hidden  = self.q_net(states[i::self.hparams.sequence_length],hidden)
            stack_q_values.append(q_values)
        stack_q_values = torch.cat(stack_q_values,dim=0)
        stack_actions = torch.cat(stack_actions,dim=0)
        stack_dones = torch.cat(stack_dones,dim=0)
        stack_rewards = torch.cat(stack_rewards,dim=0)

    
        state_action_values = torch.gather(stack_q_values, -1, stack_actions)
        
        
        stack_next_q_values = []
        hidden = None
        for i in range(self.hparams.sequence_length):
            q_values, hidden = self.target_q_net(next_states[i::self.hparams.sequence_length],hidden)
            stack_next_q_values.append(q_values)
                
        stack_next_q_values = torch.cat(stack_next_q_values,dim=0)
        next_action_values = torch.max(stack_next_q_values, dim=1)[0].unsqueeze(dim=1)
        next_action_values[stack_dones] = 0.0

        expected_state_action_values = stack_rewards + self.hparams.gamma * next_action_values

        loss = self.hparams.loss_fn(state_action_values, expected_state_action_values)
        self.log('episode/Q-Error', loss)
        return loss
    
    # Training epoch end.
    def training_epoch_end(self, training_step_outputs):
        epsilon = max(
            self.hparams.eps_end,
            self.hparams.eps_start - self.current_epoch / self.hparams.eps_last_episode
        )

        self.play_episode(policy=self.policy, epsilon=epsilon)
        self.log('episode/Return', self.env.return_queue[-1])

        if self.current_epoch % self.hparams.sync_rate == 0:
            self.target_q_net.load_state_dict(self.q_net.state_dict())
            
    def save_model(self):
        torch.save(self.q_net.state_dict(), "./model")
        
    def load_model(self):
        self.q_net.load_state_dict(torch.load( "./model"))


In [29]:
algo = DeepQLearning('LunarLander-v2')

checkpoint_callback = ModelCheckpoint(dirpath="./checkpoints/DRQN-LunarLander", save_top_k=1,mode="max", monitor="episode/Return")

trainer = Trainer(
     accelerator='gpu',
     devices=num_gpus,
     max_epochs=10_000,
     callbacks=[checkpoint_callback,EarlyStopping(monitor='episode/Return', mode='max', patience=500)]
)

trainer.fit(algo)

  logger.warn(


0 samples in experience buffer. Filling...
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-0.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-0.mp4



GPU available: True (cuda), used: True                                                                                                                                         
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
0 | q_net        | DRQN | 430 K 
1 | target_q_net | DRQN | 430 K 
--------------------------------------
860 K     Trainable params
0         Non-trainable params
860 K     Total params
3.444     Total estimated model params size (MB)


Moviepy - Done !
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-0.mp4
111 samples in experience buffer. Filling...
200 samples in experience buffer. Filling...
312 samples in experience buffer. Filling...
429 samples in experience buffer. Filling...
525 samples in experience buffer. Filling...
611 samples in experience buffer. Filling...
750 samples in experience buffer. Filling...
874 samples in experience buffer. Filling...
945 samples in experience buffer. Filling...
1015 samples in experience buffer. Filling...


  rank_zero_warn(


Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4

Epoch 88: : 1it [00:00,  2.69it/s, loss=2, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                    
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4

Epoch 89: : 1it [00:01,  1.16s/it, loss=2.01, v_num=12]


t:   0%|                                                                                                                                     | 0/234 [00:00<?, ?it/s, now=None][A
t:  17%|████████████████████▌                                                                                                      | 39/234 [00:00<00:00, 386.16it/s, now=None][A
t:  44%|██████████████████████████████████████████████████████▏                                                                   | 104/234 [00:00<00:00, 540.72it/s, now=None][A
t:  74%|██████████████████████████████████████████████████████████████████████████████████████████▏                               | 173/234 [00:00<00:00, 602.34it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                       
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-100.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4

Epoch 188: : 1it [00:00,  2.96it/s, loss=1.3, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                       
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4

Epoch 189: : 1it [00:01,  1.22s/it, loss=1.3, v_num=12]


t:   0%|                                                                                                                                     | 0/157 [00:00<?, ?it/s, now=None][A
t:   6%|███████▏                                                                                                                     | 9/157 [00:00<00:01, 88.51it/s, now=None][A
t:  34%|█████████████████████████████████████████▌                                                                                 | 53/157 [00:00<00:00, 293.83it/s, now=None][A
t:  67%|█████████████████████████████████████████████████████████████████████████████████▌                                        | 105/157 [00:00<00:00, 393.14it/s, now=None][A
t: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:00<00:00, 439.40it/s, now=None][A
                                                                                                        

Moviepy - Done !                                       
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-200.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4

Epoch 288: : 1it [00:00,  1.39it/s, loss=1.26, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4

Epoch 289: : 1it [00:03,  3.32s/it, loss=1.25, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:   1%|█▏                                                                                                                           | 4/401 [00:00<00:10, 39.56it/s, now=None][A
t:  12%|██████████████▋                                                                                                            | 48/401 [00:00<00:01, 263.22it/s, now=None][A
t:  24%|█████████████████████████████▏                                                                                             | 95/401 [00:00<00:00, 353.80it/s, now=None][A
t:  34%|█████████████████████████████████████████                                                                                 | 135/401 [00:00<00:00, 369.54it/s, now=None][A
t:  43%|████████████████████████████████████████████████████▋                                           

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-300.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4

Epoch 388: : 1it [00:00,  1.56it/s, loss=1.01, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4

Epoch 389: : 1it [00:01,  1.17s/it, loss=1.02, v_num=12]


t:   0%|                                                                                                                                     | 0/253 [00:00<?, ?it/s, now=None][A
t:  11%|█████████████▌                                                                                                             | 28/253 [00:00<00:00, 274.43it/s, now=None][A
t:  32%|██████████████████████████████████████▉                                                                                    | 80/253 [00:00<00:00, 415.27it/s, now=None][A
t:  57%|████████████████████████████████████████████████████████████████████▉                                                     | 143/253 [00:00<00:00, 510.33it/s, now=None][A
t:  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 214/253 [00:00<00:00, 586.29it/s, now=None][A
                                                                                                        

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-400.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4

Epoch 488: : 1it [00:00,  1.58it/s, loss=0.85, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4

Epoch 489: : 1it [00:01,  1.12s/it, loss=0.84, v_num=12]


t:   0%|                                                                                                                                     | 0/238 [00:00<?, ?it/s, now=None][A
t:  15%|██████████████████                                                                                                         | 35/238 [00:00<00:00, 346.55it/s, now=None][A
t:  41%|██████████████████████████████████████████████████▏                                                                        | 97/238 [00:00<00:00, 506.46it/s, now=None][A
t:  71%|██████████████████████████████████████████████████████████████████████████████████████▋                                   | 169/238 [00:00<00:00, 598.27it/s, now=None][A
t:  98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉  | 234/238 [00:00<00:00, 616.12it/s, now=None][A
                                                                                                        

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-500.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4

Epoch 588: : 1it [00:00,  1.40it/s, loss=0.74, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4

Epoch 589: : 1it [00:03,  3.22s/it, loss=0.739, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:   1%|█▊                                                                                                                           | 6/401 [00:00<00:06, 59.98it/s, now=None][A
t:  13%|███████████████▋                                                                                                           | 51/401 [00:00<00:01, 286.07it/s, now=None][A
t:  25%|██████████████████████████████▍                                                                                           | 100/401 [00:00<00:00, 378.60it/s, now=None][A
t:  40%|████████████████████████████████████████████████▋                                                                         | 160/401 [00:00<00:00, 460.24it/s, now=None][A
t:  53%|█████████████████████████████████████████████████████████████████                               

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-600.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4

Epoch 688: : 1it [00:00,  1.95it/s, loss=0.683, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4

Epoch 689: : 1it [00:01,  1.90s/it, loss=0.687, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:   9%|███████████▎                                                                                                               | 37/401 [00:00<00:00, 367.53it/s, now=None][A
t:  25%|██████████████████████████████▎                                                                                            | 99/401 [00:00<00:00, 512.62it/s, now=None][A
t:  44%|█████████████████████████████████████████████████████▏                                                                    | 175/401 [00:00<00:00, 622.22it/s, now=None][A
t:  63%|████████████████████████████████████████████████████████████████████████████▋                                             | 252/401 [00:00<00:00, 680.15it/s, now=None][A
t:  80%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-700.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4

Epoch 788: : 1it [00:00,  1.84it/s, loss=0.59, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4

Epoch 789: : 1it [00:01,  1.92s/it, loss=0.592, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:  10%|████████████▎                                                                                                              | 40/401 [00:00<00:00, 396.11it/s, now=None][A
t:  26%|████████████████████████████████▏                                                                                         | 106/401 [00:00<00:00, 547.61it/s, now=None][A
t:  46%|███████████████████████████████████████████████████████▋                                                                  | 183/401 [00:00<00:00, 648.61it/s, now=None][A
t:  66%|████████████████████████████████████████████████████████████████████████████████                                          | 263/401 [00:00<00:00, 705.42it/s, now=None][A
t:  84%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-800.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4

Epoch 888: : 1it [00:00,  2.19it/s, loss=0.56, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4

Epoch 889: : 1it [00:01,  1.88s/it, loss=0.561, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:  10%|███████████▉                                                                                                               | 39/401 [00:00<00:00, 386.41it/s, now=None][A
t:  26%|███████████████████████████████▉                                                                                          | 105/401 [00:00<00:00, 546.73it/s, now=None][A
t:  44%|█████████████████████████████████████████████████████▊                                                                    | 177/401 [00:00<00:00, 625.59it/s, now=None][A
t:  63%|████████████████████████████████████████████████████████████████████████████▎                                             | 251/401 [00:00<00:00, 670.46it/s, now=None][A
t:  81%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-900.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4

Epoch 988: : 1it [00:00,  1.86it/s, loss=0.68, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                        
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4

Epoch 989: : 1it [00:01,  1.49s/it, loss=0.674, v_num=12]


t:   0%|                                                                                                                                     | 0/315 [00:00<?, ?it/s, now=None][A
t:  13%|████████████████                                                                                                           | 41/315 [00:00<00:00, 409.20it/s, now=None][A
t:  33%|████████████████████████████████████████▎                                                                                 | 104/315 [00:00<00:00, 535.83it/s, now=None][A
t:  57%|█████████████████████████████████████████████████████████████████████▎                                                    | 179/315 [00:00<00:00, 630.40it/s, now=None][A
t:  79%|████████████████████████████████████████████████████████████████████████████████████████████████▊                         | 250/315 [00:00<00:00, 661.67it/s, now=None][A
                                                                                                        

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1000.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4

Epoch 1088: : 1it [00:00,  1.72it/s, loss=0.68, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4

Epoch 1089: : 1it [00:01,  1.80s/it, loss=0.681, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:  10%|████████████▎                                                                                                              | 40/401 [00:00<00:00, 392.44it/s, now=None][A
t:  26%|████████████████████████████████▏                                                                                         | 106/401 [00:00<00:00, 545.41it/s, now=None][A
t:  46%|███████████████████████████████████████████████████████▉                                                                  | 184/401 [00:00<00:00, 645.87it/s, now=None][A
t:  64%|██████████████████████████████████████████████████████████████████████████████▍                                           | 258/401 [00:00<00:00, 680.01it/s, now=None][A
t:  82%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1100.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4

Epoch 1188: : 1it [00:00,  2.46it/s, loss=0.677, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4

Epoch 1189: : 1it [00:01,  1.89s/it, loss=0.697, v_num=12]


t:   0%|                                                                                                                                     | 0/401 [00:00<?, ?it/s, now=None][A
t:   9%|███████████                                                                                                                | 36/401 [00:00<00:01, 359.79it/s, now=None][A
t:  25%|██████████████████████████████▋                                                                                           | 101/401 [00:00<00:00, 524.40it/s, now=None][A
t:  44%|██████████████████████████████████████████████████████▏                                                                   | 178/401 [00:00<00:00, 635.83it/s, now=None][A
t:  63%|█████████████████████████████████████████████████████████████████████████████▎                                            | 254/401 [00:00<00:00, 681.98it/s, now=None][A
t:  81%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1200.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4

Epoch 1288: : 1it [00:00,  4.10it/s, loss=0.763, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4

Epoch 1289: : 1it [00:01,  1.76s/it, loss=0.783, v_num=12]


t:   0%|                                                                                                                                     | 0/316 [00:00<?, ?it/s, now=None][A
t:  13%|███████████████▉                                                                                                           | 41/316 [00:00<00:00, 407.46it/s, now=None][A
t:  33%|████████████████████████████████████████▌                                                                                 | 105/316 [00:00<00:00, 543.75it/s, now=None][A
t:  56%|████████████████████████████████████████████████████████████████████▋                                                     | 178/316 [00:00<00:00, 625.90it/s, now=None][A
t:  77%|██████████████████████████████████████████████████████████████████████████████████████████████▏                           | 244/316 [00:00<00:00, 636.81it/s, now=None][A
t: 100%|████████████████████████████████████████████████████████████████████████████████████████████████

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1300.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4

Epoch 1388: : 1it [00:00,  1.49it/s, loss=0.985, v_num=12]


t:   0%|                                                                                                                                       | 0/1 [00:00<?, ?it/s, now=None][A
                                                                                                                                                                               [A

Moviepy - Done !                                          
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4

Epoch 1389: : 1it [00:02,  2.34s/it, loss=1.01, v_num=12]


t:   0%|                                                                                                                                     | 0/291 [00:00<?, ?it/s, now=None][A
t:   2%|███                                                                                                                          | 7/291 [00:00<00:04, 68.63it/s, now=None][A
t:  18%|██████████████████████▍                                                                                                    | 53/291 [00:00<00:00, 293.55it/s, now=None][A
t:  35%|██████████████████████████████████████████▎                                                                               | 101/291 [00:00<00:00, 377.69it/s, now=None][A
t:  52%|██████████████████████████████████████████████████████████████▉                                                           | 150/291 [00:00<00:00, 420.09it/s, now=None][A
t:  70%|█████████████████████████████████████████████████████████████████████████████████████▉          

Moviepy - Done !                                         
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-1400.mp4
Epoch 1482: : 1it [00:00,  1.80it/s, loss=1.01, v_num=12] 


In [21]:
policy = algo.policy
algo.q_net = algo.q_net.to(device)

for i in range(100):
    print(i)
    algo.play_episode(policy=algo.policy, epsilon=0)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4



                                                                                                                                                                               

Moviepy - Done !
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4
18




Moviepy - Building video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4.
Moviepy - Writing video D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4



                                                                                                                                                                               

Moviepy - Done !
Moviepy - video ready D:\multi_agents_rl\videos\DRQN_LunarLander\rl-video-episode-2000.mp4
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


KeyboardInterrupt: 