In [74]:
import torch

In [75]:
import datetime

import gym

In [76]:
from time import sleep
from tqdm import tqdm
from torch import nn
import copy
import random
from torch.utils.tensorboard import SummaryWriter

# class : Config
- 학습에 필요한 각종 세팅, replay memory 등을 저장 및 관리

In [89]:
class Config():
    def __init__(self, lr = 0.01,
                gamma = 0.6,
                min_epsilon = 0.1,
                renew_target = 100,
                store_and_replay = True,
                double_dqn = True):
        self.lr = lr
        self.gamma = gamma
        self.min_epsilon = min_epsilon
        self.store_and_replay = store_and_replay
        self.double_dqn = double_dqn
        self.max_time_steps = 1000
        self.renew_target = renew_target
        
        self.replay_memory_fail = []
        self.replay_memory_fail_size = 1000 # 실패 state 의 결과를 1000개까지만 저장한다.
        self.replay_memory_success = []
        
        self.prev_success = set() # 디버깅용 : 손님을 태워 목적지에 도착한상태를 저장
        
        dt = datetime.datetime.now() ## 기록 title 설정
        dt_str = dt.strftime("%Y-%m-%d %H:%M:%S")
        self.opt_str = f"SR_{store_and_replay}_DD_{double_dqn}_LOSS"
        self.writer = SummaryWriter(f"runs/{self.opt_str}_{dt_str}")
        
        self.num_episodes = 5000
    
    def get_epsilon(self, episode = -1):
        if episode < 0:
            return self.min_epsilon
        return max(self.min_epsilon, 0.99**episode)
    
    '''
    success, fail 을 따로 저장하도록 사이즈 조정을 해줌.
    '''
    def _insert_record(self, memory, rec, memory_size = -1):
        if rec in memory:
            #print(f'in memory. {rec}')
            return
        if len(memory) < memory_size or memory_size == -1:
            memory.append(rec)
        else:
            memory[random.randint(0, memory_size-1)] = rec
    
    def insert_record(self, rec):
        done = rec[-1]
        if done:
            self._insert_record(self.replay_memory_success, rec)
            self.prev_success.add(rec[0])
        else:
            self._insert_record(self.replay_memory_fail, rec, self.replay_memory_fail_size)
            
    def get_replay_memory(self):
        return self.replay_memory_success + self.replay_memory_fail
    
    def get_replay_record(self):
        replay_memory = self.get_replay_memory()
        mid_replay = random.randint(0, len(replay_memory) - 1)
        return mid_replay, replay_memory[mid_replay]

# class : QNet
- DQN

In [78]:
class QNet(nn.Module):
    def __init__(self, num_states, num_actions, hidden_dim = 16):
        super().__init__()
        
        self.layers = nn.Sequential(
            nn.Embedding(num_states, 2*hidden_dim),
            nn.Linear(2*hidden_dim, 2*hidden_dim),
            nn.PReLU(),
            nn.Linear(2*hidden_dim, hidden_dim),
            nn.PReLU(),
            nn.Linear(hidden_dim, num_actions)
        )
        
    def forward(self, x):
        x = self.layers(x)
        return x

# def : train
- 모델 학습 (전체 에피소드에 대해)

In [79]:
def train(env, config, qnet):
    optimizer = torch.optim.SGD(qnet.parameters(), lr = 0.001)
    if config.double_dqn:
        qnet2 = copy.deepcopy(qnet)
    else:
        qnet2 = qnet
        
    criteria = nn.MSELoss()
    num_episodes = config.num_episodes
    
    for i in tqdm(range(1, num_episodes+1)):
        rtn_loss = train_episode(env, config, qnet, qnet2, optimizer, criteria, episode_count = i) # 수정
        if i % config.renew_target == 0:
            if config.double_dqn:
                qnet2 = copy.deepcopy(qnet)
        
        if i == 1 or i%50 == 0:
            test(env, config, qnet, i, rtn_loss) # 수정
            
    print("Training finished.\n")

# def : train_episode
- 모델 학습 (한 에피소드)

In [80]:
def train_episode(env, config, qnet, qnet2, optimizer, criteria, episode_count = -1):
    done = False
    n_steps = 0
    state = env.reset()
    tot_reward = 0
    loss = 0
    penalties = 0
    
    while not done and n_steps < config.max_time_steps:
        #n_steps += 1
        state_t = torch.LongTensor([state])
        
        epsilon = config.get_epsilon(episode_count)
        #print("Epsilon", epsilon)
        
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            with torch.no_grad():
                q_hat = qnet(state_t)
                action = torch.argmax(q_hat[0]).item()
                
        next_state, reward, done, info = env.step(action)
        
        tot_reward += reward
        
        new_tuple = (state, action, next_state, reward, done)
        
        state = next_state
        if reward == -10:
            penalties += 1
            
        # Replay
        if config.store_and_replay:
            config.insert_record(new_tuple)
            loss_i, steps_i = replay(qnet, qnet2, config, optimizer, criteria)
        else:
            loss_i, steps_i = replay(qnet, qnet2, config, optimizer, criteria, rec = new_tuple)
        loss += loss_i
        n_steps += 1
    

    return loss #추가

# def : replay
- 모델 학습 (한 에피소드)

In [81]:
def replay(qnet, qnet2, config, optimizer, criteria, num_instances = 5, rec = None):
    loss_i = 0
    
    for _ in range(num_instances):
        optimizer.zero_grad()
        if config.store_and_replay:
            mid_replay, rec = config.get_replay_record()
            
        state, action, next_state, reward, done = rec
        
        # Set target value
        if done:
            y_t = torch.Tensor([reward])
        else:
            next_state_r_t = torch.LongTensor([next_state])
            with torch.no_grad():
                q_next = qnet2(next_state_r_t)
                y_t = reward + config.gamma * q_next.max(dim=-1)[0]
                
        # Make a prediction
        state_r_t = torch.LongTensor([state])
        q_hat = qnet(state_r_t)
        q_hat = q_hat[:, action]
        
        # Update
        loss = criteria(q_hat, y_t)
        loss.backward()
        optimizer.step()
        loss_i += loss.item()
        
    return loss_i, num_instances

# def : test
- 테스트

In [82]:
def test(env, config, qnet, global_step = -1, loss = -1): # 수정
    qnet.eval()
    total_epochs, total_penalites = 0, 0
    episodes = 100
    
    total_reward = 0
    writer = config.writer
    
    for _ in tqdm(range(episodes)):
        state = env.reset()
        epochs, penalties, reward = 0, 0, 0
        
        done = False
        
        while not done and epochs < config.max_time_steps:
            with torch.no_grad():
                state_t = torch.LongTensor([state])
                q_hat = qnet(state_t)
                action = torch.argmax(q_hat[0]).item()
                
            state, reward, done, info = env.step(action)
            total_reward += reward
            if reward == -10:
                penalties += 1
                
            epochs += 1
            
        total_penalites += penalties
        total_epochs += epochs
        
    avg_steps = total_epochs / episodes
    avg_penalty = total_penalites / episodes
    avg_reward = total_reward / episodes
    
    print(f"Result after {episodes} episodes:")
    print(f"Average timesteps per episodes: {avg_steps}")
    print(f"Average penalty per episodes: {avg_penalty}")
    print(f"Average reward per episodes: {avg_reward}")
    
    if global_step > 0:
        writer.add_scalar("Steps", avg_steps, global_step)
        writer.add_scalar("Penalty", avg_penalty, global_step)
        writer.add_scalar("Reward", avg_reward, global_step)
        writer.add_scalar("Loss", loss, global_step) # 추가
          

# 메인

In [91]:
env = gym.make("Taxi-v3").env

print("Action Space {}".format(env.action_space))
print("State Space {}".format(env.observation_space))
store_and_replay = True
double_dqn = False

print(f"Store & replay: {store_and_replay}")
print(f"Doublde DQN: {double_dqn}")

qnet = QNet(env.observation_space.n, env.action_space.n, hidden_dim = 32)
config = Config(store_and_replay = store_and_replay, double_dqn = double_dqn, lr = 0.01, gamma = 0.7, renew_target = 20)

train(env, config, qnet)
test(env, config, qnet, config.num_episodes)

Action Space Discrete(6)
State Space Discrete(500)
Store & replay: True
Doublde DQN: False


  0%|                                                  | 0/5000 [00:00<?, ?it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 15.04it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 14.93it/s][A
  6%|██▌                                        | 6/100 [00:00<00:06, 14.99it/s][A
  8%|███▍                                       | 8/100 [00:00<00:06, 14.99it/s][A
 10%|████▏                                     | 10/100 [00:00<00:06, 15.00it/s][A
 12%|█████                                     | 12/100 [00:00<00:05, 15.02it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:05, 15.02it/s][A
 16%|██████▋                                   | 16/100 [00:01<00:05, 15.01it/s][A
 18%|███████▌                                  | 18/100 [00:01<00:05, 14.99it/s][A
 20%|████████▍                                 | 20/100 [00:01<00:05, 14.94it/s

Result after 100 episodes:
Average timesteps per episodes: 1000.0
Average penalty per episodes: 0.0
Average reward per episodes: -1000.0


  1%|▍                                      | 49/5000 [01:29<2:17:02,  1.66s/it]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.74it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 14.75it/s][A
  6%|██▌                                        | 6/100 [00:00<00:08, 11.25it/s][A
  8%|███▍                                       | 8/100 [00:00<00:07, 12.34it/s][A
 10%|████▏                                     | 10/100 [00:00<00:06, 12.98it/s][A
 12%|█████                                     | 12/100 [00:00<00:06, 12.95it/s][A
 14%|█████▉                                    | 14/100 [00:01<00:06, 13.39it/s][A
 16%|██████▋                                   | 16/100 [00:01<00:06, 13.78it/s][A
 18%|███████▌                                  | 18/100 [00:01<00:05, 14.06it/s][A
 20%|████████▍                                 | 20/100 [00:01<00:05, 14.17it/s

Result after 100 episodes:
Average timesteps per episodes: 990.07
Average penalty per episodes: 0.0
Average reward per episodes: -989.86


  2%|▊                                      | 99/5000 [02:34<1:24:45,  1.04s/it]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 15.11it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 15.12it/s][A
  6%|██▌                                        | 6/100 [00:00<00:06, 15.02it/s][A
  8%|███▍                                       | 8/100 [00:00<00:06, 15.03it/s][A
 10%|████▏                                     | 10/100 [00:00<00:05, 15.05it/s][A
 12%|█████                                     | 12/100 [00:00<00:05, 15.08it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:05, 15.05it/s][A
 16%|██████▋                                   | 16/100 [00:01<00:05, 15.03it/s][A
 18%|███████▌                                  | 18/100 [00:01<00:05, 15.05it/s][A
 20%|████████▍                                 | 20/100 [00:01<00:05, 15.05it/s

Result after 100 episodes:
Average timesteps per episodes: 990.07
Average penalty per episodes: 0.0
Average reward per episodes: -989.86


  3%|█▏                                    | 149/5000 [03:19<1:37:12,  1.20s/it]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.82it/s][A
  5%|██▏                                        | 5/100 [00:00<00:04, 19.06it/s][A
  8%|███▍                                       | 8/100 [00:00<00:04, 20.17it/s][A
 11%|████▌                                     | 11/100 [00:00<00:05, 17.56it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:05, 16.61it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:05, 16.05it/s][A
 17%|███████▏                                  | 17/100 [00:01<00:05, 15.63it/s][A
 19%|███████▉                                  | 19/100 [00:01<00:05, 15.36it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:05, 15.15it/s][A
 25%|██████████▌                               | 25/100 [00:01<00:03, 19.29it/s

Result after 100 episodes:
Average timesteps per episodes: 861.36
Average penalty per episodes: 0.0
Average reward per episodes: -858.42


  4%|█▌                                      | 199/5000 [04:12<27:05,  2.95it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:08, 12.15it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 15.30it/s][A
  6%|██▌                                        | 6/100 [00:00<00:06, 14.38it/s][A
  8%|███▍                                       | 8/100 [00:00<00:06, 14.63it/s][A
 10%|████▏                                     | 10/100 [00:00<00:06, 14.78it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:04, 19.85it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:04, 18.33it/s][A
 19%|███████▉                                  | 19/100 [00:01<00:04, 19.66it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:04, 18.26it/s][A
 23%|█████████▋                                | 23/100 [00:01<00:04, 17.12it/s

Result after 100 episodes:
Average timesteps per episodes: 772.37
Average penalty per episodes: 0.0
Average reward per episodes: -767.54


  5%|█▉                                      | 248/5000 [04:44<17:11,  4.61it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 15.00it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 15.03it/s][A
  6%|██▌                                        | 6/100 [00:00<00:06, 15.03it/s][A
 12%|█████                                     | 12/100 [00:00<00:03, 26.23it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:03, 24.38it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:04, 20.29it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:04, 18.38it/s][A
 23%|█████████▋                                | 23/100 [00:01<00:04, 17.44it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:03, 18.71it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:04, 17.59it/s

Result after 100 episodes:
Average timesteps per episodes: 752.53
Average penalty per episodes: 0.0
Average reward per episodes: -747.28


  6%|██▎                                   | 298/5000 [05:18<1:19:29,  1.01s/it]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.28it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 21.26it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 24.44it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:04, 19.50it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:03, 22.34it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:04, 18.77it/s][A
 25%|██████████▌                               | 25/100 [00:01<00:03, 23.36it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:03, 22.79it/s][A
 31%|█████████████                             | 31/100 [00:01<00:03, 22.37it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:02, 22.05it/s

Result after 100 episodes:
Average timesteps per episodes: 703.23
Average penalty per episodes: 0.0
Average reward per episodes: -696.93


  7%|██▊                                     | 349/5000 [05:46<42:43,  1.81it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.42it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 26.75it/s][A
 11%|████▌                                     | 11/100 [00:00<00:03, 23.51it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:03, 22.54it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:03, 24.55it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 28.10it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:02, 25.84it/s][A
 29%|████████████▏                             | 29/100 [00:01<00:02, 24.30it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:03, 20.24it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:03, 20.53it/s

Result after 100 episodes:
Average timesteps per episodes: 574.9
Average penalty per episodes: 0.0
Average reward per episodes: -565.87


  8%|███▏                                    | 399/5000 [06:21<24:53,  3.08it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.13it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 14.25it/s][A
  7%|███                                        | 7/100 [00:00<00:05, 17.41it/s][A
  9%|███▊                                       | 9/100 [00:00<00:05, 16.01it/s][A
 12%|█████                                     | 12/100 [00:00<00:04, 17.66it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:03, 23.24it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:03, 24.73it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:02, 27.94it/s][A
 29%|████████████▏                             | 29/100 [00:01<00:02, 25.81it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 24.07it/s

Result after 100 episodes:
Average timesteps per episodes: 594.46
Average penalty per episodes: 0.0
Average reward per episodes: -585.85


  9%|███▍                                  | 449/5000 [06:47<1:14:40,  1.02it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.50it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 20.74it/s][A
  9%|███▊                                       | 9/100 [00:00<00:04, 21.13it/s][A
 12%|█████                                     | 12/100 [00:00<00:04, 21.27it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:04, 18.09it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:03, 21.25it/s][A
 22%|█████████▏                                | 22/100 [00:01<00:04, 18.58it/s][A
 24%|██████████                                | 24/100 [00:01<00:04, 17.40it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:04, 16.51it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:04, 15.90it/s

Result after 100 episodes:
Average timesteps per episodes: 624.52
Average penalty per episodes: 0.0
Average reward per episodes: -616.54


 10%|███▉                                    | 499/5000 [07:14<22:43,  3.30it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.77it/s][A
  7%|███                                        | 7/100 [00:00<00:04, 19.02it/s][A
 12%|█████                                     | 12/100 [00:00<00:03, 25.54it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 34.16it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 27.63it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:02, 29.88it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 25.82it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 30.16it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:02, 23.40it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:02, 22.93it/s

Result after 100 episodes:
Average timesteps per episodes: 594.93
Average penalty per episodes: 0.0
Average reward per episodes: -586.32


 11%|████▍                                   | 549/5000 [07:33<20:11,  3.67it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 41.74it/s][A
 11%|████▌                                     | 11/100 [00:00<00:03, 27.97it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:03, 23.59it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:04, 18.75it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:04, 18.91it/s][A
 24%|██████████                                | 24/100 [00:01<00:03, 19.45it/s][A
 27%|███████████▎                              | 27/100 [00:01<00:03, 19.90it/s][A
 30%|████████████▌                             | 30/100 [00:01<00:03, 17.84it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:03, 20.58it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 32.06it/s

Result after 100 episodes:
Average timesteps per episodes: 565.06
Average penalty per episodes: 0.0
Average reward per episodes: -555.82


 12%|████▊                                   | 599/5000 [07:52<11:13,  6.53it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.49it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 22.65it/s][A
  9%|███▊                                       | 9/100 [00:00<00:04, 22.13it/s][A
 12%|█████                                     | 12/100 [00:00<00:04, 21.82it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:04, 18.45it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:04, 19.35it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:03, 19.96it/s][A
 27%|███████████▎                              | 27/100 [00:01<00:02, 26.74it/s][A
 30%|████████████▌                             | 30/100 [00:01<00:02, 25.18it/s][A
 37%|███████████████▌                          | 37/100 [00:01<00:01, 32.40it/s

Result after 100 episodes:
Average timesteps per episodes: 564.63
Average penalty per episodes: 0.0
Average reward per episodes: -555.39


 13%|█████▏                                  | 648/5000 [08:12<35:59,  2.02it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.46it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 25.67it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 23.69it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:03, 22.76it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:04, 18.95it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:04, 17.56it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:04, 18.73it/s][A
 23%|█████████▋                                | 23/100 [00:01<00:04, 17.40it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:03, 22.84it/s][A
 33%|█████████████▊                            | 33/100 [00:01<00:02, 26.57it/s

Result after 100 episodes:
Average timesteps per episodes: 624.36
Average penalty per episodes: 0.0
Average reward per episodes: -616.38


 14%|█████▎                                | 699/5000 [08:37<1:01:39,  1.16it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 53.24it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 34.75it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:02, 31.96it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 33.17it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 36.09it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 37.57it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 40.69it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 38.83it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 27.62it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:01, 29.96it/s

Result after 100 episodes:
Average timesteps per episodes: 426.78
Average penalty per episodes: 0.0
Average reward per episodes: -414.6


 15%|█████▉                                  | 749/5000 [09:09<49:18,  1.44it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 53.06it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:01, 45.48it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:01, 40.58it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 37.04it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:02, 28.74it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 33.98it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 31.92it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 30.84it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 32.52it/s][A
 55%|███████████████████████                   | 55/100 [00:01<00:01, 34.72it/s

Result after 100 episodes:
Average timesteps per episodes: 407.47
Average penalty per episodes: 0.0
Average reward per episodes: -394.87


 16%|██████                                | 796/5000 [09:37<1:02:11,  1.13it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.64it/s][A
  9%|███▊                                       | 9/100 [00:00<00:03, 24.27it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 28.71it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:02, 28.55it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:02, 35.43it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:02, 33.24it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:01, 33.85it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 34.26it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 32.46it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:01, 33.13it/s

Result after 100 episodes:
Average timesteps per episodes: 466.29
Average penalty per episodes: 0.0
Average reward per episodes: -454.95


 17%|██████▊                                 | 847/5000 [09:59<27:24,  2.52it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.38it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 24.20it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:02, 38.60it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 29.07it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 28.84it/s][A
 31%|█████████████                             | 31/100 [00:00<00:01, 37.40it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:01, 34.69it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:01, 36.96it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:01, 40.68it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 39.05it/s

Result after 100 episodes:
Average timesteps per episodes: 416.95
Average penalty per episodes: 0.0
Average reward per episodes: -404.56


 18%|███████▏                                | 899/5000 [10:24<53:59,  1.27it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 42.61it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 50.07it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 43.02it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 45.77it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 35.18it/s][A
 37%|███████████████▌                          | 37/100 [00:01<00:02, 29.28it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 34.61it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:01, 29.02it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 30.66it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:01, 37.12it/s

Result after 100 episodes:
Average timesteps per episodes: 446.76
Average penalty per episodes: 0.0
Average reward per episodes: -435.0


 19%|███████▌                                | 947/5000 [10:47<07:11,  9.39it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.59it/s][A
  9%|███▊                                       | 9/100 [00:00<00:02, 32.33it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 46.18it/s][A
 24%|██████████                                | 24/100 [00:00<00:01, 44.52it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 43.48it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 40.60it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:01, 41.00it/s][A
 46%|███████████████████▎                      | 46/100 [00:01<00:01, 33.92it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 32.31it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 27.45it/s

Result after 100 episodes:
Average timesteps per episodes: 446.77
Average penalty per episodes: 0.0
Average reward per episodes: -435.01


 20%|███████▉                                | 997/5000 [10:58<05:06, 13.06it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.30it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 28.20it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 34.28it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:02, 30.78it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 32.07it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:02, 30.57it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 31.84it/s][A
 36%|███████████████                           | 36/100 [00:01<00:02, 30.75it/s][A
 40%|████████████████▊                         | 40/100 [00:01<00:02, 29.96it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 29.42it/s

Result after 100 episodes:
Average timesteps per episodes: 446.65
Average penalty per episodes: 0.0
Average reward per episodes: -434.89


 21%|████████▏                              | 1049/5000 [11:11<08:08,  8.09it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.68it/s][A
  9%|███▊                                       | 9/100 [00:00<00:02, 31.37it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:02, 29.95it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:03, 21.95it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 32.73it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:02, 31.39it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 26.54it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:02, 25.09it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 24.04it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 31.30it/s

Result after 100 episodes:
Average timesteps per episodes: 436.71
Average penalty per episodes: 0.0
Average reward per episodes: -424.74


 22%|████████▌                              | 1098/5000 [11:24<14:17,  4.55it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.49it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 21.49it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 30.36it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:02, 34.66it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 39.59it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 35.90it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:00, 67.56it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:00, 61.19it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:01<00:00, 73.03it/s][A
 79%|█████████████████████████████████▏        | 79/100 [00:01<00:00, 47.27it/s

Result after 100 episodes:
Average timesteps per episodes: 357.81
Average penalty per episodes: 0.0
Average reward per episodes: -344.16


 23%|████████▉                              | 1146/5000 [11:32<07:57,  8.07it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 12%|█████                                     | 12/100 [00:00<00:01, 79.89it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 51.07it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 63.08it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 43.35it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 33.55it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 38.41it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:01, 33.72it/s][A
 62%|██████████████████████████                | 62/100 [00:01<00:01, 32.41it/s][A
 66%|███████████████████████████▋              | 66/100 [00:01<00:01, 31.16it/s][A
 70%|█████████████████████████████▍            | 70/100 [00:01<00:01, 26.67it/s

Result after 100 episodes:
Average timesteps per episodes: 387.4
Average penalty per episodes: 0.0
Average reward per episodes: -374.38


 24%|█████████▎                             | 1198/5000 [11:46<09:43,  6.52it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:05, 18.63it/s][A
  7%|███                                        | 7/100 [00:00<00:04, 22.04it/s][A
 10%|████▏                                     | 10/100 [00:00<00:04, 19.82it/s][A
 12%|█████                                     | 12/100 [00:00<00:06, 13.88it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:04, 17.39it/s][A
 22%|█████████▏                                | 22/100 [00:01<00:02, 26.60it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:02, 31.04it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:01, 36.31it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 32.96it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 32.55it/s

Result after 100 episodes:
Average timesteps per episodes: 416.86
Average penalty per episodes: 0.0
Average reward per episodes: -404.47


 25%|█████████▋                             | 1249/5000 [11:59<29:26,  2.12it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 26.57it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 27.71it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 44.50it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:02, 32.96it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 27.04it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 35.07it/s][A
 36%|███████████████                           | 36/100 [00:01<00:01, 32.78it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 41.31it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 39.62it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:01, 42.02it/s

Result after 100 episodes:
Average timesteps per episodes: 357.69
Average penalty per episodes: 0.0
Average reward per episodes: -344.04


 26%|██████████▏                            | 1299/5000 [12:08<04:53, 12.62it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.01it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 24.37it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 33.22it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:02, 37.48it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 43.59it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 43.04it/s][A
 40%|████████████████▊                         | 40/100 [00:01<00:01, 35.08it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 28.83it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:02, 22.86it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:02, 22.51it/s

Result after 100 episodes:
Average timesteps per episodes: 436.78
Average penalty per episodes: 0.0
Average reward per episodes: -424.81


 27%|██████████▌                            | 1349/5000 [12:25<16:43,  3.64it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 55.85it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 30.64it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:02, 34.64it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 28.08it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 41.97it/s][A
 40%|████████████████▊                         | 40/100 [00:01<00:01, 31.22it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 42.22it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:01, 40.30it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:01, 38.82it/s][A
 66%|███████████████████████████▋              | 66/100 [00:01<00:00, 37.74it/s

Result after 100 episodes:
Average timesteps per episodes: 387.49
Average penalty per episodes: 0.0
Average reward per episodes: -374.47


 28%|██████████▉                            | 1398/5000 [12:35<11:42,  5.13it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:05, 18.38it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:01, 49.41it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 46.02it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:01, 46.91it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 42.80it/s][A
 37%|███████████████▌                          | 37/100 [00:00<00:01, 40.16it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:00, 55.37it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 55.23it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 44.56it/s][A
 75%|███████████████████████████████▌          | 75/100 [00:01<00:00, 47.47it/s

Result after 100 episodes:
Average timesteps per episodes: 348.36
Average penalty per episodes: 0.0
Average reward per episodes: -334.5


 29%|███████████▎                           | 1449/5000 [12:46<05:37, 10.51it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 51.56it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 42.64it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 31.05it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 30.21it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:02, 34.11it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:02, 32.35it/s][A
 37%|███████████████▌                          | 37/100 [00:01<00:02, 27.09it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:02, 27.51it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 35.30it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 29.04it/s

Result after 100 episodes:
Average timesteps per episodes: 406.95
Average penalty per episodes: 0.0
Average reward per episodes: -394.35


 30%|███████████▋                           | 1499/5000 [12:55<07:22,  7.91it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.39it/s][A
 10%|████▏                                     | 10/100 [00:00<00:02, 35.34it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 32.23it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 33.41it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 31.56it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:02, 30.50it/s][A
 31%|█████████████                             | 31/100 [00:00<00:02, 29.78it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:02, 27.05it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 27.43it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:02, 27.74it/s

Result after 100 episodes:
Average timesteps per episodes: 515.6
Average penalty per episodes: 0.0
Average reward per episodes: -505.31


 31%|████████████                           | 1549/5000 [13:03<05:27, 10.54it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  1%|▍                                          | 1/100 [00:00<00:15,  6.45it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 14.49it/s][A
  9%|███▊                                       | 9/100 [00:00<00:04, 19.61it/s][A
 11%|████▌                                     | 11/100 [00:00<00:05, 17.15it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:04, 18.37it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:04, 17.09it/s][A
 18%|███████▌                                  | 18/100 [00:01<00:05, 16.20it/s][A
 21%|████████▊                                 | 21/100 [00:01<00:04, 17.73it/s][A
 23%|█████████▋                                | 23/100 [00:01<00:04, 16.68it/s][A
 25%|██████████▌                               | 25/100 [00:01<00:04, 15.86it/s

Result after 100 episodes:
Average timesteps per episodes: 762.49
Average penalty per episodes: 0.0
Average reward per episodes: -757.45


 32%|████████████▍                          | 1599/5000 [13:16<03:03, 18.57it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 11%|████▌                                     | 11/100 [00:00<00:01, 75.35it/s][A
 32%|█████████████                            | 32/100 [00:00<00:00, 109.82it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 94.22it/s][A
 53%|██████████████████████▎                   | 53/100 [00:00<00:00, 62.13it/s][A
 61%|█████████████████████████▌                | 61/100 [00:00<00:00, 52.92it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 51.72it/s][A
 74%|███████████████████████████████           | 74/100 [00:01<00:00, 42.96it/s][A
 79%|█████████████████████████████████▏        | 79/100 [00:01<00:00, 40.94it/s][A
 84%|███████████████████████████████████▎      | 84/100 [00:01<00:00, 39.38it/s][A
 89%|█████████████████████████████████████▍    | 89/100 [00:01<00:00, 38.13it/s

Result after 100 episodes:
Average timesteps per episodes: 308.58
Average penalty per episodes: 0.0
Average reward per episodes: -293.88


 33%|████████████▊                          | 1648/5000 [13:25<04:33, 12.26it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.53it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:01, 55.16it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 49.02it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 43.25it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:02, 34.51it/s][A
 36%|███████████████                           | 36/100 [00:00<00:01, 36.85it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:01, 36.36it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 29.45it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 32.85it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:01, 37.40it/s

Result after 100 episodes:
Average timesteps per episodes: 407.0
Average penalty per episodes: 0.0
Average reward per episodes: -394.4


 34%|█████████████▎                         | 1699/5000 [13:33<04:27, 12.36it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.69it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 54.63it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:01, 54.61it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:02, 31.72it/s][A
 37%|███████████████▌                          | 37/100 [00:00<00:01, 37.87it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 32.83it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 38.52it/s][A
 55%|███████████████████████                   | 55/100 [00:01<00:01, 37.61it/s][A
 60%|█████████████████████████▏                | 60/100 [00:01<00:01, 36.96it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 36.42it/s

Result after 100 episodes:
Average timesteps per episodes: 426.74
Average penalty per episodes: 0.0
Average reward per episodes: -414.56


 35%|█████████████▋                         | 1749/5000 [13:41<06:02,  8.97it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 41.74it/s][A
 11%|████▌                                     | 11/100 [00:00<00:02, 37.75it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:02, 33.52it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:03, 26.45it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:02, 31.42it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:02, 26.38it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:02, 30.83it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:02, 30.13it/s][A
 46%|███████████████████▎                      | 46/100 [00:01<00:01, 35.53it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 29.34it/s

Result after 100 episodes:
Average timesteps per episodes: 407.2
Average penalty per episodes: 0.0
Average reward per episodes: -394.6


 36%|██████████████                         | 1797/5000 [13:48<03:10, 16.81it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:03, 28.31it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 27.72it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:01, 46.93it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:02, 35.49it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 46.36it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:01, 44.84it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 43.94it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 36.05it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 35.92it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:01, 33.84it/s

Result after 100 episodes:
Average timesteps per episodes: 357.81
Average penalty per episodes: 0.0
Average reward per episodes: -344.16


 37%|██████████████▍                        | 1848/5000 [13:56<05:32,  9.49it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.51it/s][A
  7%|███                                        | 7/100 [00:00<00:04, 18.98it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:02, 39.23it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 45.27it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 41.92it/s][A
 44%|██████████████████▍                       | 44/100 [00:00<00:00, 59.58it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 35.13it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:01, 35.15it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 40.10it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:01<00:00, 42.31it/s

Result after 100 episodes:
Average timesteps per episodes: 327.81
Average penalty per episodes: 0.0
Average reward per episodes: -313.53


 38%|██████████████▊                        | 1897/5000 [14:05<10:02,  5.15it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 42.24it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 65.79it/s][A
 31%|█████████████                             | 31/100 [00:00<00:00, 75.91it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:00, 67.81it/s][A
 46%|███████████████████▎                      | 46/100 [00:00<00:01, 52.21it/s][A
 58%|████████████████████████▎                 | 58/100 [00:00<00:00, 61.31it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 57.55it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:01<00:00, 46.33it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:01<00:00, 43.35it/s][A
 87%|████████████████████████████████████▌     | 87/100 [00:01<00:00, 52.48it/s

Result after 100 episodes:
Average timesteps per episodes: 278.8
Average penalty per episodes: 0.0
Average reward per episodes: -263.47


 39%|███████████████▏                       | 1948/5000 [14:15<10:06,  5.03it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.32it/s][A
 10%|████▏                                     | 10/100 [00:00<00:02, 36.59it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 37.70it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 36.08it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 35.77it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 37.90it/s][A
 37%|███████████████▌                          | 37/100 [00:00<00:01, 41.58it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 39.51it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 42.48it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 35.20it/s

Result after 100 episodes:
Average timesteps per episodes: 417.16
Average penalty per episodes: 0.0
Average reward per episodes: -404.77


 40%|███████████████▌                       | 1999/5000 [14:25<04:32, 11.03it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:00, 93.51it/s][A
 24%|██████████                                | 24/100 [00:00<00:01, 49.77it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 46.74it/s][A
 36%|███████████████                           | 36/100 [00:00<00:01, 37.57it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:01, 32.30it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 31.04it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 30.10it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 31.98it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:01, 30.37it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:01, 29.44it/s

Result after 100 episodes:
Average timesteps per episodes: 407.16
Average penalty per episodes: 0.0
Average reward per episodes: -394.56


 41%|███████████████▉                       | 2047/5000 [14:37<10:45,  4.58it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.03it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 22.04it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 24.89it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:03, 26.09it/s][A
 24%|██████████                                | 24/100 [00:00<00:01, 41.36it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 47.37it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:01, 42.92it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 31.18it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 26.95it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 27.11it/s

Result after 100 episodes:
Average timesteps per episodes: 427.07
Average penalty per episodes: 0.0
Average reward per episodes: -414.89


 42%|████████████████▎                      | 2099/5000 [14:53<14:01,  3.45it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.24it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:01, 62.43it/s][A
 24%|██████████                                | 24/100 [00:00<00:01, 55.67it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 42.35it/s][A
 45%|██████████████████▉                       | 45/100 [00:00<00:00, 61.20it/s][A
 52%|█████████████████████▊                    | 52/100 [00:01<00:00, 49.55it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 47.14it/s][A
 63%|██████████████████████████▍               | 63/100 [00:01<00:00, 38.62it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 37.67it/s][A
 72%|██████████████████████████████▏           | 72/100 [00:01<00:00, 33.95it/s

Result after 100 episodes:
Average timesteps per episodes: 328.45
Average penalty per episodes: 0.0
Average reward per episodes: -314.17


 43%|████████████████▋                      | 2147/5000 [14:59<02:53, 16.40it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 19.74it/s][A
  5%|██▏                                        | 5/100 [00:00<00:05, 16.46it/s][A
 11%|████▌                                     | 11/100 [00:00<00:03, 26.57it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:04, 19.34it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:04, 17.33it/s][A
 25%|██████████▌                               | 25/100 [00:01<00:02, 27.80it/s][A
 29%|████████████▏                             | 29/100 [00:01<00:02, 27.86it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:02, 29.56it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 30.79it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 43.21it/s

Result after 100 episodes:
Average timesteps per episodes: 416.84
Average penalty per episodes: 0.0
Average reward per episodes: -404.45


 44%|█████████████████▏                     | 2199/5000 [15:06<03:10, 14.70it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.31it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 23.99it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 22.63it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:03, 22.03it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:03, 26.45it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:03, 24.79it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:02, 28.03it/s][A
 31%|█████████████                             | 31/100 [00:01<00:02, 30.13it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:02, 25.57it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 32.07it/s

Result after 100 episodes:
Average timesteps per episodes: 456.53
Average penalty per episodes: 0.0
Average reward per episodes: -444.98


 45%|█████████████████▌                     | 2249/5000 [15:18<02:43, 16.79it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 62.09it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:01, 53.79it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 39.21it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:02, 34.86it/s][A
 31%|█████████████                             | 31/100 [00:00<00:02, 28.47it/s][A
 36%|███████████████                           | 36/100 [00:01<00:02, 30.35it/s][A
 40%|████████████████▊                         | 40/100 [00:01<00:02, 29.76it/s][A
 44%|██████████████████▍                       | 44/100 [00:01<00:01, 29.16it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 30.92it/s][A
 53%|██████████████████████▎                   | 53/100 [00:01<00:01, 30.09it/s

Result after 100 episodes:
Average timesteps per episodes: 387.66
Average penalty per episodes: 0.0
Average reward per episodes: -374.64


 46%|█████████████████▉                     | 2298/5000 [15:29<03:19, 13.57it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.22it/s][A
 12%|█████                                     | 12/100 [00:00<00:01, 45.30it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:02, 40.34it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:02, 36.60it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:02, 33.78it/s][A
 31%|█████████████                             | 31/100 [00:00<00:02, 34.32it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 47.68it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:00, 62.47it/s][A
 63%|██████████████████████████▍               | 63/100 [00:01<00:00, 50.88it/s][A
 69%|████████████████████████████▉             | 69/100 [00:01<00:00, 42.98it/s

Result after 100 episodes:
Average timesteps per episodes: 328.16
Average penalty per episodes: 0.0
Average reward per episodes: -313.88


 47%|██████████████████▎                    | 2348/5000 [15:44<08:26,  5.24it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 10%|████▏                                     | 10/100 [00:00<00:01, 69.03it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:01, 56.87it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:01, 49.70it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:00, 71.16it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:01, 48.80it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:00, 61.38it/s][A
 69%|████████████████████████████▉             | 69/100 [00:01<00:00, 59.85it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:01<00:00, 40.46it/s][A
 87%|████████████████████████████████████▌     | 87/100 [00:01<00:00, 48.36it/s][A
 93%|███████████████████████████████████████   | 93/100 [00:01<00:00, 46.81it/s

Result after 100 episodes:
Average timesteps per episodes: 288.66
Average penalty per episodes: 0.0
Average reward per episodes: -273.54


 48%|██████████████████▋                    | 2397/5000 [15:51<01:56, 22.41it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  7%|███                                        | 7/100 [00:00<00:01, 48.81it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 31.82it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:03, 25.68it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 43.97it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 35.87it/s][A
 42%|█████████████████▋                        | 42/100 [00:01<00:01, 43.22it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 41.03it/s][A
 69%|████████████████████████████▉             | 69/100 [00:01<00:00, 71.68it/s][A
 77%|████████████████████████████████▎         | 77/100 [00:01<00:00, 66.93it/s][A
 84%|███████████████████████████████████▎      | 84/100 [00:01<00:00, 61.63it/s

Result after 100 episodes:
Average timesteps per episodes: 279.09
Average penalty per episodes: 0.0
Average reward per episodes: -263.76


 49%|███████████████████                    | 2447/5000 [16:01<07:33,  5.63it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.40it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 43.40it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:01, 46.00it/s][A
 31%|█████████████                             | 31/100 [00:00<00:01, 60.17it/s][A
 42%|█████████████████▋                        | 42/100 [00:00<00:00, 65.52it/s][A
 73%|█████████████████████████████▉           | 73/100 [00:00<00:00, 110.49it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 89.02it/s][A
 49%|███████████████████▏                   | 2452/5000 [16:02<08:46,  4.84it/s]

Result after 100 episodes:
Average timesteps per episodes: 160.47
Average penalty per episodes: 0.0
Average reward per episodes: -142.62


 50%|███████████████████▍                   | 2498/5000 [16:11<04:13,  9.88it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:02, 39.54it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 27.61it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 44.06it/s][A
 24%|██████████                                | 24/100 [00:00<00:01, 40.85it/s][A
 36%|███████████████                           | 36/100 [00:00<00:01, 53.56it/s][A
 48%|████████████████████▏                     | 48/100 [00:00<00:00, 62.30it/s][A
 55%|███████████████████████                   | 55/100 [00:01<00:00, 55.75it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:00, 44.47it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 43.81it/s][A
 72%|██████████████████████████████▏           | 72/100 [00:01<00:00, 37.06it/s

Result after 100 episodes:
Average timesteps per episodes: 328.4
Average penalty per episodes: 0.0
Average reward per episodes: -314.12


 51%|███████████████████▊                   | 2547/5000 [16:21<04:46,  8.57it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 29.90it/s][A
  7%|███                                        | 7/100 [00:00<00:03, 25.60it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:02, 32.79it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:02, 30.60it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:02, 27.72it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:02, 32.92it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 26.53it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 32.74it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 31.77it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 30.95it/s

Result after 100 episodes:
Average timesteps per episodes: 318.38
Average penalty per episodes: 0.0
Average reward per episodes: -303.89


 52%|████████████████████▎                  | 2597/5000 [16:29<02:21, 17.00it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 37.13it/s][A
  9%|███▊                                       | 9/100 [00:00<00:03, 25.17it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 29.91it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 47.72it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:02, 33.23it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 49.37it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:01, 49.82it/s][A
 60%|█████████████████████████▏                | 60/100 [00:01<00:00, 56.10it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 43.16it/s][A
 74%|███████████████████████████████           | 74/100 [00:01<00:00, 45.05it/s

Result after 100 episodes:
Average timesteps per episodes: 288.51
Average penalty per episodes: 0.0
Average reward per episodes: -273.39


 53%|████████████████████▋                  | 2649/5000 [16:33<02:34, 15.24it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 36.65it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 40.36it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 41.92it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 47.65it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 48.64it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:01, 38.37it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:01, 47.88it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 41.16it/s][A
 59%|████████████████████████▊                 | 59/100 [00:01<00:01, 39.22it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:01, 32.23it/s

Result after 100 episodes:
Average timesteps per episodes: 298.4
Average penalty per episodes: 0.0
Average reward per episodes: -283.49


 54%|█████████████████████                  | 2698/5000 [16:42<04:17,  8.95it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.49it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 30.49it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 30.01it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:02, 29.71it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:03, 23.14it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:03, 20.05it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:03, 22.75it/s][A
 29%|████████████▏                             | 29/100 [00:01<00:03, 22.64it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:03, 22.51it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 28.66it/s

Result after 100 episodes:
Average timesteps per episodes: 614.22
Average penalty per episodes: 0.0
Average reward per episodes: -606.03


 55%|█████████████████████▍                 | 2748/5000 [16:49<01:46, 21.22it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 22.07it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 21.98it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 25.09it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:02, 29.76it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:02, 32.47it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:02, 36.33it/s][A
 31%|█████████████                             | 31/100 [00:00<00:01, 36.63it/s][A
 39%|████████████████▍                         | 39/100 [00:01<00:01, 43.63it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 48.23it/s][A
 52%|█████████████████████▊                    | 52/100 [00:01<00:01, 44.92it/s

Result after 100 episodes:
Average timesteps per episodes: 396.95
Average penalty per episodes: 0.0
Average reward per episodes: -384.14


 56%|█████████████████████▊                 | 2799/5000 [16:55<01:38, 22.40it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 22.62it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 22.58it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:02, 35.82it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 41.92it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:02, 34.30it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:02, 32.54it/s][A
 43%|██████████████████                        | 43/100 [00:01<00:01, 52.96it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:00, 54.43it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 53.57it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 44.36it/s

Result after 100 episodes:
Average timesteps per episodes: 367.9
Average penalty per episodes: 0.0
Average reward per episodes: -354.46


 57%|██████████████████████▏                | 2849/5000 [17:03<08:34,  4.18it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 15.11it/s][A
  8%|███▍                                       | 8/100 [00:00<00:02, 32.50it/s][A
 12%|█████                                     | 12/100 [00:00<00:03, 25.39it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:01, 44.34it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 41.95it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 40.38it/s][A
 41%|█████████████████▏                        | 41/100 [00:01<00:01, 45.90it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 45.21it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 55.51it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 81.42it/s

Result after 100 episodes:
Average timesteps per episodes: 249.14
Average penalty per episodes: 0.0
Average reward per episodes: -233.18


 58%|██████████████████████▌                | 2897/5000 [17:10<02:15, 15.53it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 65.40it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:01, 48.93it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 46.60it/s][A
 31%|█████████████                             | 31/100 [00:00<00:01, 53.19it/s][A
 45%|██████████████████▉                       | 45/100 [00:00<00:00, 69.31it/s][A
 53%|██████████████████████▎                   | 53/100 [00:00<00:00, 50.30it/s][A
 59%|████████████████████████▊                 | 59/100 [00:01<00:00, 48.75it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 53.31it/s][A
 75%|███████████████████████████████▌          | 75/100 [00:01<00:00, 52.20it/s][A
 81%|██████████████████████████████████        | 81/100 [00:01<00:00, 44.11it/s

Result after 100 episodes:
Average timesteps per episodes: 308.22
Average penalty per episodes: 0.0
Average reward per episodes: -293.52


 59%|██████████████████████▉                | 2948/5000 [17:25<06:20,  5.40it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.98it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 75.99it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 54.07it/s][A
 36%|███████████████                           | 36/100 [00:00<00:01, 54.45it/s][A
 42%|█████████████████▋                        | 42/100 [00:00<00:01, 44.00it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 41.49it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 43.68it/s][A
 59%|████████████████████████▊                 | 59/100 [00:01<00:01, 36.31it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 41.76it/s][A
 80%|█████████████████████████████████▌        | 80/100 [00:01<00:00, 55.28it/s

Result after 100 episodes:
Average timesteps per episodes: 288.89
Average penalty per episodes: 0.0
Average reward per episodes: -273.77


 60%|███████████████████████▍               | 2998/5000 [17:30<02:01, 16.42it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:07, 13.55it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 30.11it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:01, 57.75it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 55.11it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:00, 81.72it/s][A
 56%|███████████████████████▌                  | 56/100 [00:00<00:00, 64.59it/s][A
 63%|██████████████████████████▍               | 63/100 [00:01<00:00, 53.47it/s][A
 79%|█████████████████████████████████▏        | 79/100 [00:01<00:00, 69.01it/s][A
 87%|████████████████████████████████████▌     | 87/100 [00:01<00:00, 64.17it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 60.58it/s

Result after 100 episodes:
Average timesteps per episodes: 239.43
Average penalty per episodes: 0.0
Average reward per episodes: -223.26


 61%|███████████████████████▊               | 3048/5000 [17:38<01:56, 16.74it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  7%|███                                        | 7/100 [00:00<00:01, 50.79it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:02, 36.57it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 37.84it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 41.31it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 64.57it/s][A
 50%|█████████████████████                     | 50/100 [00:00<00:00, 57.82it/s][A
 60%|█████████████████████████▏                | 60/100 [00:01<00:00, 61.96it/s][A
 74%|███████████████████████████████           | 74/100 [00:01<00:00, 72.86it/s][A
 84%|███████████████████████████████████▎      | 84/100 [00:01<00:00, 71.90it/s][A
 92%|██████████████████████████████████████▋   | 92/100 [00:01<00:00, 67.40it/s

Result after 100 episodes:
Average timesteps per episodes: 249.2
Average penalty per episodes: 0.0
Average reward per episodes: -233.24


 62%|████████████████████████▏              | 3099/5000 [17:48<08:52,  3.57it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 22%|█████████                                | 22/100 [00:00<00:00, 144.62it/s][A
 37%|███████████████▏                         | 37/100 [00:00<00:00, 118.68it/s][A
 49%|████████████████████                     | 49/100 [00:00<00:00, 102.25it/s][A
 60%|█████████████████████████▏                | 60/100 [00:00<00:00, 78.38it/s][A
 69%|████████████████████████████▉             | 69/100 [00:00<00:00, 64.52it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 64.36it/s][A
 85%|███████████████████████████████████▋      | 85/100 [00:01<00:00, 60.39it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 73.96it/s][A
 62%|████████████████████████▏              | 3101/5000 [17:49<12:16,  2.58it/s]

Result after 100 episodes:
Average timesteps per episodes: 199.56
Average penalty per episodes: 0.0
Average reward per episodes: -182.55


 63%|████████████████████████▌              | 3148/5000 [17:53<05:26,  5.68it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 35.83it/s][A
  9%|███▊                                       | 9/100 [00:00<00:02, 31.20it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:02, 30.20it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:02, 32.47it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:02, 31.47it/s][A
 34%|██████████████▎                           | 34/100 [00:00<00:01, 50.00it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 45.47it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:00, 57.92it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 55.46it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 54.15it/s

Result after 100 episodes:
Average timesteps per episodes: 278.81
Average penalty per episodes: 0.0
Average reward per episodes: -263.48


 64%|████████████████████████▉              | 3197/5000 [17:59<04:41,  6.41it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 62.52it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 62.17it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:00, 76.26it/s][A
 40%|████████████████▊                         | 40/100 [00:00<00:00, 68.61it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:01, 46.73it/s][A
 53%|██████████████████████▎                   | 53/100 [00:00<00:01, 45.74it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 53.85it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:01<00:00, 51.50it/s][A
 85%|███████████████████████████████████▋      | 85/100 [00:01<00:00, 64.85it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 60.61it/s

Result after 100 episodes:
Average timesteps per episodes: 239.35
Average penalty per episodes: 0.0
Average reward per episodes: -223.18


 65%|█████████████████████████▎             | 3248/5000 [18:03<01:12, 24.18it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.88it/s][A
  5%|██▏                                        | 5/100 [00:00<00:05, 18.35it/s][A
 10%|████▏                                     | 10/100 [00:00<00:03, 26.37it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:03, 27.38it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:03, 25.50it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:03, 24.43it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:03, 23.76it/s][A
 26%|██████████▉                               | 26/100 [00:01<00:03, 23.38it/s][A
 30%|████████████▌                             | 30/100 [00:01<00:02, 25.23it/s][A
 36%|███████████████                           | 36/100 [00:01<00:02, 30.89it/s

Result after 100 episodes:
Average timesteps per episodes: 525.45
Average penalty per episodes: 0.0
Average reward per episodes: -515.37


 66%|█████████████████████████▋             | 3298/5000 [18:12<02:38, 10.76it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 22.68it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 30.15it/s][A
 11%|████▌                                     | 11/100 [00:00<00:04, 21.68it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:03, 24.23it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:03, 22.90it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 29.54it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:02, 25.61it/s][A
 32%|█████████████▍                            | 32/100 [00:01<00:02, 26.60it/s][A
 35%|██████████████▋                           | 35/100 [00:01<00:02, 25.37it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 21.56it/s

Result after 100 episodes:
Average timesteps per episodes: 644.0
Average penalty per episodes: 0.0
Average reward per episodes: -636.44


 67%|██████████████████████████             | 3346/5000 [18:25<05:45,  4.78it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 37.32it/s][A
 11%|████▌                                     | 11/100 [00:00<00:02, 41.16it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:01, 55.43it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 56.53it/s][A
 52%|█████████████████████▊                    | 52/100 [00:00<00:00, 93.85it/s][A
 62%|██████████████████████████                | 62/100 [00:00<00:00, 80.81it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:01<00:00, 66.12it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 62.16it/s][A
 91%|██████████████████████████████████████▏   | 91/100 [00:01<00:00, 71.03it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 67.38it/s

Result after 100 episodes:
Average timesteps per episodes: 219.36
Average penalty per episodes: 0.0
Average reward per episodes: -202.77


 68%|██████████████████████████▍            | 3397/5000 [18:29<01:36, 16.60it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 37.23it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 57.83it/s][A
 21%|████████▊                                 | 21/100 [00:00<00:01, 51.67it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 57.16it/s][A
 42%|█████████████████▋                        | 42/100 [00:00<00:00, 67.69it/s][A
 49%|████████████████████▌                     | 49/100 [00:01<00:01, 42.14it/s][A
 55%|███████████████████████                   | 55/100 [00:01<00:01, 42.67it/s][A
 62%|██████████████████████████                | 62/100 [00:01<00:00, 44.80it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 44.69it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 42.55it/s

Result after 100 episodes:
Average timesteps per episodes: 308.43
Average penalty per episodes: 0.0
Average reward per episodes: -293.73


 69%|██████████████████████████▉            | 3447/5000 [18:36<04:07,  6.27it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 44.01it/s][A
 11%|████▌                                     | 11/100 [00:00<00:02, 39.93it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:02, 35.08it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 27.83it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 42.42it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 51.76it/s][A
 50%|█████████████████████                     | 50/100 [00:01<00:00, 59.38it/s][A
 63%|██████████████████████████▍               | 63/100 [00:01<00:00, 68.53it/s][A
 70%|█████████████████████████████▍            | 70/100 [00:01<00:00, 63.33it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 65.11it/s

Result after 100 episodes:
Average timesteps per episodes: 219.59
Average penalty per episodes: 0.0
Average reward per episodes: -203.0


 70%|███████████████████████████▎           | 3496/5000 [18:45<01:44, 14.36it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  7%|███                                        | 7/100 [00:00<00:01, 51.80it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:01, 58.43it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 42.52it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 43.97it/s][A
 34%|██████████████▎                           | 34/100 [00:00<00:01, 39.13it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 38.45it/s][A
 47%|███████████████████▋                      | 47/100 [00:01<00:01, 44.31it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 54.84it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 51.63it/s][A
 72%|██████████████████████████████▏           | 72/100 [00:01<00:00, 53.65it/s

Result after 100 episodes:
Average timesteps per episodes: 278.8
Average penalty per episodes: 0.0
Average reward per episodes: -263.47


 71%|███████████████████████████▋           | 3548/5000 [18:50<00:58, 24.74it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.91it/s][A
 10%|████▏                                     | 10/100 [00:00<00:02, 38.57it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 33.21it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:02, 37.36it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:01, 42.14it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 40.29it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 43.67it/s][A
 46%|███████████████████▎                      | 46/100 [00:01<00:01, 45.97it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 43.03it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:01, 36.02it/s

Result after 100 episodes:
Average timesteps per episodes: 348.17
Average penalty per episodes: 0.0
Average reward per episodes: -334.31


 72%|████████████████████████████           | 3598/5000 [18:58<03:16,  7.13it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 18%|███████▍                                 | 18/100 [00:00<00:00, 124.44it/s][A
 31%|█████████████                             | 31/100 [00:00<00:01, 68.03it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:00, 63.86it/s][A
 48%|████████████████████▏                     | 48/100 [00:00<00:00, 63.61it/s][A
 71%|█████████████████████████████▊            | 71/100 [00:00<00:00, 92.96it/s][A
 84%|███████████████████████████████████▎      | 84/100 [00:01<00:00, 91.75it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 86.80it/s][A
 72%|████████████████████████████           | 3601/5000 [18:59<05:10,  4.50it/s]

Result after 100 episodes:
Average timesteps per episodes: 170.29
Average penalty per episodes: 0.0
Average reward per episodes: -152.65


 73%|████████████████████████████▍          | 3646/5000 [19:07<05:10,  4.37it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:01, 49.68it/s][A
 11%|████▌                                     | 11/100 [00:00<00:02, 40.46it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:02, 38.40it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:02, 34.23it/s][A
 24%|██████████                                | 24/100 [00:00<00:02, 31.74it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 39.66it/s][A
 46%|███████████████████▎                      | 46/100 [00:00<00:00, 58.20it/s][A
 52%|█████████████████████▊                    | 52/100 [00:01<00:00, 53.29it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:00, 55.82it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 45.45it/s

Result after 100 episodes:
Average timesteps per episodes: 337.93
Average penalty per episodes: 0.0
Average reward per episodes: -323.86


 74%|████████████████████████████▊          | 3697/5000 [19:12<01:05, 19.84it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 34.94it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 43.27it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 58.14it/s][A
 42%|█████████████████▋                        | 42/100 [00:00<00:00, 77.13it/s][A
 50%|█████████████████████                     | 50/100 [00:00<00:00, 53.38it/s][A
 56%|███████████████████████▌                  | 56/100 [00:01<00:00, 44.93it/s][A
 61%|█████████████████████████▌                | 61/100 [00:01<00:00, 42.71it/s][A
 66%|███████████████████████████▋              | 66/100 [00:01<00:00, 38.96it/s][A
 72%|██████████████████████████████▏           | 72/100 [00:01<00:00, 38.67it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:01<00:00, 35.58it/s

Result after 100 episodes:
Average timesteps per episodes: 279.32
Average penalty per episodes: 0.0
Average reward per episodes: -263.99


 75%|█████████████████████████████▏         | 3746/5000 [19:17<00:43, 28.56it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 36.39it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 68.59it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:02, 31.11it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 58.70it/s][A
 52%|█████████████████████▊                    | 52/100 [00:00<00:00, 60.06it/s][A
 62%|██████████████████████████                | 62/100 [00:01<00:00, 63.02it/s][A
 70%|█████████████████████████████▍            | 70/100 [00:01<00:00, 61.35it/s][A
 77%|████████████████████████████████▎         | 77/100 [00:01<00:00, 57.71it/s][A
 88%|████████████████████████████████████▉     | 88/100 [00:01<00:00, 62.99it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 55.06it/s

Result after 100 episodes:
Average timesteps per episodes: 258.93
Average penalty per episodes: 0.0
Average reward per episodes: -243.18


 76%|█████████████████████████████▌         | 3797/5000 [19:22<01:08, 17.46it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:00, 90.82it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:00, 92.10it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:01, 56.04it/s][A
 45%|██████████████████▉                       | 45/100 [00:00<00:01, 41.08it/s][A
 60%|█████████████████████████▏                | 60/100 [00:01<00:00, 55.49it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 52.81it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:01<00:00, 54.32it/s][A
 82%|██████████████████████████████████▍       | 82/100 [00:01<00:00, 44.83it/s][A
 88%|████████████████████████████████████▉     | 88/100 [00:01<00:00, 44.46it/s][A
 93%|███████████████████████████████████████   | 93/100 [00:01<00:00, 42.34it/s

Result after 100 episodes:
Average timesteps per episodes: 278.79
Average penalty per episodes: 0.0
Average reward per episodes: -263.46


 77%|█████████████████████████████▉         | 3846/5000 [19:27<00:58, 19.71it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 55.71it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 70.94it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 62.87it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 77.40it/s][A
 51%|█████████████████████▍                    | 51/100 [00:00<00:00, 70.01it/s][A
 83%|██████████████████████████████████       | 83/100 [00:00<00:00, 117.24it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 96.04it/s][A
 77%|██████████████████████████████         | 3854/5000 [19:28<01:53, 10.11it/s]

Result after 100 episodes:
Average timesteps per episodes: 150.53
Average penalty per episodes: 0.0
Average reward per episodes: -132.47


 78%|██████████████████████████████▍        | 3899/5000 [19:30<00:41, 26.23it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 22.24it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:01, 51.08it/s][A
 31%|█████████████                             | 31/100 [00:00<00:00, 84.31it/s][A
 44%|██████████████████▍                       | 44/100 [00:00<00:00, 85.94it/s][A
 68%|███████████████████████████▉             | 68/100 [00:00<00:00, 111.29it/s][A
 79%|█████████████████████████████████▏        | 79/100 [00:01<00:00, 76.16it/s][A
 88%|████████████████████████████████████▉     | 88/100 [00:01<00:00, 59.92it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 67.49it/s][A
 78%|██████████████████████████████▍        | 3902/5000 [19:31<03:20,  5.49it/s]

Result after 100 episodes:
Average timesteps per episodes: 209.98
Average penalty per episodes: 0.0
Average reward per episodes: -193.18


 79%|██████████████████████████████▊        | 3949/5000 [19:37<01:58,  8.84it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 12%|█████                                     | 12/100 [00:00<00:01, 71.09it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 69.69it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 61.18it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 74.42it/s][A
 59%|████████████████████████▊                 | 59/100 [00:00<00:00, 86.85it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:00<00:00, 96.39it/s][A
 86%|████████████████████████████████████      | 86/100 [00:01<00:00, 60.85it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 72.09it/s][A
 79%|██████████████████████████████▊        | 3955/5000 [19:41<04:42,  3.69it/s]

Result after 100 episodes:
Average timesteps per episodes: 199.81
Average penalty per episodes: 0.0
Average reward per episodes: -182.8


 80%|███████████████████████████████▏       | 3996/5000 [19:44<02:10,  7.71it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:00, 89.62it/s][A
 24%|██████████                                | 24/100 [00:00<00:00, 82.26it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:00, 73.39it/s][A
 41%|█████████████████▏                        | 41/100 [00:00<00:00, 66.66it/s][A
 48%|████████████████████▏                     | 48/100 [00:00<00:00, 60.77it/s][A
 55%|███████████████████████                   | 55/100 [00:00<00:00, 57.08it/s][A
 69%|████████████████████████████▉             | 69/100 [00:00<00:00, 70.20it/s][A
 76%|███████████████████████████████▉          | 76/100 [00:01<00:00, 63.85it/s][A
 85%|███████████████████████████████████▋      | 85/100 [00:01<00:00, 64.03it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 56.97it/s

Result after 100 episodes:
Average timesteps per episodes: 259.18
Average penalty per episodes: 0.0
Average reward per episodes: -243.43


 81%|███████████████████████████████▌       | 4047/5000 [19:50<00:51, 18.55it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 18%|███████▍                                 | 18/100 [00:00<00:00, 120.87it/s][A
 36%|██████████████▊                          | 36/100 [00:00<00:00, 120.56it/s][A
 49%|████████████████████▌                     | 49/100 [00:00<00:00, 75.74it/s][A
 58%|████████████████████████▎                 | 58/100 [00:00<00:00, 71.96it/s][A
 66%|███████████████████████████▋              | 66/100 [00:00<00:00, 58.87it/s][A
 75%|███████████████████████████████▌          | 75/100 [00:01<00:00, 60.14it/s][A
 86%|████████████████████████████████████      | 86/100 [00:01<00:00, 65.00it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 70.33it/s][A
 81%|███████████████████████████████▌       | 4053/5000 [19:51<02:06,  7.49it/s]

Result after 100 episodes:
Average timesteps per episodes: 210.14
Average penalty per episodes: 0.0
Average reward per episodes: -193.34


 82%|███████████████████████████████▉       | 4099/5000 [19:54<00:54, 16.43it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:00, 92.24it/s][A
 24%|██████████                                | 24/100 [00:00<00:00, 84.45it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:00, 90.74it/s][A
 48%|████████████████████▏                     | 48/100 [00:00<00:00, 83.49it/s][A
 61%|█████████████████████████▌                | 61/100 [00:00<00:00, 86.81it/s][A
 74%|███████████████████████████████           | 74/100 [00:00<00:00, 88.86it/s][A
 83%|██████████████████████████████████▊       | 83/100 [00:01<00:00, 46.36it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 66.09it/s][A
 82%|███████████████████████████████▉       | 4102/5000 [19:56<03:03,  4.90it/s]

Result after 100 episodes:
Average timesteps per episodes: 229.59
Average penalty per episodes: 0.0
Average reward per episodes: -213.21


 83%|████████████████████████████████▎      | 4148/5000 [19:58<00:34, 24.54it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  5%|██▏                                        | 5/100 [00:00<00:02, 36.92it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:01, 53.70it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:00, 77.58it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:00, 72.31it/s][A
 46%|███████████████████▎                      | 46/100 [00:00<00:00, 57.44it/s][A
 53%|██████████████████████▎                   | 53/100 [00:00<00:00, 48.60it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 58.37it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 56.62it/s][A
 80%|█████████████████████████████████▌        | 80/100 [00:01<00:00, 53.12it/s][A
 86%|████████████████████████████████████      | 86/100 [00:01<00:00, 44.76it/s

Result after 100 episodes:
Average timesteps per episodes: 308.57
Average penalty per episodes: 0.0
Average reward per episodes: -293.87


 84%|████████████████████████████████▋      | 4198/5000 [20:04<00:50, 15.98it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 58.28it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 39.25it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:03, 24.35it/s][A
 23%|█████████▋                                | 23/100 [00:00<00:03, 25.45it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:02, 32.04it/s][A
 34%|██████████████▎                           | 34/100 [00:01<00:02, 31.39it/s][A
 38%|███████████████▉                          | 38/100 [00:01<00:02, 30.87it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 48.97it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:01, 40.36it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 59.06it/s

Result after 100 episodes:
Average timesteps per episodes: 318.43
Average penalty per episodes: 0.0
Average reward per episodes: -303.94


 85%|█████████████████████████████████▏     | 4248/5000 [20:10<00:52, 14.41it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.87it/s][A
  8%|███▍                                       | 8/100 [00:00<00:03, 30.37it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 61.11it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 64.34it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 58.15it/s][A
 56%|███████████████████████▌                  | 56/100 [00:00<00:00, 76.90it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 61.05it/s][A
 75%|███████████████████████████████▌          | 75/100 [00:01<00:00, 65.24it/s][A
 83%|██████████████████████████████████▊       | 83/100 [00:01<00:00, 62.75it/s][A
 91%|██████████████████████████████████████▏   | 91/100 [00:01<00:00, 60.91it/s

Result after 100 episodes:
Average timesteps per episodes: 258.96
Average penalty per episodes: 0.0
Average reward per episodes: -243.21


 86%|█████████████████████████████████▌     | 4299/5000 [20:19<02:50,  4.11it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:01, 78.71it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 66.29it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:01, 48.94it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 46.06it/s][A
 60%|█████████████████████████▏                | 60/100 [00:00<00:00, 82.22it/s][A
 70%|█████████████████████████████▍            | 70/100 [00:01<00:00, 75.41it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 64.96it/s][A
 88%|████████████████████████████████████▉     | 88/100 [00:01<00:00, 65.54it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 61.37it/s][A
 86%|█████████████████████████████████▌     | 4305/5000 [20:21<02:49,  4.09it/s

Result after 100 episodes:
Average timesteps per episodes: 219.6
Average penalty per episodes: 0.0
Average reward per episodes: -203.01


 87%|█████████████████████████████████▉     | 4346/5000 [20:23<00:25, 25.63it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  8%|███▍                                       | 8/100 [00:00<00:01, 57.20it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:02, 36.57it/s][A
 19%|███████▉                                  | 19/100 [00:00<00:02, 35.67it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 40.23it/s][A
 39%|████████████████▍                         | 39/100 [00:00<00:01, 56.02it/s][A
 45%|██████████████████▉                       | 45/100 [00:00<00:01, 51.96it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 48.23it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 66.14it/s][A
 74%|███████████████████████████████           | 74/100 [00:01<00:00, 60.72it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 61.37it/s

Result after 100 episodes:
Average timesteps per episodes: 229.81
Average penalty per episodes: 0.0
Average reward per episodes: -213.43


 88%|██████████████████████████████████▎    | 4399/5000 [20:28<00:34, 17.37it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 41.24it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:00, 87.61it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:00, 84.60it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:00, 72.07it/s][A
 60%|█████████████████████████▏                | 60/100 [00:00<00:00, 76.85it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 60.73it/s][A
 79%|█████████████████████████████████▏        | 79/100 [00:01<00:00, 62.17it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 74.47it/s][A
 88%|██████████████████████████████████▎    | 4406/5000 [20:32<02:42,  3.65it/s]

Result after 100 episodes:
Average timesteps per episodes: 170.25
Average penalty per episodes: 0.0
Average reward per episodes: -152.61


 89%|██████████████████████████████████▋    | 4447/5000 [20:37<00:57,  9.61it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 51.82it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:01, 50.13it/s][A
 30%|████████████▌                             | 30/100 [00:00<00:01, 65.75it/s][A
 37%|███████████████▌                          | 37/100 [00:00<00:01, 48.82it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:01, 39.18it/s][A
 48%|████████████████████▏                     | 48/100 [00:01<00:01, 37.51it/s][A
 54%|██████████████████████▋                   | 54/100 [00:01<00:01, 38.43it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 62.54it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 63.17it/s][A
 89%|██████████████████████████████████▋    | 4453/5000 [20:39<01:38,  5.56it/s

Result after 100 episodes:
Average timesteps per episodes: 199.81
Average penalty per episodes: 0.0
Average reward per episodes: -182.8


 90%|███████████████████████████████████    | 4499/5000 [20:45<00:33, 14.80it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  7%|███                                        | 7/100 [00:00<00:01, 50.22it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 64.86it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 59.91it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 52.67it/s][A
 41%|█████████████████▏                        | 41/100 [00:00<00:01, 55.42it/s][A
 67%|████████████████████████████▏             | 67/100 [00:00<00:00, 94.50it/s][A
 82%|██████████████████████████████████▍       | 82/100 [00:01<00:00, 96.52it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 71.71it/s][A
 90%|███████████████████████████████████    | 4502/5000 [20:47<01:43,  4.80it/s]

Result after 100 episodes:
Average timesteps per episodes: 200.15
Average penalty per episodes: 0.0
Average reward per episodes: -183.14


 91%|███████████████████████████████████▍   | 4549/5000 [20:51<00:44, 10.15it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.09it/s][A
  4%|█▋                                         | 4/100 [00:00<00:06, 14.09it/s][A
 12%|█████                                     | 12/100 [00:00<00:02, 33.42it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:02, 31.56it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 50.53it/s][A
 34%|██████████████▎                           | 34/100 [00:00<00:01, 41.31it/s][A
 45%|██████████████████▉                       | 45/100 [00:01<00:01, 51.22it/s][A
 59%|████████████████████████▊                 | 59/100 [00:01<00:00, 63.68it/s][A
 66%|███████████████████████████▋              | 66/100 [00:01<00:00, 46.94it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 55.98it/s

Result after 100 episodes:
Average timesteps per episodes: 318.47
Average penalty per episodes: 0.0
Average reward per episodes: -303.98


 92%|███████████████████████████████████▊   | 4597/5000 [20:56<00:42,  9.51it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 43.46it/s][A
 14%|█████▉                                    | 14/100 [00:00<00:01, 51.50it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 48.11it/s][A
 27%|███████████▎                              | 27/100 [00:00<00:01, 49.33it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:01, 59.94it/s][A
 44%|██████████████████▍                       | 44/100 [00:00<00:01, 46.51it/s][A
 51%|█████████████████████▍                    | 51/100 [00:01<00:01, 47.58it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 68.73it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 69.66it/s][A
 87%|████████████████████████████████████▌     | 87/100 [00:01<00:00, 68.29it/s

Result after 100 episodes:
Average timesteps per episodes: 239.3
Average penalty per episodes: 0.0
Average reward per episodes: -223.13


 93%|████████████████████████████████████▎  | 4649/5000 [21:01<00:18, 18.56it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 16%|██████▌                                  | 16/100 [00:00<00:00, 110.57it/s][A
 28%|███████████▊                              | 28/100 [00:00<00:01, 52.56it/s][A
 35%|██████████████▋                           | 35/100 [00:00<00:01, 52.25it/s][A
 49%|████████████████████▌                     | 49/100 [00:00<00:00, 66.65it/s][A
 59%|████████████████████████▊                 | 59/100 [00:00<00:00, 67.50it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 41.28it/s][A
 75%|███████████████████████████████▌          | 75/100 [00:01<00:00, 44.81it/s][A
 90%|█████████████████████████████████████▊    | 90/100 [00:01<00:00, 59.01it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 57.59it/s][A
 93%|████████████████████████████████████▎  | 4655/5000 [21:03<00:56,  6.06it/s

Result after 100 episodes:
Average timesteps per episodes: 259.5
Average penalty per episodes: 0.0
Average reward per episodes: -243.75


 94%|████████████████████████████████████▋  | 4699/5000 [21:05<00:15, 19.80it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 51.08it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 54.17it/s][A
 29%|████████████▏                             | 29/100 [00:00<00:00, 73.29it/s][A
 37%|███████████████▌                          | 37/100 [00:00<00:00, 64.26it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:00, 66.56it/s][A
 54%|██████████████████████▋                   | 54/100 [00:00<00:00, 60.78it/s][A
 70%|█████████████████████████████▍            | 70/100 [00:01<00:00, 76.56it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 70.66it/s][A
 86%|████████████████████████████████████      | 86/100 [00:01<00:00, 58.61it/s][A
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 66.67it/s

Result after 100 episodes:
Average timesteps per episodes: 209.99
Average penalty per episodes: 0.0
Average reward per episodes: -193.19


 95%|█████████████████████████████████████  | 4747/5000 [21:08<00:08, 28.61it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  3%|█▎                                         | 3/100 [00:00<00:04, 21.98it/s][A
  6%|██▌                                        | 6/100 [00:00<00:04, 21.75it/s][A
  9%|███▊                                       | 9/100 [00:00<00:05, 17.45it/s][A
 11%|████▌                                     | 11/100 [00:00<00:05, 16.65it/s][A
 13%|█████▍                                    | 13/100 [00:00<00:05, 16.16it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:05, 15.69it/s][A
 17%|███████▏                                  | 17/100 [00:01<00:05, 15.41it/s][A
 22%|█████████▏                                | 22/100 [00:01<00:03, 21.99it/s][A
 25%|██████████▌                               | 25/100 [00:01<00:03, 22.11it/s][A
 28%|███████████▊                              | 28/100 [00:01<00:03, 19.34it/s

Result after 100 episodes:
Average timesteps per episodes: 643.88
Average penalty per episodes: 0.0
Average reward per episodes: -636.32


 96%|█████████████████████████████████████▍ | 4797/5000 [21:16<00:17, 11.30it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
 10%|████▏                                     | 10/100 [00:00<00:01, 71.91it/s][A
 18%|███████▌                                  | 18/100 [00:00<00:01, 63.94it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 61.28it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 57.47it/s][A
 40%|████████████████▊                         | 40/100 [00:00<00:01, 55.20it/s][A
 46%|███████████████████▎                      | 46/100 [00:00<00:01, 51.55it/s][A
 52%|█████████████████████▊                    | 52/100 [00:01<00:01, 37.52it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:01, 37.39it/s][A
 67%|████████████████████████████▏             | 67/100 [00:01<00:00, 47.02it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 40.83it/s

Result after 100 episodes:
Average timesteps per episodes: 338.35
Average penalty per episodes: 0.0
Average reward per episodes: -324.28


 97%|█████████████████████████████████████▊ | 4849/5000 [21:22<00:11, 13.10it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  4%|█▋                                         | 4/100 [00:00<00:03, 28.78it/s][A
 15%|██████▎                                   | 15/100 [00:00<00:01, 58.03it/s][A
 26%|██████████▉                               | 26/100 [00:00<00:01, 67.32it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:01, 42.34it/s][A
 42%|█████████████████▋                        | 42/100 [00:00<00:01, 47.90it/s][A
 51%|█████████████████████▍                    | 51/100 [00:00<00:00, 52.79it/s][A
 57%|███████████████████████▉                  | 57/100 [00:01<00:00, 50.34it/s][A
 63%|██████████████████████████▍               | 63/100 [00:01<00:00, 42.74it/s][A
 68%|████████████████████████████▌             | 68/100 [00:01<00:00, 41.23it/s][A
 73%|██████████████████████████████▋           | 73/100 [00:01<00:00, 40.03it/s

Result after 100 episodes:
Average timesteps per episodes: 298.78
Average penalty per episodes: 0.0
Average reward per episodes: -283.87


 98%|██████████████████████████████████████▏| 4899/5000 [21:28<00:06, 15.22it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  6%|██▌                                        | 6/100 [00:00<00:02, 44.23it/s][A
 11%|████▌                                     | 11/100 [00:00<00:02, 38.49it/s][A
 17%|███████▏                                  | 17/100 [00:00<00:02, 40.65it/s][A
 22%|█████████▏                                | 22/100 [00:00<00:01, 39.31it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 51.24it/s][A
 47%|███████████████████▋                      | 47/100 [00:00<00:00, 69.89it/s][A
 57%|███████████████████████▉                  | 57/100 [00:00<00:00, 70.74it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 66.90it/s][A
 72%|██████████████████████████████▏           | 72/100 [00:01<00:00, 54.14it/s][A
 86%|████████████████████████████████████      | 86/100 [00:01<00:00, 64.53it/s

Result after 100 episodes:
Average timesteps per episodes: 278.82
Average penalty per episodes: 0.0
Average reward per episodes: -263.49


 99%|██████████████████████████████████████▌| 4946/5000 [21:33<00:02, 19.28it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  2%|▊                                          | 2/100 [00:00<00:06, 14.84it/s][A
 16%|██████▋                                   | 16/100 [00:00<00:01, 65.04it/s][A
 25%|██████████▌                               | 25/100 [00:00<00:01, 64.46it/s][A
 32%|█████████████▍                            | 32/100 [00:00<00:01, 49.63it/s][A
 38%|███████████████▉                          | 38/100 [00:00<00:01, 47.58it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:01, 38.69it/s][A
 58%|████████████████████████▎                 | 58/100 [00:01<00:00, 57.87it/s][A
 65%|███████████████████████████▎              | 65/100 [00:01<00:00, 55.35it/s][A
 74%|███████████████████████████████           | 74/100 [00:01<00:00, 57.63it/s][A
 81%|██████████████████████████████████        | 81/100 [00:01<00:00, 55.45it/s

Result after 100 episodes:
Average timesteps per episodes: 259.02
Average penalty per episodes: 0.0
Average reward per episodes: -243.27


100%|██████████████████████████████████████▉| 4998/5000 [21:39<00:00,  9.72it/s]
  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  9%|███▊                                       | 9/100 [00:00<00:01, 64.45it/s][A
 20%|████████▍                                 | 20/100 [00:00<00:01, 72.37it/s][A
 33%|█████████████▊                            | 33/100 [00:00<00:00, 80.31it/s][A
 43%|██████████████████                        | 43/100 [00:00<00:00, 76.56it/s][A
 51%|█████████████████████▍                    | 51/100 [00:00<00:00, 59.70it/s][A
 58%|████████████████████████▎                 | 58/100 [00:00<00:00, 57.19it/s][A
 64%|██████████████████████████▉               | 64/100 [00:01<00:00, 37.44it/s][A
 69%|████████████████████████████▉             | 69/100 [00:01<00:00, 36.99it/s][A
 78%|████████████████████████████████▊         | 78/100 [00:01<00:00, 42.54it/s][A
 83%|██████████████████████████████████▊       | 83/100 [00:01<00:00, 40.95it/s

Result after 100 episodes:
Average timesteps per episodes: 288.82
Average penalty per episodes: 0.0
Average reward per episodes: -273.7
Training finished.



100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 67.58it/s]

Result after 100 episodes:
Average timesteps per episodes: 219.48
Average penalty per episodes: 0.0
Average reward per episodes: -202.89





In [84]:
print(env.observation_space.n, env.action_space.n)

500 6
