In [1]:
##log_dir = "logs/dicewars/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# simulare partite contro altri bot 
# allenare il tuo agente
# salvare i pesi del modello (model.save())

import numpy as np
from tensorflow.summary import create_file_writer
import datetime
from importlib import import_module
from dicewars.match import Match
from dicewars.game import Game
from dicewars.player import RandomPlayer, AgressivePlayer, WeakerPlayerAttacker, PassivePlayer
from rl_agent import RLDicewarsAgent
from rl_agent import ReplayBuffer
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from collections import deque
from tqdm import tqdm
import wandb
import time
from datetime import datetime

tf.config.list_physical_devices('GPU')

2025-03-27 17:57:07.765697: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-27 17:57:07.804545: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-27 17:57:07.804566: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-27 17:57:07.804593: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-27 17:57:07.812429: I tensorflow/core/platform/cpu_feature_g

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [None]:
start_time = time.time()

run = wandb.init(
    project='Machine-Learning-DICEWARS',
    name= f"DiceWars_{datetime.now().strftime('%Y%m%d_%H%M%S')}"  
)
run_id = run.id 
api = wandb.Api()
run = api.run(f"Machine-Learning-DICEWARS/{run_id}")

if tf.config.list_physical_devices('GPU'):
    print("using cuda ok")
else:
    print('using cpu')

NUM_EPISODES = 1000
SAVE_MODEL_PATH = "./saved_models/dicewars_rl_model_new.keras"
os.makedirs(os.path.dirname(SAVE_MODEL_PATH), exist_ok=True)

## Buffer
buffer = ReplayBuffer(max_size=10000)
BATCH_SIZE = 128
TRAIN_EVERY = 10  # ogni 10 episodi

other_players = [PassivePlayer(), WeakerPlayerAttacker(), WeakerPlayerAttacker()]


def calculate_step_reward(prev_state, new_state, player_idx):
    """
    Reward intermedio basato su: conquiste, perdite, crescita di dadi
    """
    reward = 0

    # Aree controllate
    prev_areas = len(prev_state.player_areas[player_idx])
    new_areas = len(new_state.player_areas[player_idx])
    reward += (new_areas - prev_areas) * 0.4

    # Dadi totali
    prev_dice = prev_state.player_num_dice[player_idx]
    new_dice = new_state.player_num_dice[player_idx]
    reward += (new_dice - prev_dice) * 0.04

    # Penalità se ha fatto "end turn" senza attaccare
    if prev_areas == new_areas and prev_dice == new_dice:
        reward -= 0.2

    return reward

def calculate_final_reward(winner, player_idx):
    return 20.0 if winner == player_idx else -10.0

def evaluate_agent(agent, num_matches=10, other_players=other_players):
    wins = 0

    for _ in range(num_matches):
        players = [agent] + other_players
        game = Game(num_seats=4)
        match = Match(game)
        grid, state = match.game.grid, match.state

        while match.winner == -1:
            current_player = state.player

            if current_player == 0:
                action = agent.select_action(grid, state, epsilon=0.0)  # no esplorazione
            else:
                action = players[current_player].get_attack_areas(grid, state)

            grid, state = match.step(action)

        if match.winner == 0:
            wins += 1

    win_rate = wins / num_matches
    print(f"🎯 Evaluation match win rate (no epsilon): {win_rate:.2f}")
    return win_rate


# Training loop
agent = RLDicewarsAgent()

## plots
win_history = []
reward_history = []
moving_avg = deque(maxlen=50)  # media mobile su ultimi 50 episodi


## training loop
for episode in tqdm(range(NUM_EPISODES), desc="Episode"):
    players = [agent] + other_players
    game = Game(num_seats=4)
    match = Match(game)
    grid, state = match.game.grid, match.state
    ##print("→ Player order:", [type(p).__name__ for p in players])
    epsilon =max(0.01, 0.1 - episode / NUM_EPISODES)  # decrescente

    history = []

    while match.winner == -1:
        current_player = state.player
        prev_state = state  # <- snapshot prima dell'azione
        
        if current_player == 0:
            action = agent.select_action(grid, state, epsilon=epsilon) # rimiuovi epsilon se non vuoi usare epsilon greedy
            state_vec = agent.encode_state(grid, state)
        else:
            action = players[current_player].get_attack_areas(grid, state)

        grid, state = match.step(action)

        if current_player == 0:
            #q_values = agent.model.predict(state_vec[None, :])[0]
            #print(f"Q-values per lo stato corrente: {q_values}")
            reward = calculate_step_reward(prev_state, state, player_idx=0)
            done = match.winner != -1
            state_vec = agent.encode_state(grid, prev_state)
            next_state_vec = agent.encode_state(grid,state)
            history.append((state_vec, action, reward))
            buffer.add(state_vec, action, reward, next_state_vec, done)


    # Final reward da partita
    final_reward = calculate_final_reward(match.winner, player_idx=0)
    episode_reward = sum([r for _, _, r in history]) + final_reward
    reward_history.append(episode_reward)

    # Registra vincita
    won = 1 if match.winner == 0 else 0
    win_history.append(won)
    moving_avg.append(won)


    # Allena senza buffer
    #for state_vec, action_taken, step_reward in history:
    #    total_reward = step_reward + final_reward
    #    agent.train_step(state_vec, action_taken, total_reward)
    
    ## Buffer training
    if (episode + 1) % TRAIN_EVERY == 0 and len(buffer) >= BATCH_SIZE:
        states, actions, rewards, next_states, dones = buffer.sample(BATCH_SIZE)
        agent.train_batch(states, actions, rewards, next_states, dones)


    print(f"Episode {episode + 1}/{NUM_EPISODES} | Reward: {episode_reward:.2f} | Win Rate: {np.mean(moving_avg):.2f} | 🏆 Winner: {players[match.winner].__class__.__name__}")
    wandb.log({'Episode': episode , 'Reward': episode_reward, 'Win Rate': np.mean(moving_avg), 'Epsilon': (epsilon)})
    
    # Salva modello ogni 20 episodi
    if (episode + 1) % 30 == 0:
        agent.save_model()
    if (episode + 1) % 50 == 0:
        eval_win_rate = evaluate_agent(agent, other_players=other_players)
        wandb.log({'eval_win_rate': eval_win_rate, 'episode': episode})
      
        
print("Time", time.time() - start_time, "seconds")

using cuda ok
Initializing player from standard library with name: PassivePlayer
Initializing player from standard library with name: WeakerPlayerAttacker
Initializing player from standard library with name: WeakerPlayerAttacker
Inizializzazione agente RL
[MODEL] Caricato da: ./saved_models/dicewars_rl_model_new.keras


Episode:   0%|          | 1/1000 [00:04<1:22:22,  4.95s/it]

Episode 1/1000 | Reward: 3.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   0%|          | 2/1000 [00:05<41:25,  2.49s/it]  

Episode 2/1000 | Reward: -8.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   0%|          | 3/1000 [00:07<34:50,  2.10s/it]

Episode 3/1000 | Reward: -6.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   0%|          | 4/1000 [00:10<40:21,  2.43s/it]

Episode 4/1000 | Reward: -5.04 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   0%|          | 5/1000 [00:11<30:24,  1.83s/it]

Episode 5/1000 | Reward: -7.28 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 6/1000 [00:12<28:34,  1.72s/it]

Episode 6/1000 | Reward: -6.60 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 7/1000 [00:14<30:37,  1.85s/it]

Episode 7/1000 | Reward: -5.60 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 8/1000 [00:17<33:08,  2.01s/it]

Episode 8/1000 | Reward: -4.12 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 9/1000 [00:18<30:18,  1.84s/it]

Episode 9/1000 | Reward: -5.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 10/1000 [00:22<41:10,  2.50s/it]

Episode 10/1000 | Reward: -3.04 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 11/1000 [00:25<41:23,  2.51s/it]

Episode 11/1000 | Reward: -2.88 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|          | 12/1000 [00:26<37:34,  2.28s/it]

Episode 12/1000 | Reward: -4.64 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|▏         | 13/1000 [00:29<37:42,  2.29s/it]

Episode 13/1000 | Reward: -5.32 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   1%|▏         | 14/1000 [00:30<34:21,  2.09s/it]

Episode 14/1000 | Reward: -6.16 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 15/1000 [00:33<36:40,  2.23s/it]

Episode 15/1000 | Reward: -3.08 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 16/1000 [00:36<43:17,  2.64s/it]

Episode 16/1000 | Reward: -4.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 17/1000 [00:38<38:08,  2.33s/it]

Episode 17/1000 | Reward: -6.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 18/1000 [00:40<36:49,  2.25s/it]

Episode 18/1000 | Reward: -7.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 19/1000 [00:42<35:50,  2.19s/it]

Episode 19/1000 | Reward: -5.36 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 20/1000 [00:43<31:37,  1.94s/it]

Episode 20/1000 | Reward: -8.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 21/1000 [00:47<40:18,  2.47s/it]

Episode 21/1000 | Reward: 0.16 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 23/1000 [00:49<27:21,  1.68s/it]

Episode 22/1000 | Reward: -4.96 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
Episode 23/1000 | Reward: -10.08 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▏         | 24/1000 [00:51<28:18,  1.74s/it]

Episode 24/1000 | Reward: -7.96 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   2%|▎         | 25/1000 [00:55<37:37,  2.32s/it]

Episode 25/1000 | Reward: -2.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 26/1000 [00:59<44:44,  2.76s/it]

Episode 26/1000 | Reward: -0.16 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 27/1000 [01:01<43:37,  2.69s/it]

Episode 27/1000 | Reward: -3.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 28/1000 [01:10<1:15:29,  4.66s/it]

Episode 28/1000 | Reward: 12.36 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 29/1000 [01:12<58:31,  3.62s/it]  

Episode 29/1000 | Reward: -7.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 30/1000 [01:13<48:37,  3.01s/it]

Episode 30/1000 | Reward: -9.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:   3%|▎         | 31/1000 [01:15<44:12,  2.74s/it]

Episode 31/1000 | Reward: -3.12 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 32/1000 [01:18<46:04,  2.86s/it]

Episode 32/1000 | Reward: -6.32 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 33/1000 [01:19<37:19,  2.32s/it]

Episode 33/1000 | Reward: -8.32 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   3%|▎         | 34/1000 [01:22<39:55,  2.48s/it]

Episode 34/1000 | Reward: -4.72 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▎         | 35/1000 [01:24<36:38,  2.28s/it]

Episode 35/1000 | Reward: -5.20 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▎         | 36/1000 [01:25<30:33,  1.90s/it]

Episode 36/1000 | Reward: -6.24 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▎         | 37/1000 [01:28<35:04,  2.18s/it]

Episode 37/1000 | Reward: -1.12 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 38/1000 [01:30<33:47,  2.11s/it]

Episode 38/1000 | Reward: -5.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 39/1000 [01:36<50:49,  3.17s/it]

Episode 39/1000 | Reward: 1.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 40/1000 [01:38<46:20,  2.90s/it]

Episode 40/1000 | Reward: -6.12 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 41/1000 [01:39<38:21,  2.40s/it]

Episode 41/1000 | Reward: -7.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 42/1000 [01:41<35:10,  2.20s/it]

Episode 42/1000 | Reward: -6.12 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 43/1000 [01:42<31:30,  1.98s/it]

Episode 43/1000 | Reward: -7.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 44/1000 [01:45<36:14,  2.27s/it]

Episode 44/1000 | Reward: -5.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   4%|▍         | 45/1000 [01:48<38:03,  2.39s/it]

Episode 45/1000 | Reward: -3.60 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▍         | 46/1000 [01:50<35:06,  2.21s/it]

Episode 46/1000 | Reward: -5.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▍         | 47/1000 [01:52<35:43,  2.25s/it]

Episode 47/1000 | Reward: -5.72 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▍         | 48/1000 [01:54<33:21,  2.10s/it]

Episode 48/1000 | Reward: -7.08 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▍         | 49/1000 [01:57<39:30,  2.49s/it]

Episode 49/1000 | Reward: -0.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
Episode 50/1000 | Reward: -1.60 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▌         | 50/1000 [02:34<3:20:20, 12.65s/it]

🎯 Evaluation match win rate (no epsilon): 0.00


Episode:   5%|▌         | 51/1000 [02:36<2:32:47,  9.66s/it]

Episode 51/1000 | Reward: -3.16 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▌         | 52/1000 [02:43<2:16:39,  8.65s/it]

Episode 52/1000 | Reward: 5.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▌         | 53/1000 [02:44<1:40:48,  6.39s/it]

Episode 53/1000 | Reward: -6.88 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   5%|▌         | 54/1000 [02:47<1:26:13,  5.47s/it]

Episode 54/1000 | Reward: -2.68 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 55/1000 [02:49<1:09:33,  4.42s/it]

Episode 55/1000 | Reward: -5.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 56/1000 [02:50<52:54,  3.36s/it]  

Episode 56/1000 | Reward: -8.40 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 57/1000 [02:53<49:47,  3.17s/it]

Episode 57/1000 | Reward: -4.68 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 58/1000 [02:54<40:13,  2.56s/it]

Episode 58/1000 | Reward: -6.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 59/1000 [02:57<41:47,  2.66s/it]

Episode 59/1000 | Reward: -1.56 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 60/1000 [02:59<39:12,  2.50s/it]

Episode 60/1000 | Reward: -6.88 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:   6%|▌         | 61/1000 [03:01<38:05,  2.43s/it]

Episode 61/1000 | Reward: -5.40 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▌         | 62/1000 [03:03<34:47,  2.23s/it]

Episode 62/1000 | Reward: -7.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▋         | 63/1000 [03:06<38:15,  2.45s/it]

Episode 63/1000 | Reward: -5.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▋         | 64/1000 [03:10<47:13,  3.03s/it]

Episode 64/1000 | Reward: 0.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   6%|▋         | 65/1000 [03:12<43:29,  2.79s/it]

Episode 65/1000 | Reward: -3.20 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 66/1000 [03:13<35:41,  2.29s/it]

Episode 66/1000 | Reward: -8.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 67/1000 [03:16<36:18,  2.33s/it]

Episode 67/1000 | Reward: -4.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 68/1000 [03:17<30:20,  1.95s/it]

Episode 68/1000 | Reward: -9.60 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 69/1000 [03:23<48:23,  3.12s/it]

Episode 69/1000 | Reward: 5.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 70/1000 [03:25<45:00,  2.90s/it]

Episode 70/1000 | Reward: -7.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 71/1000 [03:30<51:33,  3.33s/it]

Episode 71/1000 | Reward: 1.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 72/1000 [03:38<1:17:04,  4.98s/it]

Episode 72/1000 | Reward: 17.24 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 73/1000 [03:40<1:00:39,  3.93s/it]

Episode 73/1000 | Reward: -7.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   7%|▋         | 74/1000 [03:41<49:05,  3.18s/it]  

Episode 74/1000 | Reward: -7.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 75/1000 [03:44<47:52,  3.11s/it]

Episode 75/1000 | Reward: -1.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 76/1000 [03:45<39:20,  2.55s/it]

Episode 76/1000 | Reward: -7.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 77/1000 [03:48<39:27,  2.57s/it]

Episode 77/1000 | Reward: -3.04 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 78/1000 [03:50<37:32,  2.44s/it]

Episode 78/1000 | Reward: -8.20 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 79/1000 [03:51<31:51,  2.08s/it]

Episode 79/1000 | Reward: -6.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 80/1000 [03:56<41:19,  2.70s/it]

Episode 80/1000 | Reward: -1.84 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 81/1000 [04:03<1:02:29,  4.08s/it]

Episode 81/1000 | Reward: 13.68 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 82/1000 [04:04<49:33,  3.24s/it]  

Episode 82/1000 | Reward: -7.96 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 83/1000 [04:06<42:34,  2.79s/it]

Episode 83/1000 | Reward: -7.00 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 84/1000 [04:08<38:54,  2.55s/it]

Episode 84/1000 | Reward: -6.56 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   8%|▊         | 85/1000 [04:09<32:42,  2.14s/it]

Episode 85/1000 | Reward: -7.72 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▊         | 86/1000 [04:11<33:12,  2.18s/it]

Episode 86/1000 | Reward: -5.08 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▊         | 87/1000 [04:13<31:59,  2.10s/it]

Episode 87/1000 | Reward: -5.72 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 88/1000 [04:17<38:35,  2.54s/it]

Episode 88/1000 | Reward: -1.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 89/1000 [04:19<35:58,  2.37s/it]

Episode 89/1000 | Reward: -5.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 90/1000 [04:24<47:35,  3.14s/it]

Episode 90/1000 | Reward: -2.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:   9%|▉         | 91/1000 [04:26<44:24,  2.93s/it]

Episode 91/1000 | Reward: -3.92 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 92/1000 [04:29<44:46,  2.96s/it]

Episode 92/1000 | Reward: -2.88 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 93/1000 [04:32<45:55,  3.04s/it]

Episode 93/1000 | Reward: -1.04 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:   9%|▉         | 94/1000 [04:34<39:31,  2.62s/it]

Episode 94/1000 | Reward: -7.56 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|▉         | 95/1000 [04:36<34:38,  2.30s/it]

Episode 95/1000 | Reward: -7.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|▉         | 96/1000 [04:37<32:36,  2.16s/it]

Episode 96/1000 | Reward: -5.16 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|▉         | 97/1000 [04:40<32:56,  2.19s/it]

Episode 97/1000 | Reward: -4.48 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|▉         | 98/1000 [04:41<29:31,  1.96s/it]

Episode 98/1000 | Reward: -7.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|▉         | 99/1000 [04:44<32:41,  2.18s/it]

Episode 99/1000 | Reward: -6.28 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
Episode 100/1000 | Reward: -6.32 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|█         | 100/1000 [05:12<2:31:08, 10.08s/it]

🎯 Evaluation match win rate (no epsilon): 0.00


Episode:  10%|█         | 101/1000 [05:16<2:03:32,  8.25s/it]

Episode 101/1000 | Reward: -2.52 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|█         | 102/1000 [05:17<1:30:18,  6.03s/it]

Episode 102/1000 | Reward: -7.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|█         | 103/1000 [05:18<1:08:33,  4.59s/it]

Episode 103/1000 | Reward: -8.72 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|█         | 104/1000 [05:23<1:06:39,  4.46s/it]

Episode 104/1000 | Reward: -6.76 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  10%|█         | 105/1000 [05:27<1:04:29,  4.32s/it]

Episode 105/1000 | Reward: -1.44 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█         | 106/1000 [05:39<1:41:58,  6.84s/it]

Episode 106/1000 | Reward: 59.16 | Win Rate: 0.02 | 🏆 Winner: RLDicewarsAgent


Episode:  11%|█         | 107/1000 [05:45<1:37:24,  6.54s/it]

Episode 107/1000 | Reward: -6.12 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█         | 108/1000 [05:46<1:12:11,  4.86s/it]

Episode 108/1000 | Reward: -8.52 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█         | 109/1000 [05:49<1:03:43,  4.29s/it]

Episode 109/1000 | Reward: -2.84 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█         | 110/1000 [06:00<1:33:42,  6.32s/it]

Episode 110/1000 | Reward: 59.20 | Win Rate: 0.04 | 🏆 Winner: RLDicewarsAgent


Episode:  11%|█         | 111/1000 [06:03<1:17:49,  5.25s/it]

Episode 111/1000 | Reward: -3.32 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█         | 112/1000 [06:05<1:04:28,  4.36s/it]

Episode 112/1000 | Reward: -5.00 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█▏        | 113/1000 [06:08<59:55,  4.05s/it]  

Episode 113/1000 | Reward: -5.20 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  11%|█▏        | 114/1000 [06:09<46:26,  3.15s/it]

Episode 114/1000 | Reward: -7.76 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 115/1000 [06:12<43:27,  2.95s/it]

Episode 115/1000 | Reward: -5.92 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 116/1000 [06:17<51:22,  3.49s/it]

Episode 116/1000 | Reward: -8.92 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 117/1000 [06:18<43:12,  2.94s/it]

Episode 117/1000 | Reward: -6.44 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 118/1000 [06:22<44:18,  3.01s/it]

Episode 118/1000 | Reward: -2.20 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 119/1000 [06:24<39:47,  2.71s/it]

Episode 119/1000 | Reward: -6.68 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 120/1000 [06:27<41:35,  2.84s/it]

Episode 120/1000 | Reward: -4.16 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:  12%|█▏        | 121/1000 [06:28<33:21,  2.28s/it]

Episode 121/1000 | Reward: -9.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 122/1000 [06:30<31:31,  2.15s/it]

Episode 122/1000 | Reward: -7.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 123/1000 [06:33<35:10,  2.41s/it]

Episode 123/1000 | Reward: -3.40 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▏        | 124/1000 [06:35<35:34,  2.44s/it]

Episode 124/1000 | Reward: -3.16 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  12%|█▎        | 125/1000 [06:36<30:58,  2.12s/it]

Episode 125/1000 | Reward: -7.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 126/1000 [06:38<28:49,  1.98s/it]

Episode 126/1000 | Reward: -5.12 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 127/1000 [06:41<31:27,  2.16s/it]

Episode 127/1000 | Reward: -5.40 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 128/1000 [06:44<36:48,  2.53s/it]

Episode 128/1000 | Reward: -2.20 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 129/1000 [06:47<38:40,  2.66s/it]

Episode 129/1000 | Reward: -3.12 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 130/1000 [06:49<37:52,  2.61s/it]

Episode 130/1000 | Reward: -5.32 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 131/1000 [06:51<31:44,  2.19s/it]

Episode 131/1000 | Reward: -9.68 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 132/1000 [06:53<33:03,  2.28s/it]

Episode 132/1000 | Reward: -5.48 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 133/1000 [06:55<29:40,  2.05s/it]

Episode 133/1000 | Reward: -7.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  13%|█▎        | 134/1000 [06:56<25:25,  1.76s/it]

Episode 134/1000 | Reward: -7.36 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▎        | 135/1000 [06:58<25:08,  1.74s/it]

Episode 135/1000 | Reward: -6.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▎        | 136/1000 [07:01<31:34,  2.19s/it]

Episode 136/1000 | Reward: -2.72 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▎        | 137/1000 [07:04<37:52,  2.63s/it]

Episode 137/1000 | Reward: -1.08 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 138/1000 [07:07<36:33,  2.54s/it]

Episode 138/1000 | Reward: -3.12 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 139/1000 [07:13<51:07,  3.56s/it]

Episode 139/1000 | Reward: -3.60 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 140/1000 [07:14<43:31,  3.04s/it]

Episode 140/1000 | Reward: -7.44 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 141/1000 [07:17<39:41,  2.77s/it]

Episode 141/1000 | Reward: -4.76 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 142/1000 [07:20<41:53,  2.93s/it]

Episode 142/1000 | Reward: -3.44 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 143/1000 [07:23<41:09,  2.88s/it]

Episode 143/1000 | Reward: -3.76 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 144/1000 [07:24<35:54,  2.52s/it]

Episode 144/1000 | Reward: -5.52 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  14%|█▍        | 145/1000 [07:26<32:17,  2.27s/it]

Episode 145/1000 | Reward: -6.96 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▍        | 146/1000 [07:28<31:50,  2.24s/it]

Episode 146/1000 | Reward: -3.00 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▍        | 147/1000 [07:30<28:29,  2.00s/it]

Episode 147/1000 | Reward: -7.72 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▍        | 148/1000 [07:31<26:13,  1.85s/it]

Episode 148/1000 | Reward: -6.44 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▍        | 149/1000 [07:34<31:38,  2.23s/it]

Episode 149/1000 | Reward: -2.28 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker
Episode 150/1000 | Reward: -5.56 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:  15%|█▌        | 150/1000 [08:05<2:32:08, 10.74s/it]

🎯 Evaluation match win rate (no epsilon): 0.00


Episode:  15%|█▌        | 151/1000 [08:07<1:54:16,  8.08s/it]

Episode 151/1000 | Reward: -7.12 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▌        | 152/1000 [08:08<1:26:01,  6.09s/it]

Episode 152/1000 | Reward: -6.36 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  15%|█▌        | 153/1000 [08:16<1:34:14,  6.68s/it]

Episode 153/1000 | Reward: 45.56 | Win Rate: 0.06 | 🏆 Winner: RLDicewarsAgent


Episode:  15%|█▌        | 154/1000 [08:19<1:16:32,  5.43s/it]

Episode 154/1000 | Reward: -5.12 | Win Rate: 0.06 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 155/1000 [08:20<59:33,  4.23s/it]  

Episode 155/1000 | Reward: -7.84 | Win Rate: 0.06 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 156/1000 [08:22<50:00,  3.55s/it]

Episode 156/1000 | Reward: -4.88 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 157/1000 [08:25<45:30,  3.24s/it]

Episode 157/1000 | Reward: -4.96 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 158/1000 [08:25<33:43,  2.40s/it]

Episode 158/1000 | Reward: -9.64 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 159/1000 [08:27<33:13,  2.37s/it]

Episode 159/1000 | Reward: -4.48 | Win Rate: 0.04 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 160/1000 [08:29<30:14,  2.16s/it]

Episode 160/1000 | Reward: -7.20 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 161/1000 [08:30<25:32,  1.83s/it]

Episode 161/1000 | Reward: -8.84 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▌        | 162/1000 [08:33<29:05,  2.08s/it]

Episode 162/1000 | Reward: -4.88 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▋        | 163/1000 [08:34<26:33,  1.90s/it]

Episode 163/1000 | Reward: -7.12 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▋        | 164/1000 [08:36<24:07,  1.73s/it]

Episode 164/1000 | Reward: -6.96 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  16%|█▋        | 165/1000 [08:37<22:59,  1.65s/it]

Episode 165/1000 | Reward: -8.00 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 166/1000 [08:38<19:44,  1.42s/it]

Episode 166/1000 | Reward: -9.40 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 167/1000 [08:41<24:28,  1.76s/it]

Episode 167/1000 | Reward: -4.04 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 168/1000 [08:44<30:39,  2.21s/it]

Episode 168/1000 | Reward: -4.08 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 169/1000 [08:46<31:33,  2.28s/it]

Episode 169/1000 | Reward: -2.88 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 170/1000 [08:49<34:55,  2.52s/it]

Episode 170/1000 | Reward: -7.56 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 171/1000 [08:51<32:17,  2.34s/it]

Episode 171/1000 | Reward: -5.44 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 172/1000 [08:53<28:03,  2.03s/it]

Episode 172/1000 | Reward: -8.00 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 173/1000 [08:54<25:09,  1.83s/it]

Episode 173/1000 | Reward: -6.76 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  17%|█▋        | 174/1000 [08:57<32:22,  2.35s/it]

Episode 174/1000 | Reward: -1.48 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 175/1000 [09:01<36:24,  2.65s/it]

Episode 175/1000 | Reward: -0.24 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 176/1000 [09:01<28:13,  2.05s/it]

Episode 176/1000 | Reward: -9.68 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 177/1000 [09:05<32:07,  2.34s/it]

Episode 177/1000 | Reward: -3.64 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 178/1000 [09:06<27:39,  2.02s/it]

Episode 178/1000 | Reward: -8.56 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 179/1000 [09:07<25:43,  1.88s/it]

Episode 179/1000 | Reward: -6.36 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 180/1000 [09:10<28:52,  2.11s/it]

Episode 180/1000 | Reward: -4.00 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker
[MODEL] Salvato in: ./saved_models/dicewars_rl_model_new.keras


Episode:  18%|█▊        | 181/1000 [09:11<25:17,  1.85s/it]

Episode 181/1000 | Reward: -8.36 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 182/1000 [09:13<24:49,  1.82s/it]

Episode 182/1000 | Reward: -5.92 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 183/1000 [09:16<28:23,  2.08s/it]

Episode 183/1000 | Reward: -1.60 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 184/1000 [09:24<55:50,  4.11s/it]

Episode 184/1000 | Reward: 13.40 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  18%|█▊        | 185/1000 [09:26<46:51,  3.45s/it]

Episode 185/1000 | Reward: -7.08 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▊        | 186/1000 [09:29<44:11,  3.26s/it]

Episode 186/1000 | Reward: -6.36 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▊        | 187/1000 [09:30<34:12,  2.52s/it]

Episode 187/1000 | Reward: -9.64 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 188/1000 [09:31<29:12,  2.16s/it]

Episode 188/1000 | Reward: -9.28 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 189/1000 [09:37<44:30,  3.29s/it]

Episode 189/1000 | Reward: 8.80 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 190/1000 [09:40<40:21,  2.99s/it]

Episode 190/1000 | Reward: -6.12 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 191/1000 [09:43<44:01,  3.27s/it]

Episode 191/1000 | Reward: -0.32 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 192/1000 [09:45<35:10,  2.61s/it]

Episode 192/1000 | Reward: -7.84 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 193/1000 [09:47<35:22,  2.63s/it]

Episode 193/1000 | Reward: -3.56 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  19%|█▉        | 194/1000 [09:48<27:11,  2.02s/it]

Episode 194/1000 | Reward: -8.76 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|█▉        | 195/1000 [09:51<32:09,  2.40s/it]

Episode 195/1000 | Reward: -1.24 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|█▉        | 196/1000 [09:54<33:12,  2.48s/it]

Episode 196/1000 | Reward: -4.32 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|█▉        | 197/1000 [09:55<28:51,  2.16s/it]

Episode 197/1000 | Reward: -7.76 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|█▉        | 198/1000 [09:57<25:42,  1.92s/it]

Episode 198/1000 | Reward: -6.96 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|█▉        | 199/1000 [09:59<27:08,  2.03s/it]

Episode 199/1000 | Reward: -4.76 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker
Episode 200/1000 | Reward: -7.12 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|██        | 200/1000 [10:26<2:08:14,  9.62s/it]

🎯 Evaluation match win rate (no epsilon): 0.00


Episode:  20%|██        | 201/1000 [10:28<1:36:08,  7.22s/it]

Episode 201/1000 | Reward: -7.40 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|██        | 202/1000 [10:31<1:20:32,  6.06s/it]

Episode 202/1000 | Reward: -3.64 | Win Rate: 0.02 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|██        | 203/1000 [10:36<1:14:33,  5.61s/it]

Episode 203/1000 | Reward: -0.80 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|██        | 204/1000 [10:40<1:08:04,  5.13s/it]

Episode 204/1000 | Reward: 0.68 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker


Episode:  20%|██        | 205/1000 [10:43<1:01:51,  4.67s/it]

Episode 205/1000 | Reward: -0.36 | Win Rate: 0.00 | 🏆 Winner: WeakerPlayerAttacker
