In [None]:
##log_dir = "logs/dicewars/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# simulate matches against other bots
# train your agent
# save the model weights (model.save())

import numpy as np
from tensorflow.summary import create_file_writer
import datetime
from importlib import import_module
from dicewars.match import Match
from dicewars.game import Game
from dicewars.player import RandomPlayer, AgressivePlayer, WeakerPlayerAttacker
from rl_agent import RLDicewarsAgent
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from collections import deque
from tqdm import tqdm
import wandb

# Training configuration.
NUM_EPISODES = 1000
SAVE_MODEL_PATH = "./saved_models/dicewars_rl_model.keras"

# Start a Weights & Biases run, remember its id, then rebind `run` to the
# object returned by the public API for that same id.
# NOTE(review): after the rebind, `run` is no longer the handle returned by
# wandb.init() -- confirm that later cells expect the API object.
run = wandb.init(project='Machine-Learning-DICEWARS', name='Dice Wars')
run_id = run.id
api = wandb.Api()
run = api.run(f"Machine-Learning-DICEWARS/{run_id}")

# Report whether TensorFlow can see at least one GPU.
print("using cuda ok" if tf.config.list_physical_devices('GPU') else 'using cpu')

# Create the checkpoint directory if it does not exist yet.
os.makedirs(os.path.dirname(SAVE_MODEL_PATH), exist_ok=True)

def calculate_step_reward(prev_state, new_state, player_idx, *,
                          area_weight=0.3, dice_weight=0.05, idle_penalty=0.2):
    """
    Intermediate (shaping) reward for one step taken by a player.

    The reward is built from the change in the number of areas the player
    controls and the change in the player's total dice, comparing the state
    before the step with the state after it.

    Args:
        prev_state: state before the step; must expose ``player_areas`` and
            ``player_num_dice``, both indexable by ``player_idx``.
        new_state: state after the step, with the same attributes.
        player_idx: index of the player the reward is computed for.
        area_weight: reward per area gained (negative contribution per area lost).
        dice_weight: reward per die gained (negative contribution per die lost).
        idle_penalty: amount subtracted when neither the area count nor the
            dice count changed (e.g. ending the turn without attacking).

    Returns:
        The step reward as a float.
    """
    # Change in controlled areas
    area_delta = len(new_state.player_areas[player_idx]) - len(prev_state.player_areas[player_idx])

    # Change in total dice
    dice_delta = new_state.player_num_dice[player_idx] - prev_state.player_num_dice[player_idx]

    reward = area_delta * area_weight + dice_delta * dice_weight

    # Nothing changed for this player: penalise the passive move
    if area_delta == 0 and dice_delta == 0:
        reward -= idle_penalty

    return reward

def calculate_final_reward(winner, player_idx):
    """Terminal reward: 1.0 if ``player_idx`` won the match, otherwise -0.5."""
    if winner != player_idx:
        return -0.5
    return 1.0


# Training setup
agent = RLDicewarsAgent()

# Metrics collected for plotting
win_history = []
reward_history = []
moving_avg = deque(maxlen=50)  # win flags of the last 50 episodes (rolling win rate)

NUM_SEATS = 4    # players per match: the agent (index 0) plus random opponents
SAVE_EVERY = 20  # checkpoint interval, in episodes


# Training loop: one full match per episode
for episode in tqdm(range(NUM_EPISODES), desc="Episode"):
    players = [agent] + [RandomPlayer() for _ in range(NUM_SEATS - 1)]
    game = Game(num_seats=NUM_SEATS)
    match = Match(game)
    grid, state = match.game.grid, match.state

    # Exploration rate: starts at 0.1, decreases with the episode index and is
    # floored at 0.01.
    # NOTE(review): `episode / NUM_EPISODES` is a 0..1 fraction, so the floor is
    # reached after 9% of the episodes -- confirm this schedule is intended.
    epsilon = max(0.01, 0.1 - episode / NUM_EPISODES)

    history = []  # (state_vec, action, step_reward) for every move made by the agent

    while match.winner == -1:
        current_player = state.player
        # Reference to the state before the action is applied.
        # NOTE(review): this is not a copy; it assumes match.step() returns a
        # new state object instead of mutating this one -- verify, otherwise
        # the step reward always compares a state with itself.
        prev_state = state

        if current_player == 0:
            # Drop the epsilon argument to disable epsilon-greedy exploration.
            action = agent.select_action(grid, state, epsilon=epsilon)
            state_vec = agent.encode_state(grid, state)
        else:
            action = players[current_player].get_attack_areas(grid, state)

        grid, state = match.step(action)

        # Only the agent's own moves are recorded for training.
        if current_player == 0:
            reward = calculate_step_reward(prev_state, state, player_idx=0)
            history.append((state_vec, action, reward))

    # Terminal reward from the match outcome
    final_reward = calculate_final_reward(match.winner, player_idx=0)
    episode_reward = sum(r for _, _, r in history) + final_reward
    reward_history.append(episode_reward)

    # Record the win/loss flag
    won = 1 if match.winner == 0 else 0
    win_history.append(won)
    moving_avg.append(won)
    win_rate = np.mean(moving_avg)

    # Train: every recorded step receives its own reward plus the final reward
    for state_vec, action_taken, step_reward in history:
        total_reward = step_reward + final_reward
        agent.train_step(state_vec, action_taken, total_reward)

    print(f"Episode {episode + 1}/{NUM_EPISODES} | Reward: {episode_reward:.2f} | Win Rate: {win_rate:.2f}")
    # The logged 'Episode' value is the zero-based loop index.
    wandb.log({'Episode': episode , 'Reward': episode_reward, 'Win Rate': win_rate})

    # Save the model every SAVE_EVERY episodes
    if (episode + 1) % SAVE_EVERY == 0:
        agent.save_model()

# Persist the weights of the trailing episodes when NUM_EPISODES is not a
# multiple of SAVE_EVERY (no extra save when it is).
if NUM_EPISODES % SAVE_EVERY != 0:
    agent.save_model()
        

2025-03-27 14:11:31.112123: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-27 14:11:31.150912: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-27 14:11:31.150933: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-27 14:11:31.150959: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-27 14:11:31.158645: I tensorflow/core/platform/cpu_feature_g

using cuda ok
Inizializzazione agente RL


2025-03-27 14:11:41.576529: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13246 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:3b:00.0, compute capability: 7.5
2025-03-27 14:11:41.578944: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13764 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:d8:00.0, compute capability: 7.5


[MODEL] Caricato da: ./saved_models/dicewars_rl_model.keras


Episode:   0%|          | 0/1000 [00:00<?, ?it/s]

Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


2025-03-27 14:11:44.586625: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f320c2bd330 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-03-27 14:11:44.586651: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-03-27 14:11:44.586658: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2025-03-27 14:11:44.595115: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-03-27 14:11:44.618047: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2025-03-27 14:11:44.762935: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
Episode:   0%|          | 1/1000 [00:06<1:54:32,  

Episode 1/1000 | Reward: 2.85 | Win Rate: 0.00
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   0%|          | 2/1000 [00:39<6:10:36, 22.28s/it]

Episode 2/1000 | Reward: 25.85 | Win Rate: 0.00
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   0%|          | 3/1000 [01:15<7:51:18, 28.36s/it]

Episode 3/1000 | Reward: 33.25 | Win Rate: 0.33
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   0%|          | 4/1000 [01:26<5:59:14, 21.64s/it]

Episode 4/1000 | Reward: -5.75 | Win Rate: 0.25
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   0%|          | 5/1000 [01:47<5:50:39, 21.15s/it]

Episode 5/1000 | Reward: 26.60 | Win Rate: 0.40
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 6/1000 [01:51<4:16:09, 15.46s/it]

Episode 6/1000 | Reward: 0.75 | Win Rate: 0.33
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 7/1000 [02:12<4:46:58, 17.34s/it]

Episode 7/1000 | Reward: 17.55 | Win Rate: 0.29
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 8/1000 [02:39<5:35:44, 20.31s/it]

Episode 8/1000 | Reward: 14.10 | Win Rate: 0.25
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 9/1000 [02:54<5:10:06, 18.78s/it]

Episode 9/1000 | Reward: 24.40 | Win Rate: 0.33
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 10/1000 [03:04<4:21:06, 15.82s/it]

Episode 10/1000 | Reward: 3.05 | Win Rate: 0.30
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 11/1000 [03:18<4:14:19, 15.43s/it]

Episode 11/1000 | Reward: 7.70 | Win Rate: 0.27
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|          | 12/1000 [03:36<4:24:38, 16.07s/it]

Episode 12/1000 | Reward: 10.10 | Win Rate: 0.25
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|▏         | 13/1000 [03:42<3:36:20, 13.15s/it]

Episode 13/1000 | Reward: 5.40 | Win Rate: 0.23
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   1%|▏         | 14/1000 [04:19<5:33:34, 20.30s/it]

Episode 14/1000 | Reward: 38.95 | Win Rate: 0.29
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 15/1000 [04:24<4:16:22, 15.62s/it]

Episode 15/1000 | Reward: -2.70 | Win Rate: 0.27
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 16/1000 [04:32<3:38:53, 13.35s/it]

Episode 16/1000 | Reward: -1.75 | Win Rate: 0.25
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 17/1000 [05:04<5:10:33, 18.96s/it]

Episode 17/1000 | Reward: 37.00 | Win Rate: 0.29
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 18/1000 [05:06<3:49:18, 14.01s/it]

Episode 18/1000 | Reward: -0.35 | Win Rate: 0.28
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 19/1000 [05:08<2:50:14, 10.41s/it]

Episode 19/1000 | Reward: 0.40 | Win Rate: 0.26
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 20/1000 [05:13<2:21:21,  8.66s/it]

Episode 20/1000 | Reward: -1.25 | Win Rate: 0.25
[MODEL] Salvato in: ./saved_models/dicewars_rl_model.keras
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 21/1000 [05:17<1:59:00,  7.29s/it]

Episode 21/1000 | Reward: 0.40 | Win Rate: 0.24
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 22/1000 [05:22<1:45:45,  6.49s/it]

Episode 22/1000 | Reward: 2.30 | Win Rate: 0.23
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 23/1000 [05:51<3:35:31, 13.24s/it]

Episode 23/1000 | Reward: 34.40 | Win Rate: 0.26
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▏         | 24/1000 [06:01<3:23:58, 12.54s/it]

Episode 24/1000 | Reward: -3.95 | Win Rate: 0.25
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   2%|▎         | 25/1000 [06:05<2:39:07,  9.79s/it]

Episode 25/1000 | Reward: -0.35 | Win Rate: 0.24
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 26/1000 [06:13<2:30:02,  9.24s/it]

Episode 26/1000 | Reward: 5.45 | Win Rate: 0.23
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 27/1000 [06:18<2:12:34,  8.18s/it]

Episode 27/1000 | Reward: 0.80 | Win Rate: 0.22
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 28/1000 [06:25<2:06:04,  7.78s/it]

Episode 28/1000 | Reward: 1.70 | Win Rate: 0.21
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 29/1000 [06:49<3:24:39, 12.65s/it]

Episode 29/1000 | Reward: 31.80 | Win Rate: 0.24
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 30/1000 [06:52<2:35:04,  9.59s/it]

Episode 30/1000 | Reward: -1.55 | Win Rate: 0.23
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 31/1000 [06:56<2:08:14,  7.94s/it]

Episode 31/1000 | Reward: -1.45 | Win Rate: 0.23
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


Episode:   3%|▎         | 32/1000 [07:02<1:59:48,  7.43s/it]

Episode 32/1000 | Reward: 1.80 | Win Rate: 0.22
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer
Initializing player from standard library with name: RandomPlayer


In [15]:
#%load_ext tensorboard
#!kill 791914
#%tensorboard --logdir logs/dicewars