# Imports

In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import chess
from torchsummary import summary
import numpy as np
import random
from tqdm.notebook import tqdm
import chess.svg
import cairosvg
import chess.engine
import os

# Device

In [67]:
if torch.cuda.is_available():
    device_count = torch.cuda.device_count()
    print(f"✅ {device_count} CUDA device(s) available:")
    for i in range(device_count):
        print(f"  └─ [{i}] {torch.cuda.get_device_name(i)}")
    DEVICE = torch.device("cuda")
else:
    print("⚠️ CUDA not available, using CPU.")
    DEVICE = torch.device("cpu")

✅ 1 CUDA device(s) available:
  └─ [0] NVIDIA GeForce GTX 1650


# Encoding

In [68]:

def board_encoder(board: chess.Board) -> np.ndarray:
    planes = np.zeros((19, 8, 8), dtype=np.float32)
    piece_map = board.piece_map()

    # 12 planes for pieces (6x2)
    for square, piece in piece_map.items():
        row, col = divmod(square, 8)
        plane_idx = (piece.piece_type - 1) + \
            (0 if piece.color == chess.WHITE else 6)
        planes[plane_idx][row][col] = 1

    # Turn to move
    planes[12].fill(float(board.turn))

    # Castling rights (4 planes)
    planes[13].fill(board.has_kingside_castling_rights(chess.WHITE))
    planes[14].fill(board.has_queenside_castling_rights(chess.WHITE))
    planes[15].fill(board.has_kingside_castling_rights(chess.BLACK))
    planes[16].fill(board.has_queenside_castling_rights(chess.BLACK))

    # Halfmove clock (normalized)
    planes[17].fill(board.halfmove_clock / 100.0)

    # En Passant square (new plane!)
    if board.ep_square is not None:
        row, col = divmod(board.ep_square, 8)
        planes[18][row][col] = 1.0

    return planes


def move_to_index(move: chess.Move) -> int:
    from_square = move.from_square
    to_square = move.to_square
    promotion = move.promotion

    dx = chess.square_file(to_square) - chess.square_file(from_square)
    dy = chess.square_rank(to_square) - chess.square_rank(from_square)

    # Handle knight moves separately
    knight_deltas = {(1, 2): 0, (2, 1): 1, (2, -1): 2, (1, -2):3,
                     (-1, -2): 4, (-2, -1): 5, (-2, 1): 6, (-1, 2): 7}
    if (dx, dy) in knight_deltas:
        dir_idx = knight_deltas[(dx, dy)]
        return from_square * 73 + 56 + dir_idx

    # Handle promotions
    if promotion is not None:
        promo_types = [chess.KNIGHT, chess.ROOK, chess.BISHOP]
        directions = [-1, 0, 1]  # left, straight, right
        dx = chess.square_file(to_square) - chess.square_file(from_square)

        if dx in directions and promotion in promo_types:
            dir_idx = directions.index(dx)
            promo_idx = promo_types.index(promotion)
            return from_square * 73 + 64 + (promo_idx * 3) + dir_idx

    # Handle normal directional moves
    directions = {(0, 1): 0, (1, 0): 1, (0, -1): 2, (-1, 0): 3,
                  (1, 1): 4, (1, -1): 5, (-1, 1): 6, (-1, -1): 7}
    curr_dir = [0, 0]
    if dx > 0: curr_dir[0] = 1
    if dx < 0: curr_dir[0] = -1
    if dy > 0: curr_dir[1] = 1
    if dy < 0: curr_dir[1] = -1
    curr_dir = tuple(curr_dir)
    diff = abs(dx) | abs(dy)
    assert diff > 0

    if curr_dir in directions:
        dir_index = directions[curr_dir]
        return from_square * 73 + (diff-1)*8 + dir_index

    raise ValueError(f"Move {move} could not be encoded.")


def index_to_move(index: int) -> chess.Move:
    from_square = index // 73
    sub_index = index % 73

    if sub_index < 56:
        # Sliding move
        directions = [(0, 1), (1, 0), (0, -1), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
        diff, dir = sub_index // 8, sub_index%8
        diff += 1
        dx, dy = ( diff*directions[dir][0], diff*directions[dir][1] )
        fx = chess.square_file(from_square)
        fy = chess.square_rank(from_square)
        tx = fx + dx
        ty = fy + dy
        if 0 <= tx < 8 and 0 <= ty < 8:
            to_square = chess.square(tx, ty)
            move = chess.Move(from_square, to_square)
            return move

    elif sub_index < 64:
        # Knight move
        knight_deltas = [(1, 2), (2, 1), (2, -1), (1, -2),
                         (-1, -2), (-2, -1), (-2, 1), (-1, 2)]
        dx, dy = knight_deltas[sub_index - 56]
        fx = chess.square_file(from_square)
        fy = chess.square_rank(from_square)
        tx = fx + dx
        ty = fy + dy
        if 0 <= tx < 8 and 0 <= ty < 8:
            to_square = chess.square(tx, ty)
            move = chess.Move(from_square, to_square)
            return move

    else:
        # Promotion
        promo_idx = sub_index - 64
        promotion_types = [chess.KNIGHT, chess.ROOK, chess.BISHOP]
        directions = [-1, 0, 1]
        promo_type = promotion_types[promo_idx // 3]
        dx = directions[promo_idx % 3]

        fx = chess.square_file(from_square)
        fy = chess.square_rank(from_square)
        tx = fx + dx
        if fy == 6:
            ty = 7
        elif fy == 1:
            ty = 0
        else:
            raise ValueError("Not a valid promotion square")

        if 0 <= tx < 8 and 0 <= ty < 8:
            to_square = chess.square(tx, ty)
            move = chess.Move(from_square, to_square, promotion=promo_type)
            return move

    raise ValueError(f"Index {index} could not be decoded to a legal move.")


TOTAL_MOVES = 4672
index_move_map, move_index_map = {}, {}
for id in range(TOTAL_MOVES+1):
    try:
        index_move_map[id] = index_to_move(id)
    except:
        continue
for _, move in index_move_map.items():
    id = move_to_index(move)
    move_index_map[move] = id
    curr_uci = move.uci()
    # Queen promotion maps to native
    if curr_uci[-1] =='1' or curr_uci[-1] == '8':
        move_index_map[chess.Move.from_uci(curr_uci+'q')] = id
len(index_move_map), len(move_index_map)

(1924, 2312)

# Model

In [69]:
class ResidualBlock(nn.Module):
    def __init__(self, channels, dropout=0.1):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.dropout = nn.Dropout2d(dropout)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.dropout(out)
        out = self.bn2(self.conv2(out))
        out += identity
        return F.relu(out)


class ChessNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_conv = nn.Conv2d(19, 128, kernel_size=3, padding=1)
        self.bn_input = nn.BatchNorm2d(128)

        # Deepen the residual stack
        self.res_blocks = nn.Sequential(
            *[ResidualBlock(128, dropout=0.1) for _ in range(10)])

        # Policy head
        self.policy_conv = nn.Conv2d(128, 4, kernel_size=1)
        self.policy_bn = nn.BatchNorm2d(4)
        self.policy_fc1 = nn.Linear(4 * 8 * 8, 1024)
        self.policy_fc2 = nn.Linear(1024, 4672)

        # Value head
        self.value_conv = nn.Conv2d(128, 2, kernel_size=1)
        self.value_bn = nn.BatchNorm2d(2)
        self.value_fc1 = nn.Linear(2 * 8 * 8, 128)
        self.value_fc2 = nn.Linear(128, 64)
        self.value_fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.bn_input(self.input_conv(x)))
        x = self.res_blocks(x)

        # Policy head
        p = F.relu(self.policy_bn(self.policy_conv(x)))
        p = p.view(p.size(0), -1)
        p = F.relu(self.policy_fc1(p))
        p = self.policy_fc2(p)

        # Value head
        v = F.relu(self.value_bn(self.value_conv(x)))
        v = v.view(v.size(0), -1)
        v = F.relu(self.value_fc1(v))
        v = F.relu(self.value_fc2(v))
        v = torch.tanh(self.value_fc3(v))

        return p, v


# Instantiate and print summary
model = ChessNet()
model.to(DEVICE)

# Load model if checkpoint exists
checkpoint_path = "best_model.pt"
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
    print(f"✅ Loaded model from {checkpoint_path}")
else:
    print("🚀 No saved model found. Using a fresh model.")

# Print model summary for input size [batch_size=1, 19, 8, 8]
summary(model, input_size=(19, 8, 8))

✅ Loaded model from best_model.pt
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 128, 8, 8]          22,016
       BatchNorm2d-2            [-1, 128, 8, 8]             256
            Conv2d-3            [-1, 128, 8, 8]         147,584
       BatchNorm2d-4            [-1, 128, 8, 8]             256
         Dropout2d-5            [-1, 128, 8, 8]               0
            Conv2d-6            [-1, 128, 8, 8]         147,584
       BatchNorm2d-7            [-1, 128, 8, 8]             256
     ResidualBlock-8            [-1, 128, 8, 8]               0
            Conv2d-9            [-1, 128, 8, 8]         147,584
      BatchNorm2d-10            [-1, 128, 8, 8]             256
        Dropout2d-11            [-1, 128, 8, 8]               0
           Conv2d-12            [-1, 128, 8, 8]         147,584
      BatchNorm2d-13            [-1, 128, 8, 8]             256
    R

# MCTS Search

In [70]:
import numpy as np
import math
import chess


class MCTSNode:
    def __init__(self, board: chess.Board, parent=None, move=None):
        self.board = board
        self.parent = parent
        self.move = move
        self.children = {}
        self.visit_count = 0
        self.total_value = 0.0
        self.prior = 0.0

    def is_expanded(self):
        return len(self.children) > 0

    def value(self):
        if self.visit_count == 0:
            return 0
        return self.total_value / self.visit_count

def softmax_temperature(logits, temperature=1.0):
    logits = np.array(logits, dtype=np.float32)
    logits = logits / temperature
    logits = logits - np.max(logits)
    exp_logits = np.exp(logits)
    return exp_logits / np.sum(exp_logits)

def select_child(node, c_puct=1.0):
    best_score = -float('inf')
    best_move = None
    best_child = None

    for move, child in node.children.items():
        ucb = child.value() + c_puct * child.prior * \
            math.sqrt(node.visit_count) / (1 + child.visit_count)
        if ucb > best_score:
            best_score = ucb
            best_move = move
            best_child = child
    return best_move, best_child

def expand_node(node, policy_logits, legal_moves):
    legal_indices = [move_index_map[move] for move in legal_moves]
    masked_logits = np.full_like(policy_logits, -np.inf)
    for idx in legal_indices:
        masked_logits[idx] = policy_logits[idx]

    probs = softmax_temperature(masked_logits)
    for move in legal_moves:
        index = move_index_map[move]
        prior = probs[index]
        next_board = node.board.copy()
        next_board.push(move)
        child = MCTSNode(next_board, parent=node, move=move)
        child.prior = prior
        node.children[move] = child


def add_dirichlet_noise(node, alpha=0.3, epsilon=0.25):
    legal_moves = list(node.children.keys())
    noise = np.random.dirichlet([alpha] * len(legal_moves))

    for i, move in enumerate(legal_moves):
        child = node.children[move]
        child.prior = (1 - epsilon) * child.prior + epsilon * noise[i]

def backpropagate(node, value):
    while node:
        node.visit_count += 1
        node.total_value += value
        node = node.parent
        value = -value

def evaluate_board_with_mcts_node(board, model):

    if board.is_game_over(claim_draw=True):
        result = board.result(claim_draw=True)
        if result == '1-0':
            value = 1
        elif result == '0-1':
            value = -1
        else:
            value = 0

        # Assign uniform policy for terminal state
        policy_logits = np.zeros(TOTAL_MOVES)
        return policy_logits, value

    # Non-terminal: use neural network
    board_input = board_encoder(board)
    board_input = torch.tensor(
        board_input, dtype=torch.float32).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        policy_logits, value_tensor = model(board_input)
        policy_logits = policy_logits[0].cpu().numpy()
        value = float(value_tensor[0].item())

    return policy_logits, value

def mcts_search(board, model, num_simulations=800):
    root = MCTSNode(board)
    policy_logits, value = evaluate_board_with_mcts_node(board, model)

    legal_moves = list(board.legal_moves)
    expand_node(root, policy_logits, legal_moves)
    add_dirichlet_noise(root)

    for _ in range(num_simulations):
        node = root
        search_path = [node]

        while node.is_expanded():
            move, node = select_child(node)
            search_path.append(node)

        policy_logits, value = evaluate_board_with_mcts_node(node.board, model)
        legal_moves = list(node.board.legal_moves)
        expand_node(node, policy_logits, legal_moves)

        backpropagate(node, value)

    return root

def choose_action(root, temperature=1.0):
    visit_counts = np.array(
        [child.visit_count for child in root.children.values()])
    moves = list(root.children.keys())

    if temperature == 0:
        best_move = moves[np.argmax(visit_counts)]
        return best_move

    probs = softmax_temperature(visit_counts, temperature)
    return np.random.choice(moves, p=probs)

# Stockfish

In [71]:
STOCKFISH_PATH = "/home/mystic/Programming/mystic-bot/bots/stockfish-ubuntu-x86-64"

def get_stockfish_score(board, depth=15):
    with chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH) as engine:
        result = engine.analyse(board, chess.engine.Limit(depth=depth))
        score = result["score"].white().score(
            mate_score=10000)
        return score if score is not None else 0

# Self Play

In [72]:
MAX_MOVES = 200  # Prevents extremely long games


def play_self_play_game(model, num_simulations=800, max_moves=200):
    board = chess.Board()
    states = []
    policies = []
    outcomes = []

    i = 0
    while not board.is_game_over(claim_draw=True) and i < max_moves:
        root = mcts_search(board, model, num_simulations)
        move = choose_action(root, temperature=1.0)

        # Record data
        board_tensor = board_encoder(board)
        visit_counts = np.array(
            [child.visit_count for child in root.children.values()])
        visit_sum = np.sum(visit_counts)
        policy_target = np.zeros(TOTAL_MOVES)
        for m, child in root.children.items():
            idx = move_index_map[m]
            policy_target[idx] = child.visit_count / visit_sum

        states.append(board_tensor)
        policies.append(policy_target)
        board.push(move)

        # Save board image
        svg = chess.svg.board(board=board)
        cairosvg.svg2png(bytestring=svg.encode('utf-8'), write_to='temp.jpg')

        i += 1
        print(f"Finished Move: {i}")


    if board.is_game_over(claim_draw=True):
        result = board.result(claim_draw=True)
        if result == '1-0':
            z = 1
        elif result == '0-1':
            z = -1
        else:
            z = 0
    else:
        # Use Stockfish to evaluate final position
        score = get_stockfish_score(board)
        z = np.tanh(score / 400)  # normalize to [-1, 1] range
        print(f"🧮 Stockfish eval: {score} → normalized {z:.2f}")

    # Assign outcomes for each step
    for j in range(len(states)):
        outcomes.append(z if (j % 2 == 0) else -z)

    return states, policies, outcomes


def generate_training_data(model, num_games=10, num_simulations=800, max_moves=200):
    all_states = []
    all_policies = []
    all_values = []

    for _ in tqdm(range(num_games)):
        states, policies, values = play_self_play_game(
            model, num_simulations, max_moves)
        all_states.extend(states)
        all_policies.extend(policies)
        all_values.extend(values)

    return np.array(all_states), np.array(all_policies), np.array(all_values)


def train_model(model, X, P, Z, epochs=10, batch_size=64, save_path="best_model.pt"):
    model.train()
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn_policy = torch.nn.CrossEntropyLoss()
    loss_fn_value = torch.nn.MSELoss()

    # Convert numpy arrays to PyTorch tensors
    X = torch.tensor(X, dtype=torch.float32)
    P = torch.tensor(P, dtype=torch.float32)
    Z = torch.tensor(Z, dtype=torch.float32)

    dataset = torch.utils.data.TensorDataset(X, P, Z)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True)

    best_loss = float('inf')

    for epoch in range(epochs):
        total_loss = 0
        pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)

        for xb, pb, zb in pbar:
            xb, pb, zb = xb.to(DEVICE), pb.to(DEVICE), zb.to(DEVICE)

            pred_policy, pred_value = model(xb)
            loss_p = loss_fn_policy(pred_policy, torch.argmax(pb, dim=1))
            loss_v = loss_fn_value(pred_value.squeeze(), zb)
            loss = loss_p + loss_v

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pbar.set_postfix(loss=f"{loss.item():.4f}")

        avg_loss = total_loss / len(loader)
        print(f"✅ Epoch {epoch+1}: Avg Loss = {avg_loss:.4f}")

        # Save model if it's the best so far
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), save_path)
            print(
                f"💾 Best model saved at epoch {epoch+1} with loss {best_loss:.4f}")

# Run Pipeline

In [None]:
def run_training_pipeline(
    model,
    num_iterations=10,
    num_games=10,
    num_simulations=100,
    epochs=10,
    batch_size=64,
    max_moves=200,
    save_path="best_model.pt"
):
    print("🎮 Generating training data via self-play...")

    all_states, all_policies, all_values = generate_training_data(
        model, num_games=num_games,
        num_simulations=num_simulations,
        max_moves=max_moves
    )

    print(f"\n📊 Collected {len(all_states)} positions from {num_games} games.")
    print("🧠 Starting training...")

    train_model(
        model,
        all_states,
        all_policies,
        all_values,
        epochs=epochs,
        batch_size=batch_size,
        save_path=save_path
    )

    print("\n✅ Training complete!")


def run_full_training_loop(
    model,
    num_iterations=10,
    games_per_iteration=10,
    num_simulations=100,
    epochs=10,
    batch_size=64,
    max_moves=200,
    save_path="best_model.pt"
):
    for gen in range(1, num_iterations + 1):
        print(f"\n🚀 Starting Generation {gen}/{num_iterations}")

        run_training_pipeline(
            model=model,
            num_games=games_per_iteration,
            num_simulations=num_simulations,
            epochs=epochs,
            batch_size=batch_size,
            max_moves=max_moves,
            save_path=save_path
        )

        # Optionally reload best weights after each gen
        model.load_state_dict(torch.load(save_path))
        model.to(DEVICE)

    print("\n🏁 All generations complete.")


run_full_training_loop(
    model=model,
    num_iterations=10,
    games_per_iteration=5,
    num_simulations=50,
    epochs=10,
    batch_size=64,
    max_moves=100,
    save_path="best_model.pt"
)


🚀 Starting Generation 1/10
🎮 Generating training data via self-play...


  0%|          | 0/5 [00:00<?, ?it/s]

Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished Move: 19
Finished Move: 20
Finished Move: 21
Finished Move: 22
Finished Move: 23
Finished Move: 24
Finished Move: 25
Finished Move: 26
Finished Move: 27
Finished Move: 28
Finished Move: 29
Finished Move: 30
Finished Move: 31
Finished Move: 32
Finished Move: 33
Finished Move: 34
Finished Move: 35
Finished Move: 36
Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished Move: 19
Finished Move: 20
Finished M

Epoch 1/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 1: Avg Loss = 0.2180
💾 Best model saved at epoch 1 with loss 0.2180


Epoch 2/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 2: Avg Loss = 0.1765
💾 Best model saved at epoch 2 with loss 0.1765


Epoch 3/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 3: Avg Loss = 0.1350
💾 Best model saved at epoch 3 with loss 0.1350


Epoch 4/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 4: Avg Loss = 0.0441
💾 Best model saved at epoch 4 with loss 0.0441


Epoch 5/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 5: Avg Loss = 0.0212
💾 Best model saved at epoch 5 with loss 0.0212


Epoch 6/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 6: Avg Loss = 0.0168
💾 Best model saved at epoch 6 with loss 0.0168


Epoch 7/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 7: Avg Loss = 0.0067
💾 Best model saved at epoch 7 with loss 0.0067


Epoch 8/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 8: Avg Loss = 0.0039
💾 Best model saved at epoch 8 with loss 0.0039


Epoch 9/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 9: Avg Loss = 0.0021
💾 Best model saved at epoch 9 with loss 0.0021


Epoch 10/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 10: Avg Loss = 0.0015
💾 Best model saved at epoch 10 with loss 0.0015

✅ Training complete!

🚀 Starting Generation 2/10
🎮 Generating training data via self-play...


  0%|          | 0/5 [00:00<?, ?it/s]

Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished Move: 19
Finished Move: 20
Finished Move: 21
Finished Move: 22
Finished Move: 23
Finished Move: 24
Finished Move: 25
Finished Move: 26
Finished Move: 27
Finished Move: 28
Finished Move: 29
Finished Move: 30
Finished Move: 31
Finished Move: 32
Finished Move: 33
Finished Move: 34
Finished Move: 35
Finished Move: 36
Finished Move: 37
Finished Move: 38
Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished M

Epoch 1/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 1: Avg Loss = 0.3461
💾 Best model saved at epoch 1 with loss 0.3461


Epoch 2/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 2: Avg Loss = 0.0477
💾 Best model saved at epoch 2 with loss 0.0477


Epoch 3/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 3: Avg Loss = 0.0229
💾 Best model saved at epoch 3 with loss 0.0229


Epoch 4/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 4: Avg Loss = 0.0048
💾 Best model saved at epoch 4 with loss 0.0048


Epoch 5/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 5: Avg Loss = 0.0032
💾 Best model saved at epoch 5 with loss 0.0032


Epoch 6/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 6: Avg Loss = 0.0014
💾 Best model saved at epoch 6 with loss 0.0014


Epoch 7/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 7: Avg Loss = 0.0009
💾 Best model saved at epoch 7 with loss 0.0009


Epoch 8/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 8: Avg Loss = 0.0007
💾 Best model saved at epoch 8 with loss 0.0007


Epoch 9/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 9: Avg Loss = 0.0005
💾 Best model saved at epoch 9 with loss 0.0005


Epoch 10/10:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Epoch 10: Avg Loss = 0.0003
💾 Best model saved at epoch 10 with loss 0.0003

✅ Training complete!

🚀 Starting Generation 3/10
🎮 Generating training data via self-play...


  0%|          | 0/5 [00:00<?, ?it/s]

Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished Move: 19
Finished Move: 20
Finished Move: 21
Finished Move: 22
Finished Move: 23
Finished Move: 24
Finished Move: 25
Finished Move: 26
Finished Move: 27
Finished Move: 28
Finished Move: 29


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f8ea67ecf20>>
Traceback (most recent call last):
  File "/home/mystic/Programming/mystic-bot/.env/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Finished Move: 30
Finished Move: 31
Finished Move: 32
Finished Move: 33
Finished Move: 34
Finished Move: 1
Finished Move: 2
Finished Move: 3
Finished Move: 4
Finished Move: 5
Finished Move: 6
Finished Move: 7
Finished Move: 8
Finished Move: 9
Finished Move: 10
Finished Move: 11
Finished Move: 12
Finished Move: 13
Finished Move: 14
Finished Move: 15
Finished Move: 16
Finished Move: 17
Finished Move: 18
Finished Move: 19
Finished Move: 20
Finished Move: 21
Finished Move: 22
