In [2]:
# --- COLAB TRAINER (MLP + MIXED REWARDS) ---
!pip install -q python-chess
import chess
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import time
from google.colab import drive

In [5]:
# --- CONFIG ---
# Google Drive mount point and the location of the single checkpoint file.
MOUNT_PATH = '/content/drive'
SAVE_DIR = '/content/drive/My Drive/ChessRL_Models'
MODEL_NAME = 'chess_mlp_hybrid.pth'
MODEL_PATH = os.path.join(SAVE_DIR, MODEL_NAME)

TRAIN_BATCH_SIZE = 100  # Self-play games whose gradients are accumulated per optimizer step
MAX_MOVES = 100         # Per-game cap, counted in plies (the counter increases once per pushed move)
EPSILON_START = 0.9     # Initial probability of playing a random move
EPSILON_DECAY = 0.97    # Applied once per batch: 0.9 * 0.97**n reaches MIN_EPSILON after ~95 batches
MIN_EPSILON = 0.05      # Exploration never drops below this probability
LEARNING_RATE = 0.001   # Learning rate passed to the Adam optimizer

# Setup Device
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on: {DEVICE}")

Training on: cuda


In [4]:
# Mount Drive
# Attach Google Drive at MOUNT_PATH, then make sure the checkpoint directory
# exists (exist_ok=True makes re-running this cell harmless).
drive.mount(MOUNT_PATH)
os.makedirs(SAVE_DIR, exist_ok=True)

Mounted at /content/drive


In [7]:
# --- 1. THE EYES (FLAT 768 INPUTS) ---
def board_to_tensor(board):
    """Encode a board as a flat one-hot vector of length 768 on DEVICE.

    Layout: 12 planes of 64 squares each. Planes 0-5 hold White's pawn,
    knight, bishop, rook, queen and king; planes 6-11 hold the same piece
    types for Black. The entry at ``plane * 64 + square`` is 1.0 when that
    piece stands on that square and 0.0 otherwise.

    Args:
        board: the position to encode (must provide ``piece_map()``).

    Returns:
        A float32 tensor of shape (768,) allocated on DEVICE.
    """
    plane_of = {
        chess.PAWN: 0, chess.KNIGHT: 1, chess.BISHOP: 2,
        chess.ROOK: 3, chess.QUEEN: 4, chess.KING: 5
    }
    encoded = torch.zeros(768, dtype=torch.float32, device=DEVICE)

    # Only occupied squares are visited; every other entry stays zero.
    for square, piece in board.piece_map().items():
        plane = plane_of[piece.piece_type]
        if piece.color != chess.WHITE:
            plane += 6  # Black pieces use the second group of six planes
        encoded[plane * 64 + square] = 1.0
    return encoded

# --- 2. THE BRAIN (3-LAYER MLP) ---
class ChessNet(nn.Module):
    """Fully connected value network: 768 -> 512 -> 256 -> 128 -> 1.

    The three hidden layers use ReLU; the single output goes through tanh,
    so every prediction lies in [-1, 1].
    """

    # Layer widths from input to output.
    _WIDTHS = (768, 512, 256, 128, 1)

    def __init__(self):
        super().__init__()
        # Layers are registered as fc1..fc4, in that order, so parameter names
        # (and therefore saved state_dicts) stay the same.
        for number, (fan_in, fan_out) in enumerate(zip(self._WIDTHS, self._WIDTHS[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Map encoded boards (last dimension 768) to values in [-1, 1]."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return torch.tanh(self.fc4(hidden))  # Output: Board Value

# --- HELPER: MATERIAL SCORE ---
def get_material_score(board):
    """Return the material balance of ``board`` from White's point of view.

    Pieces are weighted pawn=1, knight=3, bishop=3, rook=5, queen=9; any
    other piece type (the king) counts as 0. The result is White's total
    minus Black's total, so positive values favour White.
    """
    piece_values = {chess.PAWN: 1, chess.KNIGHT: 3, chess.BISHOP: 3, chess.ROOK: 5, chess.QUEEN: 9}
    balance = 0
    for piece in board.piece_map().values():
        worth = piece_values.get(piece.piece_type, 0)
        # White material adds to the balance, Black material subtracts from it.
        if piece.color == chess.WHITE:
            balance += worth
        else:
            balance -= worth
    return balance

# --- HELPER: CHOOSE MOVE (EPSILON GREEDY) ---
def choose_move(board, model, epsilon):
    """Pick a move for the side to play using an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random legal move is returned.
    Otherwise every legal move is tried on the board, the resulting positions
    are scored by ``model`` in one batched forward pass, and the move whose
    position is best for the side to move is returned. Model outputs are
    treated as White-positive, so they are negated when Black is to move.

    Args:
        board: current position; it is pushed/popped internally and is left
            unchanged on return.
        model: callable mapping a (N, 768) tensor to N position values.
        epsilon: exploration probability in [0, 1].

    Returns:
        A legal move, or None when the position has no legal moves.
    """
    legal_moves = list(board.legal_moves)
    if not legal_moves:
        return None

    # Exploration
    if random.random() < epsilon:
        return random.choice(legal_moves)

    # Exploitation: encode every successor position first...
    successors = []
    for move in legal_moves:
        board.push(move)
        try:
            successors.append(board_to_tensor(board))
        finally:
            board.pop()  # always restore the caller's position

    # ...then score them all at once instead of one forward pass (and one
    # device synchronisation) per move.
    with torch.no_grad():
        values = model(torch.stack(successors)).reshape(-1)

    if board.turn != chess.WHITE:
        values = -values  # Black wants the lowest White-positive value

    # A NaN prediction must never win the comparison; if every value is NaN the
    # first legal move is returned rather than None.
    values = values.masked_fill(torch.isnan(values), float('-inf'))

    # argmax returns the first maximal index, matching a strict '>' scan.
    best_index = int(torch.argmax(values).item())
    return legal_moves[best_index]

In [6]:
# --- SAVE CHECKPOINT ---
def save_checkpoint(model, optimizer, epsilon, games):
    """Persist the training state to MODEL_PATH without risking the old file.

    The state is first written to a sibling temporary file and then moved
    over MODEL_PATH with ``os.replace``. If the write is interrupted (the
    training loop is normally stopped by interrupting the cell), the previous
    checkpoint is still intact instead of being left half-written.

    Args:
        model: network whose ``state_dict()`` is stored under 'state_dict'.
        optimizer: optimizer whose ``state_dict()`` is stored under 'optimizer'.
        epsilon: current exploration rate, stored under 'epsilon'.
        games: number of games played so far, stored under 'games'.
    """
    state = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epsilon': epsilon,
        'games': games
    }

    # Make the function usable even if the directory was never created.
    target_dir = os.path.dirname(MODEL_PATH)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    tmp_path = MODEL_PATH + '.tmp'
    torch.save(state, tmp_path)
    os.replace(tmp_path, MODEL_PATH)  # swap the finished file into place
    print(f"Saved Checkpoint: {MODEL_PATH}")

def load_checkpoint(model, optimizer):
    """Restore training state from MODEL_PATH if a checkpoint exists.

    When the file is present, its weights are loaded into ``model`` and its
    optimizer state into ``optimizer`` (tensors are mapped onto DEVICE), and
    the stored exploration rate and game count are returned. When it is
    absent, nothing is modified.

    Returns:
        ``(epsilon, games)`` from the checkpoint, or ``(EPSILON_START, 0)``
        when no checkpoint file exists.
    """
    # Fresh start: no file on disk yet.
    if not os.path.exists(MODEL_PATH):
        return EPSILON_START, 0

    print("Found checkpoint! Loading...")
    saved = torch.load(MODEL_PATH, map_location=DEVICE)
    saved_epsilon, saved_games = saved['epsilon'], saved['games']
    print(saved_epsilon , ' epsilon, games = ', saved_games)

    model.load_state_dict(saved['state_dict'])
    optimizer.load_state_dict(saved['optimizer'])
    return saved_epsilon, saved_games

In [None]:
# --- MAIN TRAINING LOOP ---
# ChessNet ends in tanh, so every regression target must lie in [-1, 1].
# A larger target (the former +/-2.5 for checkmate) can never be matched and
# only drives the output layer into saturation.
MATE_REWARD = 1.0        # Target for a checkmate; also the bound for all other targets
MATERIAL_SCALE = 40.0    # Divisor applied to the material balance
STALL_PENALTY = 0.002    # Subtracted when a move leaves the material balance unchanged


def _move_reward(board, prev_mat, curr_mat):
    """Return the White-positive training target for the position just reached.

    Args:
        board: position after the move has been pushed.
        prev_mat: material balance before the move.
        curr_mat: material balance after the move.

    Returns:
        +MATE_REWARD / -MATE_REWARD for a checkmate won by White / Black;
        otherwise the scaled material balance, minus STALL_PENALTY when the
        balance did not change, limited to [-MATE_REWARD, MATE_REWARD].
    """
    if board.is_checkmate():
        return MATE_REWARD if board.result() == "1-0" else -MATE_REWARD

    # Standard Material Reward
    reward = curr_mat / MATERIAL_SCALE
    # Stalling Penalty (force action)
    # NOTE(review): the penalty is subtracted regardless of which side moved,
    # i.e. always in White's frame -- confirm this is intended.
    if curr_mat == prev_mat:
        reward -= STALL_PENALTY
    # Extra material from promotions could otherwise leave the tanh range.
    return max(-MATE_REWARD, min(MATE_REWARD, reward))


def _play_training_game(model, epsilon):
    """Play one epsilon-greedy self-play game and collect training pairs.

    Each recorded state is the position *before* a move; its target is the
    reward of the position *after* that move. The game stops when it is over
    or after MAX_MOVES plies.

    Returns:
        ``(states, targets)``: a list of encoded positions and a list of
        float targets of the same length (at least one entry each).
    """
    board = chess.Board()
    states, targets = [], []
    plies = 0
    curr_mat = get_material_score(board)

    while not board.is_game_over() and plies < MAX_MOVES:
        states.append(board_to_tensor(board))
        prev_mat = curr_mat

        board.push(choose_move(board, model, epsilon))
        plies += 1

        curr_mat = get_material_score(board)
        targets.append(_move_reward(board, prev_mat, curr_mat))

    return states, targets


brain = ChessNet().to(DEVICE)
optimizer = optim.Adam(brain.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

# Resume from Drive when a checkpoint exists, otherwise start from scratch.
epsilon, total_games = load_checkpoint(brain, optimizer)

print("Starting Training...")
start_time = time.time()

# Runs until the cell is interrupted; a checkpoint is written after every batch.
while True:
    optimizer.zero_grad()
    batch_loss = 0.0

    # Play a batch of games, accumulating gradients across all of them
    for _ in range(TRAIN_BATCH_SIZE):
        game_states, target_scores = _play_training_game(brain, epsilon)

        # Stack data for learning
        t_states = torch.stack(game_states)
        t_targets = torch.tensor(target_scores, dtype=torch.float32, device=DEVICE).view(-1, 1)

        # Calculate Error; dividing by the batch size makes the accumulated
        # gradient the mean over games rather than the sum.
        pred = brain(t_states)
        loss = criterion(pred, t_targets) / TRAIN_BATCH_SIZE
        loss.backward()
        batch_loss += loss.item()

        total_games += 1

    # Update Brain (one optimizer step per batch of games)
    optimizer.step()

    # Decay Randomness
    epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

    duration = time.time() - start_time
    print(f"Games: {total_games} | Eps: {epsilon:.4f} | Loss: {batch_loss:.5f} | Time: {duration:.1f}s")

    save_checkpoint(brain, optimizer, epsilon, total_games)
    start_time = time.time() # Reset Timer