## Chess AI
construct $f(p)$ as a 3 layer deep 2048 units wide artificial neural network\
for each move, $f(p) = \max\limits_{p\rightarrow p_0} - f(p_0)$\

In [1]:
import os
import chess
import chess.pgn
import time
import math
from tqdm import tqdm
import random
import logging
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader, Dataset
mp.set_start_method("spawn")
logger = logging.getLogger(__name__)
logging.basicConfig(filename='myapp.log', level=logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG) 
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LAMBDA = 10
EPOCHS = 10
KAPPA = 1

### Prepare dataset
1. Players will choose an optimal or near-optimal move. This means that for two position in succession 
$p \rightarrow q$ observed in the game, we will have $f(p) = -f(q)$
2. For the same reason above, going from $p$ not to $q$, but to a random position $r$, we must have $f(r) > f(q)$ because the random position is better for the next player and worse for the player that made the move.

In [3]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("dimitrioskourtikakis/gm-games-chesscom")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/dimitrioskourtikakis/gm-games-chesscom/versions/1


In [5]:
import pandas
chessGames = pandas.read_csv(path+"/GM_games_dataset.csv", chunksize=1000)
# copy the "pgn" column to a pgn file
for i, chunk in enumerate(tqdm(chessGames)):
    with open(f"/root/autodl-tmp/data/chessGame{i}.pgn", "w") as f:
        for pgn in chunk['pgn']:
            f.write(pgn + "\n\n")

4812it [02:27, 32.52it/s]


In [2]:
from chessModel import ChessDataset, ChessValueNetwork, objective_function, piece_to_index

def lr_lambda(current_epoch):
    current_time = time.time()  # 当前时间戳
    elapsed_time = current_time - t0
    return math.exp(-elapsed_time / 86400)
    
def board2vec(board, flip=False):
        vec = np.zeros((12, 8, 8), dtype=np.float32)
        for square in chess.SQUARES:
            piece = board.piece_at(square)
            if piece is not None:
                piece_index = piece_to_index[piece.symbol()]
                row, col = divmod(square, 8)
                if flip:
                    # 翻转行
                    row = 7 - row
                    # 翻转棋子颜色
                    if piece_index < 6:  
                        piece_index += 6 
                    else: 
                        piece_index -= 6  
                vec[piece_index, row, col] = 1
        vec = torch.tensor(vec, dtype=torch.float32)
        return vec
    
model = ChessValueNetwork().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.02, momentum=0.9, nesterov=True)
t0 = time.time()
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

In [3]:
model.load_state_dict(torch.load("1.79.pth"))

<All keys matched successfully>

In [None]:

# 训练代码
def train_model(model, optimizer, scheduler, objective_function, pgn_files, test_pgn, device, EPOCHS, KAPPA, logger):
    logger.info("creating dataset for test data")
    test_dataset = ChessDataset(test_pgn, device=device)
    test_dataloader = DataLoader(test_dataset, batch_size=64)
    best_loss = float("inf")
    for epoch in range(EPOCHS):
        total_loss = 0
        for pgn_file in pgn_files:
            try:
                model.train()
                # 为每个PGN文件创建数据集和数据加载器
                logger.info("creating dataset for " + pgn_file)
                chess_dataset = ChessDataset(pgn_file, device=device)
                dataloader = DataLoader(chess_dataset, batch_size=64)
                
                for batch_idx, (p, q, r) in enumerate(dataloader):
                    p, q, r = p.to(device), q.to(device), r.to(device)
                    optimizer.zero_grad()
                    loss = objective_function(model, p, q, r, KAPPA)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    if batch_idx % 1000 == 0:
                        logger.info(f"Epoch [{epoch+1}/{EPOCHS}], PGN File [{pgn_file}], Batch [{batch_idx}], Loss: {loss.item():.4f}, lr: {scheduler.get_last_lr()[0]}")
                model.eval()
                test_loss = 0
                for batch_idx, (p, q, r) in enumerate(test_dataloader):
                    p, q, r = p.to(device), q.to(device), r.to(device)
                    loss = objective_function(model, p, q, r, KAPPA)
                    test_loss += loss.item()
                logger.info("test loss:" + str(test_loss/len(test_dataloader)))
                if test_loss < best_loss:
                    torch.save(model.state_dict(), str(round(test_loss/len(test_dataloader), 2))+".pth")
                    logger.info("New best loss, saving model to" + str(round(test_loss/len(test_dataloader), 2))+".pth")
                    best_loss = test_loss
                scheduler.step()
                # checkmate 1-0
                board = chess.Board("rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq e3 0 1")
                vec = board2vec(board).to(device)
                vec = vec.unsqueeze(0)
                logger.info("1-0")
                logger.info(model(vec))
                # black 5.0
                board = chess.Board("rnb1q3/pppp1kpp/8/6b1/8/8/PPPP1PPP/RNB1K2R w KQ - 0 10")
                vec = board2vec(board).to(device)
                vec = vec.unsqueeze(0)
                logger.info("0-1")
                logger.info(model(vec))
                
            except KeyboardInterrupt:
                raise
            except Exception as e:
                logger.error(f"An error occurred: {e}")
                continue
        logger.info(f"Epoch [{epoch+1}/{EPOCHS}], Total Loss: {total_loss:.4f}")

if __name__ == "__main__":
    pgn_files = []
    for root, dirs, files in os.walk('/root/autodl-tmp/data'):
        for file in files:
            pgn_files.append("/root/autodl-tmp/data/" + file)
    random.shuffle(pgn_files)
    test_pgn = '/root/autodl-tmp/data/chessGame4811.pgn'
    pgn_files.pop(pgn_files.index(test_pgn))
    train_model(model, optimizer, scheduler, objective_function, pgn_files, test_pgn, device, EPOCHS, KAPPA, logger)


2025-01-25 21:46:49,006 - __main__ - INFO - creating dataset for test data
2025-01-25 21:46:51,037 - __main__ - INFO - creating dataset for /root/autodl-tmp/data/chessGame313.pgn
2025-01-25 21:47:17,183 - __main__ - INFO - Epoch [1/10], PGN File [/root/autodl-tmp/data/chessGame313.pgn], Batch [0], Loss: 1.7411, lr: 0.019999999983443154
2025-01-25 21:47:25,470 - __main__ - INFO - Epoch [1/10], PGN File [/root/autodl-tmp/data/chessGame313.pgn], Batch [1000], Loss: 1.6778, lr: 0.019999999983443154
2025-01-25 21:47:28,104 - __main__ - INFO - test loss:1.8418260193788087
2025-01-25 21:47:28,155 - __main__ - INFO - New best loss, saving model to1.84.pth
2025-01-25 21:47:28,157 - __main__ - INFO - 1-0
2025-01-25 21:47:28,167 - __main__ - INFO - tensor([[1.9448]], device='cuda:0', grad_fn=<AddmmBackward0>)
2025-01-25 21:47:28,334 - __main__ - INFO - 0-1
2025-01-25 21:47:28,335 - __main__ - INFO - tensor([[-0.2545]], device='cuda:0', grad_fn=<AddmmBackward0>)
2025-01-25 21:47:28,337 - __main__ 

In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("ronakbadhe/chess-evaluations")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/ronakbadhe/chess-evaluations/versions/5


In [4]:
from chessModel import EvalDataset, reinforcement_learning_loss
def custom_collate_fn(batch):
    p_batch = torch.stack([item[0] for item in batch]) 
    eval_batch = torch.stack([item[1] for item in batch])
    return p_batch, eval_batch

eval_dataset = EvalDataset('/root/autodl-tmp/chessData.csv', device)
eval_dataloader = DataLoader(eval_dataset, batch_size=64, collate_fn=custom_collate_fn)

In [5]:
def train_with_reinforcement_learning(model, dataset, optimizer, epochs=10, batch_size=64):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        # 每轮训练
        for i, batch in enumerate(dataset):
            if len(batch) % 2:
                batch = batch[:-1]
            # 分割批次为两部分
            half = len(batch) // 2
            p_batch, eval_batch = batch
            p1_batch, p2_batch = p_batch[:half], p_batch[half:]
            eval1_batch, eval2_batch = eval_batch[:half], eval_batch[half:]
            p1_batch = p1_batch.to(device)
            eval1_batch = eval1_batch.to(device)
            p2_batch = p2_batch.to(device)
            eval2_batch = eval2_batch.to(device)
            # 计算强化学习损失
            loss = reinforcement_learning_loss(model, p1_batch, eval1_batch, p2_batch, eval2_batch)
            # 反向传播与优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            if i % 1000 == 0:
                with torch.no_grad():
                    model.eval()
                    # checkmate 1-0
                    board = chess.Board("rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq e3 0 1")
                    vec = board2vec(board).to(device)
                    vec = vec.unsqueeze(0)
                    logger.info("1-0")
                    logger.info(model(vec))
                    # black 5.0
                    board = chess.Board("rnb1q3/pppp1kpp/8/6b1/8/8/PPPP1PPP/RNB1K2R w KQ - 0 10")
                    vec = board2vec(board).to(device)
                    vec = vec.unsqueeze(0)
                    logger.info("0-1")
                    logger.info(model(vec))
                    logger.info(f"Step {i}, Loss: {loss.item():.4f}")
                    model.train()
                
        avg_loss = total_loss / steps_per_epoch
        logger.info(f"Epoch {epoch + 1}/{epochs}, Avg Loss: {avg_loss:.4f}")
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
train_with_reinforcement_learning(model, eval_dataloader, optimizer, 10)

2025-01-25 21:02:00,064 - __main__ - INFO - 1-0
2025-01-25 21:02:00,066 - __main__ - INFO - tensor([[1.3108]], device='cuda:0')
2025-01-25 21:02:00,218 - __main__ - INFO - 0-1
2025-01-25 21:02:00,219 - __main__ - INFO - tensor([[-0.3861]], device='cuda:0')
2025-01-25 21:02:00,221 - __main__ - INFO - Step 0, Loss: 0.6341
2025-01-25 21:02:24,388 - __main__ - INFO - 1-0
2025-01-25 21:02:24,390 - __main__ - INFO - tensor([[0.6867]], device='cuda:0')
2025-01-25 21:02:24,393 - __main__ - INFO - 0-1
2025-01-25 21:02:24,394 - __main__ - INFO - tensor([[-0.0997]], device='cuda:0')
2025-01-25 21:02:24,396 - __main__ - INFO - Step 1000, Loss: 0.5280
2025-01-25 21:02:48,642 - __main__ - INFO - 1-0
2025-01-25 21:02:48,644 - __main__ - INFO - tensor([[0.0794]], device='cuda:0')
2025-01-25 21:02:48,647 - __main__ - INFO - 0-1
2025-01-25 21:02:48,648 - __main__ - INFO - tensor([[-0.0147]], device='cuda:0')
2025-01-25 21:02:48,650 - __main__ - INFO - Step 2000, Loss: 0.3225
2025-01-25 21:03:12,400 - __

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), "1.pth")