## Chess AI
Construct $f(p)$ as an artificial neural network 3 layers deep and 2048 units wide.\
For each move, $f(p) = \max\limits_{p \rightarrow p_0} -f(p_0)$, where the maximum runs over every position $p_0$ reachable from $p$ by one legal move.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import chess
import chess.pgn
import time
import math

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyper-parameters used further down the notebook.
LAMBDA = 0.01  # passed to the optimizer as its weight_decay
EPOCHS = 10  # number of passes over the training samples
KAPPA = 10.0  # kappa scaling factor handed to the objective function

### Prepare dataset
1. Players will choose an optimal or near-optimal move. This means that for two positions in succession,
$p \rightarrow q$, observed in a game, we will have $f(p) = -f(q)$.
2. For the same reason, going from $p$ not to $q$ but to a random position $r$, we must have $f(r) > f(q)$, because the random position is better for the next player and worse for the player who made the move.

In [None]:
import pandas
# Load the game table and copy its "pgn" column into a single PGN file,
# separating consecutive games with a blank line.
chessGames = pandas.read_csv('chessgm.csv')
# The context manager closes the file even if a write fails part-way through.
with open("chessGames.pgn", "w") as f:
    # Missing cells are read as NaN (a float), which cannot be concatenated
    # with a string, so rows without PGN text are skipped.
    for pgn in chessGames['pgn'].dropna():
        f.write(pgn + "\n\n")

In [2]:
import random

# Text-mode read handle on the PGN dump; the game-parsing loop later in this cell reads from it.
f = open("chessGames.pgn")
# Channel index for each piece symbol: 0-5 are the white pieces (uppercase
# symbols P, N, B, R, Q, K) and 6-11 the black pieces (lowercase symbols).
piece_to_index = {symbol: channel for channel, symbol in enumerate("PNBRQKpnbrqk")}
# Collects the training samples appended while the games are parsed.
chess_data = list()
def board2vec(board):
    """One-hot encode the piece placement of a board as a (12, 8, 8) tensor.

    The first axis is the piece channel given by ``piece_to_index``; the other
    two are ``square // 8`` and ``square % 8`` of the python-chess square index
    (channels-first layout).

    Args:
        board: a python-chess board; only ``board.piece_at`` is used.

    Returns:
        A float32 tensor of shape (12, 8, 8) located on ``device``, with a 1 at
        every (channel, row, column) that holds a piece and 0 elsewhere.

    NOTE(review): only piece placement is encoded -- side to move, castling
    rights and en-passant state are not. Confirm this is intended.
    """
    # Fill the planes on the CPU and transfer once at the end: writing single
    # elements into a tensor that already lives on an accelerator costs one
    # device operation per piece.
    vec = torch.zeros((12, 8, 8), dtype=torch.float32)
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None:
            vec[piece_to_index[piece.symbol()], square // 8, square % 8] = 1.0
    return vec.to(device)

# Turn every move of every game in the PGN file into one training triple
# (p, q, r):
#   p - position before the move,
#   q - position after the move that was actually played,
#   r - position after a randomly chosen legal move from p.
# try/finally guarantees the file handle is released even if parsing or
# encoding raises part-way through the file.
try:
    # read_game returns None at end of input; compare against None explicitly
    # instead of relying on the truthiness of the game object.
    while (game := chess.pgn.read_game(f)) is not None:
        chessBoard = game.board()
        for move in game.mainline_moves():
            legal_moves = list(chessBoard.legal_moves)
            # The random continuation should differ from the played move,
            # otherwise r == q and the ranking term has nothing to learn from.
            # When the played move is the only legal one, fall back to it.
            alternatives = [m for m in legal_moves if m != move] or legal_moves
            pseudo_move = random.choice(alternatives)
            chessBoard2 = chessBoard.copy()
            chessBoard2.push(pseudo_move)
            r = board2vec(chessBoard2)
            p = board2vec(chessBoard)
            chessBoard.push(move)
            q = board2vec(chessBoard)
            chess_data.append((p, q, r))
finally:
    f.close()

In [3]:
# Custom dataset
class ChessDataset(Dataset):
    """Map-style dataset over an indexable collection of (p, q, r) triples.

    Args:
        data: indexable collection whose items unpack into exactly three values.
        device: stored on the instance as ``self.device``; not used by the
            methods of this class.
    """

    def __init__(self, data, device):
        self.data, self.device = data, device

    def __len__(self):
        """Number of stored samples."""
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        """Return sample ``idx`` as a 3-tuple."""
        first, second, third = self.data[idx]
        return (first, second, third)
# Wrap the dataset in a real DataLoader so each epoch visits the samples in a
# fresh random order instead of game-by-game file order (consecutive positions
# of one game are highly correlated). batch_size=None turns automatic batching
# off, so iterating still yields one (p, q, r) triple at a time.
dataloader = DataLoader(ChessDataset(chess_data, device), batch_size=None, shuffle=True)

In [4]:
class ChessValueNetwork(nn.Module):
    """Small convolutional value network for 12-plane 8x8 board encodings.

    Two 3x3 convolutions and a 2x2 max-pool reduce a [12, 8, 8] board to
    [64, 4, 4]; the flattened 1024 features go through a three-layer MLP whose
    final Tanh bounds the score to [-1, 1].

    ``forward`` accepts a single board of shape [12, 8, 8] (returns shape [1])
    or a batch of shape [B, 12, 8, 8] (returns shape [B, 1]).
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(12, 32, kernel_size=3, padding=1),  # [12, 8, 8] -> [32, 8, 8]
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # [32, 8, 8] -> [64, 8, 8]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # [64, 8, 8] -> [64, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 4 * 4, 256),  # 1024 -> 256
            nn.ReLU(),
            nn.Linear(256, 128),  # 256 -> 128
            nn.ReLU(),
            nn.Linear(128, 1),  # scalar score
            nn.Tanh(),  # squash into [-1, 1]
        )

    def forward(self, x):
        """Score one board ([12, 8, 8]) or a batch of boards ([B, 12, 8, 8])."""
        x = self.conv(x)
        # Collapse only the trailing (channel, height, width) axes. Flattening
        # everything would merge the batch axis into the feature axis and
        # break the first Linear layer for any batched input.
        x = x.flatten(start_dim=-3)
        return self.fc(x)
    
# Objective function definition
import torch
import torch.nn.functional as F

def objective_function(model, p, q, r, kappa=10.0):
    """Training loss for one (p, q, r) triple, or a batch of them.

    Args:
        model: callable mapping an encoded position to a score.
        p: encoded position before the move.
        q: encoded position after the move that was played.
        r: encoded position after a random legal move from ``p``.
        kappa: scale applied to ``f(p) + f(q)`` in the two equality terms.

    Returns:
        Scalar tensor ``loss_a + loss_b + loss_c``.
    """
    # Forward pass: scores for the three positions.
    f_p = model(p).squeeze()
    f_q = model(q).squeeze()
    f_r = model(r).squeeze()

    # logsigmoid(x) equals log(sigmoid(x)) but stays finite for large |x|,
    # where sigmoid underflows to 0 and the explicit log would return -inf.

    # Loss A: ranking term pushing f(r) above f(q). The random move leaves the
    # next player better off than the played move does, so the position after
    # it must score higher for the side to move.
    loss_a = -F.logsigmoid(f_r - f_q).mean()

    # Loss B and Loss C together form a soft equality constraint
    # f(p) + f(q) = 0: B penalises a negative sum, C a positive one.
    pq_sum = kappa * (f_p + f_q)
    loss_b = -F.logsigmoid(pq_sum).mean()
    loss_c = -F.logsigmoid(-pq_sum).mean()

    return loss_a + loss_b + loss_c

def lr_lambda(current_epoch):
    """Learning-rate multiplier that decays with wall-clock time.

    The ``current_epoch`` argument is ignored; the factor depends only on the
    seconds elapsed since the module-level timestamp ``t0`` and equals
    ``exp(-elapsed / 86400)``.
    """
    seconds_per_day = 86400
    seconds_since_start = time.time() - t0  # current timestamp minus the reference one
    return math.exp(-seconds_since_start / seconds_per_day)

# Build the network and move its parameters to the selected device.
model = ChessValueNetwork()
model = model.to(device)
# Adam optimizer, with LAMBDA supplied as the weight-decay coefficient.
optimizer = optim.Adam(params=model.parameters(), lr=0.01, weight_decay=LAMBDA)
# Reference timestamp read by lr_lambda; it is set before the scheduler is
# created because lr_lambda reads it whenever the scheduler asks for a multiplier.
t0 = time.time()
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)


In [None]:
# Steps between progress lines. Printing the loss tensor on every step floods
# the notebook output, so progress is reported periodically instead.
LOG_EVERY = 1000

# One optimizer step and one scheduler step per item yielded by `dataloader`;
# the summed loss is reported at the end of every epoch.
for epoch in range(EPOCHS):
    total_loss = 0.0
    for step, (p, q, r) in enumerate(dataloader, start=1):
        optimizer.zero_grad()
        # The loss is computed from the model outputs, so it needs no explicit
        # device transfer.
        loss = objective_function(model, p, q, r, KAPPA)
        loss.backward()
        optimizer.step()
        scheduler.step()
        step_loss = loss.item()
        total_loss += step_loss
        if step % LOG_EVERY == 0:
            print(f"  step {step}, loss: {step_loss:.4f}")
    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {total_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")