## Chess AI
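Construct $f(p)$, the value of position $p$ for the player to move, as a 3-layer-deep, 2048-unit-wide artificial neural network (note: the `ChessValueNetwork` implemented below is a small convolutional network instead).\
For each move, $f(p) = \max\limits_{p\rightarrow p_0} - f(p_0)$, where the maximum runs over every position $p_0$ reachable from $p$ in one move.\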

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

### Prepare dataset
1. Players will choose an optimal or near-optimal move. This means that for two positions in succession 
$p \rightarrow q$ observed in the game, we will have $f(p) = -f(q)$.
2. For the same reason, going from $p$ not to $q$ but to a random position $r$, we must have $f(r) > f(q)$, because the random position is better for the next player and worse for the player that made the move.

In [None]:
# Custom dataset
class ChessDataset(Dataset):
    """Dataset of position triples served as float32 tensors.

    Each entry of ``data`` must unpack into exactly three items ``(p, q, r)``.
    Per the notebook text these are presumably the current position, the
    position actually reached in the game, and a random alternative position.
    """

    def __init__(self, data):
        # Kept by reference; no copy or validation is performed here.
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return the triple stored at ``idx`` as three float32 tensors."""

        def to_float_tensor(position):
            return torch.tensor(position, dtype=torch.float32)

        current, reached, alternative = self.data[idx]
        return (
            to_float_tensor(current),
            to_float_tensor(reached),
            to_float_tensor(alternative),
        )

In [None]:
class ChessValueNetwork(nn.Module):
    """Convolutional value network that maps a board to a scalar in [-1, 1].

    The input is a batch of 12-channel 8x8 planes. Two 3x3 convolutions
    followed by a 2x2 max-pool produce 64 feature maps of size 4x4, which a
    two-layer fully connected head reduces to one tanh-squashed value.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            nn.Conv2d(12, 32, kernel_size=3, padding=1),  # input: 8x8x12
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # output: 4x4x64
        ]
        head_layers = [
            nn.Linear(4 * 4 * 64, 128),
            nn.ReLU(),
            nn.Linear(128, 1),  # scalar output
            nn.Tanh(),  # constrain the value to [-1, 1]
        ]

        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return one value per sample, shaped ``[batch, 1]``."""
        feature_maps = self.conv(x)
        batch_size = feature_maps.size(0)
        # Flatten to [batch, features] for the fully connected head.
        flattened = feature_maps.view(batch_size, -1)
        return self.fc(flattened)
    
# Objective function definition
def objective_function(model, p, q, r, kappa=10.0):
    """Return the mean training loss over a batch of (p, q, r) triples.

    The loss encodes the two principles stated in the notebook text:

    * Preference: the position ``q`` actually reached should be worse for
      the next player than a random alternative ``r``, i.e. ``f(r) > f(q)``.
    * Zero-sum: consecutive positions should satisfy ``f(p) = -f(q)``.

    Each constraint is expressed as a negative log-sigmoid, so the loss is
    finite for every real-valued model output.

    Args:
        model: Callable applied separately to ``p``, ``q`` and ``r``; its
            three outputs must be broadcast-compatible tensors.
        p: Batch of positions before the move.
        q: Batch of positions reached by the move played.
        r: Batch of positions reached by a random move.
        kappa: Weight of the two zero-sum terms relative to the
            preference term.

    Returns:
        A scalar tensor holding the mean loss over the batch.
    """
    f_p = model(p)
    f_q = model(q)
    f_r = model(r)

    # logsigmoid is the numerically stable form of log(sigmoid(x)).
    log_sigmoid = nn.functional.logsigmoid

    # Term 1 - preference principle: push f(r) above f(q).
    loss_preference = -log_sigmoid(f_r - f_q)

    # Terms 2 and 3 - zero-sum principle: penalise f(p) + f(q) for being
    # either negative or positive, so the pair is minimised at
    # f(p) = -f(q). A bare log of both s and -s is NaN for any s != 0,
    # which is why each side goes through a sigmoid first.
    value_sum = f_p + f_q
    loss_zero_sum_1 = -kappa * log_sigmoid(value_sum)
    loss_zero_sum_2 = -kappa * log_sigmoid(-value_sum)

    return torch.mean(loss_preference + loss_zero_sum_1 + loss_zero_sum_2)

# Instantiate the value network; the training cell passes its parameters to the optimizer.
model = ChessValueNetwork()

In [None]:
# Training configuration.
EPOCHS = 10  # number of full passes over the dataloader
KAPPA = 10.0  # forwarded to objective_function as its `kappa` argument
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(EPOCHS):
    total_loss = 0  # running sum of the per-batch losses for this epoch
    # NOTE(review): `dataloader` is not defined in the visible cells;
    # presumably a DataLoader wrapping ChessDataset - confirm.
    for p, q, r in dataloader:
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        loss = objective_function(model, p, q, r, KAPPA)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Reports the sum of the batch losses for the epoch, not their mean.
    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {total_loss:.4f}")