In [1]:
import pandas as pd
import torch
import torch.optim as optim
import os

from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn
from torchsummary import summary

In [2]:
def FEN_to_bit_vector(fen):
    """Encode a FEN string as a flat float tensor of length 788.

    Layout of the returned vector, in order:
      * 768 values - twelve 8x8 one-hot planes, one per piece symbol in the
        order P, N, B, R, Q, K, p, n, b, r, q, k. Plane rows follow the FEN
        row order (first FEN row is row 0); columns run left to right.
      * 4 values   - castling flags for 'K', 'Q', 'k', 'q'.
      * 8 values   - one-hot file (a-h) of the en passant square; all zeros
        when that field is '-'.
      * 8 values   - all ones when the side to move is 'w', otherwise zeros.

    Args:
        fen: FEN string; only its first four space-separated fields
            (placement, side to move, castling, en passant) are read.

    Returns:
        1-D torch tensor with 12 * 8 * 8 + 4 + 8 + 8 elements.
    """
    fields = fen.split(' ')

    # Piece placement: a symbol's plane is its position in this string.
    plane_of = {symbol: plane for plane, symbol in enumerate('PNBRQKpnbrqk')}
    piece_vector = torch.zeros(12, 8, 8)
    for rank_idx, rank in enumerate(fields[0].split('/')):
        file_idx = 0
        for symbol in rank:
            if symbol.isdigit():
                # A digit stands for a run of that many empty squares.
                file_idx += int(symbol)
                continue
            piece_vector[plane_of[symbol], rank_idx, file_idx] = 1
            file_idx += 1

    # Castling rights: one slot per flag; any other character (e.g. '-') is ignored.
    castling_slot = {'K': 0, 'Q': 1, 'k': 2, 'q': 3}
    castling_vector = torch.zeros(4)
    for flag in fields[2]:
        if flag in castling_slot:
            castling_vector[castling_slot[flag]] = 1

    # En passant: only the file letter of the target square is encoded.
    en_passant_vector = torch.zeros(8)
    target_square = fields[3]
    if target_square != '-':
        en_passant_vector[ord(target_square[0]) - ord('a')] = 1

    # Side to move, broadcast over eight slots.
    curr_player_vector = torch.ones(8) if fields[1] == 'w' else torch.zeros(8)

    return torch.cat((
        piece_vector.reshape(-1),
        castling_vector,
        en_passant_vector,
        curr_player_vector,
    ))
    

def eval_to_int(eval):
    """Convert a raw evaluation label into a float32 scalar tensor in pawns.

    Values that `int()` accepts are treated as centipawns and divided by 100.
    Any other string is mapped to +50.0 when its second character is '+'
    and to -50.0 otherwise (this is how labels such as '#+3' / '#-3' are
    handled).

    Args:
        eval: evaluation label, e.g. '+667', '-120' or '#+3'.

    Returns:
        0-dim torch.float32 tensor.
    """
    try:
        centipawns = int(eval)
    except ValueError:
        # Not a plain integer: only the sign character at position 1 is used.
        centipawns = -5000
        if eval[1] == '+':
            centipawns = 5000

    return torch.tensor(centipawns / 100, dtype=torch.float32)

class ChessDataset(Dataset):
    """Map-style dataset that encodes table rows into tensors on demand.

    Each item is a ``(features, target)`` pair built from the row's 'FEN'
    column via FEN_to_bit_vector and its 'Evaluation' column via eval_to_int.
    """

    def __init__(self, csv):
        # Despite the name, `csv` is the already-loaded table; it must support
        # len() and .iloc[idx] (the notebook passes a pandas DataFrame).
        self.csv = csv

    def __len__(self):
        """Number of rows in the wrapped table."""
        return len(self.csv)

    def __getitem__(self, idx):
        """Return the encoded position and its evaluation for row `idx`."""
        record = self.csv.iloc[idx]
        return FEN_to_bit_vector(record['FEN']), eval_to_int(record['Evaluation'])

In [3]:
class ChessModel(nn.Module):
    """Fully connected regressor from a 788-element position encoding to one score.

    Seven Linear -> BatchNorm1d -> ReLU stages with output widths
    1024, 2048, 4096, 2048, 512, 256 and 64; stages 3 to 7 are each followed
    by Dropout(0.2). A final Linear(64, 1) produces the score, which is
    clamped to [-50, 50].

    Sub-modules are registered as linear1..linear8, bn1..bn7 and
    dropout1..dropout5, in the same order as they are applied, so
    state_dict keys stay stable for saved checkpoints.
    """

    def __init__(self):
        super().__init__()
        in_features = 12 * 8 * 8 + 4 + 8 + 8
        hidden_widths = (1024, 2048, 4096, 2048, 512, 256, 64)
        for stage, out_features in enumerate(hidden_widths, start=1):
            setattr(self, f"linear{stage}", nn.Linear(in_features, out_features))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(out_features))
            if stage >= 3:
                # Dropout layers are numbered from 1 starting at the third stage.
                setattr(self, f"dropout{stage - 2}", nn.Dropout(0.2))
            in_features = out_features
        self.linear8 = nn.Linear(in_features, 1)

    def forward(self, x):
        """Run the stack on a (batch, 788) input and return (batch, 1) scores."""
        for stage in range(1, 8):
            linear = getattr(self, f"linear{stage}")
            norm = getattr(self, f"bn{stage}")
            x = torch.relu(norm(linear(x)))
            if stage >= 3:
                x = getattr(self, f"dropout{stage - 2}")(x)
        # Clamp the output to -50 and 50
        return torch.clamp(self.linear8(x), -50, 50)

In [4]:
# Read the evaluation table from disk; later cells use its 'FEN' and
# 'Evaluation' columns.
df = pd.read_csv("data/tactic_evals.csv")

# Sanity check: show the raw 'Evaluation' label of one row.
row = df.iloc[10]
print(row.loc['Evaluation'])

+667


In [5]:
# Seed torch's global RNG so this cell is repeatable.
# NOTE(review): random_split and the model's weight initialisation presumably
# both draw from this RNG, so the order of the statements below determines the
# split and the initial weights - confirm before reordering anything here.
torch.manual_seed(700007)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Wrap the loaded table; rows are encoded lazily in ChessDataset.__getitem__.
dataset = ChessDataset(df)

# 95% of the rows go to training, the remainder to validation.
train_size = int(0.95 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Validation batches keep a fixed order.
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
# Model, loss and optimizer shared by the training and evaluation cells.
model = ChessModel().to(device)
criterion = nn.SmoothL1Loss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
        

In [6]:
# Print a per-layer summary; the input width is read from the model's first
# layer so it cannot drift from the architecture definition.
summary(model, (model.linear1.in_features,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1024]         807,936
       BatchNorm1d-2                 [-1, 1024]           2,048
            Linear-3                 [-1, 2048]       2,099,200
       BatchNorm1d-4                 [-1, 2048]           4,096
            Linear-5                 [-1, 4096]       8,392,704
       BatchNorm1d-6                 [-1, 4096]           8,192
           Dropout-7                 [-1, 4096]               0
            Linear-8                 [-1, 2048]       8,390,656
       BatchNorm1d-9                 [-1, 2048]           4,096
          Dropout-10                 [-1, 2048]               0
           Linear-11                  [-1, 512]       1,049,088
      BatchNorm1d-12                  [-1, 512]           1,024
          Dropout-13                  [-1, 512]               0
           Linear-14                  [

In [None]:
# Training configuration.
# NUM_EPOCHS is the total number of epochs to reach; start_epoch is the number
# of epochs already completed (0 trains from scratch, N resumes from
# model_epoch_N.pth).
NUM_EPOCHS = 25
loss_epoch = 0.0
start_epoch = 15
if start_epoch != 0:
    # map_location lets a checkpoint written on a GPU load on a CPU-only
    # machine; weights_only=True restricts unpickling to tensors and plain
    # containers and silences torch.load's FutureWarning.
    model.load_state_dict(
        torch.load(
            f"model_epoch_{start_epoch}.pth",
            map_location=device,
            weights_only=True,
        )
    )
    # NOTE: only the model weights are checkpointed, so the Adam optimizer
    # state starts fresh whenever training is resumed.

# Train the model
for epoch in range(start_epoch, NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    p_bar = tqdm(train_loader, total=len(train_loader))
    p_bar.set_description(f"Epoch {epoch + 1}")
    # The smoothed loss shown in the progress bar continues from the previous
    # epoch's mean. On the first epoch of a run there is no previous mean, so
    # the first batch seeds it instead of a misleading 0.0.
    exponential_moving_loss = loss_epoch if epoch != start_epoch else None
    for inputs, labels in p_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        # Labels arrive as shape (batch,); the model emits (batch, 1).
        loss = criterion(outputs, labels.view(-1, 1))
        loss.backward()
        optimizer.step()
        # Read the scalar once per step.
        batch_loss = loss.item()
        running_loss += batch_loss
        # Take exponential moving average of loss and print to p_bar
        if exponential_moving_loss is None:
            exponential_moving_loss = batch_loss
        else:
            exponential_moving_loss = 0.99 * exponential_moving_loss + 0.01 * batch_loss
        p_bar.set_postfix({'loss': exponential_moving_loss})
    loss_epoch = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}, loss: {loss_epoch}")

    # Checkpoint as soon as the epoch's training pass is done. File N holds
    # the weights after N completed epochs (same naming as before), and the
    # final epoch is now saved as well.
    torch.save(model.state_dict(), f"model_epoch_{epoch + 1}.pth")

    # Validation pass: eval mode, no gradients.
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels.view(-1, 1))
            running_loss += loss.item()
    print(f"Epoch: {epoch + 1}, Validation loss: {running_loss / len(val_loader)}")

  model.load_state_dict(torch.load(f"model_epoch_{start_epoch}.pth"))
Epoch 16: 100%|██████████| 78026/78026 [54:32<00:00, 23.85it/s, loss=5.26] 


Epoch 16, loss: 5.388063728042393
Epoch: 16, Validation loss: 5.607604257782388


Epoch 17: 100%|██████████| 78026/78026 [55:32<00:00, 23.42it/s, loss=5.44]  


Epoch 17, loss: 5.347583203232866
Epoch: 17, Validation loss: 5.503602239056344


Epoch 18: 100%|██████████| 78026/78026 [42:09<00:00, 30.85it/s, loss=5.22] 


Epoch 18, loss: 5.290565274596426
Epoch: 18, Validation loss: 5.482320652003413


Epoch 19: 100%|██████████| 78026/78026 [35:52<00:00, 36.24it/s, loss=5.57]


Epoch 19, loss: 5.234864099939574
Epoch: 19, Validation loss: 5.392862740937654


Epoch 20:  41%|████      | 32173/78026 [15:42<21:25, 35.68it/s, loss=5.16]  

In [None]:
# Final pass over the validation split, then persist the trained weights.
model.eval()
running_loss = 0.0
with torch.no_grad():
    for inputs, labels in tqdm(val_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Labels arrive as shape (batch,); the model emits (batch, 1).
        loss = criterion(outputs, labels.view(-1, 1))
        running_loss += loss.item()
print(f"Validation loss: {running_loss / len(val_loader)}")

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
final_model_path = "models/model5/model.pth"
os.makedirs(os.path.dirname(final_model_path), exist_ok=True)
torch.save(model.state_dict(), final_model_path)
    

100%|██████████| 4107/4107 [00:48<00:00, 84.51it/s]


Validation loss: 5.558965100753269


In [10]:
from collections import OrderedDict

# Build the path from its components: the former backslash literal contained
# invalid escape sequences (a SyntaxWarning on recent Python versions) and
# only resolved on Windows.
checkpoint_path = os.path.join("models", "model4", "model.pth")

model = ChessModel().to(device)
# map_location keeps the load working on CPU-only machines; weights_only=True
# restricts unpickling to tensors and plain containers.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)

# Create new OrderedDict that does not contain `module.` (a key prefix found in
# checkpoints saved from a wrapped model, e.g. nn.DataParallel).
prefix = "module."
new_state_dict = OrderedDict(
    (k[len(prefix):] if k.startswith(prefix) else k, v)
    for k, v in state_dict.items()
)

# NOTE(review): the last recorded run of this cell failed with a 788-vs-781
# input-width mismatch, which suggests models/model4 was trained with a
# different feature encoding/architecture than the current ChessModel -
# confirm this is the intended checkpoint.
model.load_state_dict(new_state_dict)

# Show a handful of predictions
model.eval()
with torch.no_grad():
    for i in range(70, 100):
        x, y = dataset[i]
        x = x.to(device).unsqueeze(0)  # Add the batch dimension
        y_pred = model(x)
        print(f"Prediction: {y_pred.item()}, Actual: {y.item()}")

  state_dict = torch.load("models\model4\model.pth")
  state_dict = torch.load("models\model4\model.pth")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x788 and 781x512)

In [None]:
# Print the occurrence of some evaluations, bucketed to whole pawns
# (int() truncates toward zero).
# NOTE(review): the slice starts at 1, so the first row is skipped and 999 rows
# are sampled - confirm that is intended.
evals = torch.tensor([int(eval_to_int(raw)) for raw in df['Evaluation'][1:1000]])
unique, counts = evals.unique(return_counts=True)

for value, occurrences in zip(unique.tolist(), counts.tolist()):
    print(f"{value}: {occurrences}")


In [None]:
import matplotlib.pyplot as plt

# Bar chart of the evaluation tally computed in the previous cell. The tensors
# are converted to plain lists so matplotlib does not have to coerce them, and
# the figure is labelled so it can be read on its own.
fig, ax = plt.subplots()
ax.bar(unique.tolist(), counts.tolist())
ax.set_xlabel("Evaluation (pawns, truncated toward zero)")
ax.set_ylabel("Number of positions")
ax.set_title("Distribution of evaluations in the sampled rows");