In [1]:
import torch
import time
import gc
import os
import io
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import ceil
from tqdm.notebook import tqdm
from copy import deepcopy 
from tabulate import tabulate
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import ConfusionMatrixDisplay
from collections import namedtuple
import plotly.express as px
import chess
import chess.pgn

In [5]:
### Basic structure

DATA_DIR = "data/"
STOCKFISH_DIR = 'stockfish/'
ARCHIVE_DIR = DATA_DIR + 'archives/'

###
##
###

### Stockfish

STOCKFISH_AVX512_TAR = "stockfish-ubuntu-x86-64-avx512.tar"
STOCKFISH_AVX512 = "stockfish-ubuntu-x86-64-avx512"
STOCKFISH_AVX512_EXE = STOCKFISH_DIR + STOCKFISH_AVX512


###
##
###

### URLs

ELITE_DATABASE_URL  = "https://database.nikonoel.fr/lichess_elite_2021-11.zip"
STOCKFISH_DOWNSTREAM = "https://github.com/official-stockfish/Stockfish/releases/latest/download/"
STOCKFISH_AVX512_URL = STOCKFISH_DOWNSTREAM + STOCKFISH_AVX512_TAR

### 
##
###

### Datasets 

ELITE_DATASET_ARCHIVE = "lichess_elite_2021-11.zip"
ELITE_DATASET_FILENAME = "lichess_elite_2021-11.pgn"


LICHESS_EVAL_ARCHIVE = ARCHIVE_DIR + "lichess_db_eval.jsonl.zst"
LICHESS_EVAL_FILENAME = DATA_DIR + "lichess_db_eval.jsonl"


In [6]:
BITBOARD_DIR = DATA_DIR + 'bitboards/'
ELITE_DATA_BASE_URL  = "https://database.nikonoel.fr/"
STOCKFISH_DOWNSTREAM = "https://github.com/official-stockfish/Stockfish/releases/latest/download/"

SAMPLE_ZIP = "lichess_elite_2021-11.zip"
SAMPLE_PGN = "lichess_elite_2021-11.pgn"
SAMPLE_BITBOARD = "elite_bitboard.csv"
BITBOARD_1M = "1M.csv"
BITBOARD_10M = "10M.csv"
ELITE_DATA_SAMPLE_URL = ELITE_DATA_BASE_URL + SAMPLE_ZIP
SAMPLE_ZIP_FILE = ARCHIVE_DIR + SAMPLE_ZIP
SAMPLE_PGN_FILE  = DATA_DIR + SAMPLE_PGN

SAMPLE_BITBOARD_FILE = BITBOARD_DIR + SAMPLE_BITBOARD
BITBOARD_10M_FILE = BITBOARD_DIR + BITBOARD_10M
BITBOARD_1M_FILE = BITBOARD_DIR + BITBOARD_1M

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

# Dataset and models

In [7]:
# def sizeof_fmt(num, suffix="B"):
#     for unit in ("", "Ki", "Mi", "Gi"):
#         if abs(num) < 1024.0:
#             return f"{num:3.1f}{unit}{suffix}"
#         num /= 1024.0
#     return f"{num:.1f}Yi{suffix}"
    
def sizeof_fmt(num):
    for unit in ("", "K", "M", "G"):
        if abs(num) < 1000.0:
            return f"{num:.0f}{unit}"
        num /= 1000.0
    return f"{num:.0f}"


print(f"Allocated: {sizeof_fmt(torch.cuda.memory_allocated())}") 
print(f"Reserved: {sizeof_fmt(torch.cuda.memory_reserved())}") 

Allocated: 0
Reserved: 0


In [8]:
class BitboardDrawDatasetSimple(Dataset):
    def __init__(self, bitboard_file):
        bitboards_df = pd.read_csv(bitboard_file, dtype="uint64", usecols=range(12))
        metadata_df = pd.read_csv(bitboard_file)

        self.bitboards = self.bitboards_to_layers(bitboards_df)
        # self.meta_features = self.binary_features_to_layers(metadata_df[["white", "cK", "cQ", "ck", "cq"]])
        self.is_draw = metadata_df['draw'].to_numpy(dtype=np.single)
        self.length = self.is_draw.size

        # self.bitboards = np.hstack((self.bitboards, self.meta_features))

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        return self.bitboards[idx], self.is_draw[idx]

    def bitboards_to_layers(self, bitboards):
        cont = np.ascontiguousarray(np.expand_dims(bitboards.to_numpy(), 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

    def binary_features_to_layers(self, features):
        i = (features.to_numpy(dtype=np.uint64) - 1) ^ 0xffffffffffffffff
        cont = np.ascontiguousarray(np.expand_dims(i, 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

In [9]:
class BitboardDrawDataset(Dataset):

    @classmethod
    def from_dataset_info(cls, dataset_info):
        return cls(dataset_info.source_file, dataset_info.chunk_size, dataset_info.shuffle, dataset_info.in_memory)
    
    def __init__(self, bitboard_file, chunk_size, shuffle = True, in_memory = False):
        self.curr_batch = 0
        self.shuffle = shuffle
        self.bitboard_file = bitboard_file
        self.chunk_size = chunk_size
        self.datasamples = self.calculate_dataset_size()
        
        self.batches = ceil(self.datasamples / chunk_size)
        self.splits = np.arange(chunk_size, self.datasamples, chunk_size)
        self.indices = np.arange(1, self.datasamples+1)
        self.splits = np.append(self.splits, [0])

        self.in_memory = in_memory or self.chunk_size == self.datasamples
        
        self.load_data()
    
    def __len__(self):
        return self.datasamples
        
    def __getitem__(self, idx):
        batch = idx // self.chunk_size
        # Laod new data 
        if self.curr_batch != batch and not self.in_memory:
            self.curr_batch = batch
            self.load_data()            

        idx -= self.chunk_size * self.curr_batch
        
        return self.bitboards[idx], self.is_draw[idx]

    def bitboards_to_layers(self, bitboards):
        cont = np.ascontiguousarray(np.expand_dims(bitboards.to_numpy(), 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

    def binary_features_to_layers(self, features):
        i = (features.to_numpy(dtype=np.uint64) - 1) ^ 0xffffffffffffffff
        cont = np.ascontiguousarray(np.expand_dims(i, 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)
        
    def load_data(self):
        if self.curr_batch == 0 and self.shuffle:
            np.random.shuffle(self.indices)
            
        ignore = set(self.indices)

        if self.curr_batch == self.batches-1:
            ignore.difference_update(self.indices[self.splits[self.curr_batch-1]:])
        else:
            ignore.difference_update(self.indices[self.splits[self.curr_batch-1]:self.splits[self.curr_batch]])

        df = pd.read_csv(self.bitboard_file, dtype="uint64", usecols=range(18), skiprows=ignore)
        self.bitboards = self.bitboards_to_layers(df.iloc[:, range(12)])
        # self.meta_features = self.binary_features_to_layers(df[["white", "cK", "cQ", "ck", "cq"]])
        self.is_draw = df['draw'].to_numpy(dtype=np.single)

        # self.bitboards = np.hstack((self.bitboards, self.meta_features))
    
    def calculate_dataset_size(self):
        with open(self.bitboard_file) as f:
            return sum(1 for line in f) - 1

    def dataloader(self, batch_size):
        return DataLoader(self, batch_size=batch_size, shuffle=self.in_memory)

DatasetInfo = namedtuple("DatasetInfo", ["source_file", "chunk_size", "shuffle", "in_memory"])

In [10]:
class ConvolutionNetwork(nn.Module):

    def __init__(self, neurons=256):
        super().__init__()
        self.neurons = neurons
        self.model = nn.Sequential(
            nn.Unflatten(1, (13, 8, 8)),
            nn.Conv2d(13, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Flatten(1),
            
            nn.Linear(64 * 2 * 2, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),

            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model.forward(x)

In [11]:
class DenseNetwork(nn.Module):

    def __init__(self, layers, sizes, normalization=False):
        super().__init__()
        self.sizes = sizes
        self.layers = layers
        ll = []

        assert self.layers == len(sizes) - 2, "Wrong layers to sizes number."
        
        for i in range(layers+1):
            ll.append(nn.Linear(self.sizes[i], self.sizes[i+1]))   
            if i < layers:
                if layers > 1 and normalization:
                    ll.append(nn.BatchNorm1d(self.sizes[i+1]))
                ll.append(nn.ReLU())
                
        self.model = nn.Sequential(*ll)

    def forward(self, x):
        return self.model.forward(x)

    def __str__(self):
        return f"Dense_{'_'.join(map(str, self.sizes))}"

    def get_hidden_layer_count(self):
        return self.layers

# Training

In [12]:
def timeit(f):

    def timed(*args, **kw):

        ts = time.time()
        result = f(*args, **kw)
        te = time.time()

        print(f"Took: {te-ts:.2f}s")
        return result
    return timed

class Train:
    
    def __init__(self, train_dataset_info, validate_dataset_info, batch_size):
        self.batch_size = batch_size
        self.train_dataset = BitboardDrawDataset.from_dataset_info(train_dataset_info)
        self.validate_dataset = BitboardDrawDataset.from_dataset_info(validate_dataset_info)
        self.train_dataloader = self.train_dataset.dataloader(batch_size)
        self.validate_dataloader = self.validate_dataset.dataloader(batch_size)

        self.loss_fn = nn.BCEWithLogitsLoss()
        self.total_batches = len(self.train_dataloader)
        self.print_every = 100
        self.epoch_print_interval = 1
        
    def train_one_epoch(self, model, optimizer, p=False) -> int:
        for i, data in enumerate(self.train_dataloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)
    
            optimizer.zero_grad()

            outputs = model(inputs)
            
            loss = self.loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
    
    @timeit
    def train(self, model, optimizer, epochs, p=True):
        try:
            best_vloss = np.inf
            best_model = deepcopy(model.state_dict())
            best_epoch = 0
            best_acc = 0
        
            for epoch in range(1, epochs + 1):
                if p and epoch % self.epoch_print_interval == 0 or epoch == 1: print(f'EPOCH {epoch}')
            
                # Make sure gradient tracking is on, and do a pass over the data
                model.train(True)
                self.train_one_epoch(model, optimizer)
            
                model.eval()
    
                with torch.no_grad():
                    train_acc, train_loss, train_prec, train_recall = self.test(model, self.train_dataloader)
                    # train_acc, train_loss, train_prec, train_recall = 0, 0, 0, 0
                    validate_acc, validate_loss, validate_prec, validate_recall = self.test(model, self.validate_dataloader)
    
                    if p and epoch % self.epoch_print_interval == 0 or epoch == 1: 
                        print(tabulate([["Loss", train_loss, validate_loss], 
                                        ["Precision", train_prec, validate_prec],
                                        ["Recall", train_recall, validate_recall],
                                        ["Accuracy", f"{train_acc:.2f}%", f"{validate_acc:.2f}%"]],
                                       headers=["", "Train", "Test"]))
            
                if validate_loss < best_vloss:
                    best_vloss = validate_loss
                    best_model = deepcopy(model.state_dict())
                    best_epoch = epoch
                    best_acc = validate_acc
                    
        except KeyboardInterrupt:
            self.save_model(model, best_model, best_epoch, lr, momentum, acc)

        return best_model, best_epoch, best_acc

    def test(self, model, dataloader):
        acc, loss = 0, 0
        conf_mat = torch.zeros(2, 2) 
        for i, (vinputs, vlabels) in enumerate(dataloader):
            vinputs, vlabels = vinputs.to(device), vlabels.unsqueeze(1).to(device)
            voutputs = model(vinputs)
            pred = nn.functional.sigmoid(voutputs).round()
            
            acc += (pred == vlabels).sum() / self.batch_size
            loss += self.loss_fn(voutputs, vlabels) 
            # conf_mat += confusion_matrix(vlabels.to('cpu'), pred.to('cpu'))

        acc = acc / (i+1) * 100
        loss /= (i+1)

        prec = conf_mat[1, 1] / (conf_mat[1, 1] + conf_mat[0, 1])
        recall = conf_mat[1, 1] / (conf_mat[1, 1] + conf_mat[1, 0])
        
        return acc, loss, prec, recall
    
    
                
    def save_model(self, model, state_dict, epoch, lr, momentum, acc):
        base_path = f"models/{sizeof_fmt(len(self.train_dataset))}/{str(model.get_hidden_layer_count())}l/"
        if(not os.path.isdir(base_path)):
            !mkdir -p {base_path}
        
        model_path = base_path + f"{str(model)}_b{self.batch_size}_e{epoch}_lr{lr}_m{momentum}_acc{acc:.2f}"
        torch.save(state_dict, model_path)
        
    
    def find_best(self, model, epochs, lr=1e-3, momentum=0.9):
        print(str(model))
        model.to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        # optimizer = optim.Adam(model.parameters())
        best_model_state_dict, epoch, acc = self.train(model, optimizer, epochs)
        self.save_model(model, best_model_state_dict, epoch, lr, momentum, acc)
        model.to('cpu')
        self.cleanup()
        print("")
            
    def cleanup(self):  
        gc.collect()
        torch.cuda.empty_cache()
        

In [23]:
eval_dataset_info = DatasetInfo("data/eval_dataset/bitboards/1000000_0_19_24.csv", 1000000, False, True)
validate_dataset_info = DatasetInfo("data/eval_dataset/bitboards/100000_1509579_19_24.csv", 1000000, False, True)

In [24]:
pipe = Train(eval_dataset_info, validate_dataset_info, 512)

In [13]:
model = DenseNetwork(5, [768, 768, 512, 256, 128, 64, 1])
# model = DenseNetwork(3, [768, 768, 256, 128, 1])
# model = DenseNetwork(2, [768, 256, 128, 1])
# model = DenseNetwork(1, [768, 768, 1])
# model = DenseNetwork(0, [768, 1])
model.load_state_dict(torch.load("models/3K/5l/Dense_768_768_512_256_128_64_1_b512_e8_lr0.00216_m0.9_acc32.42", map_location="cpu"))

<All keys matched successfully>

In [26]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.5408589243888855  0.5241131782531738
Precision  nan                 nan
Recall     nan                 nan
Accuracy   72.37%              73.87%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5238837003707886  0.5175482630729675
Precision  nan                 nan
Recall     nan                 nan
Accuracy   73.47%              74.14%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5144701600074768  0.5142543911933899
Precision  nan                 nan
Recall     nan                 nan
Accuracy   74.13%              74.55%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5009475946426392  0.5277002453804016
Precision  nan                 nan
Recall     nan                 nan
Accura

NameError: name 'lr' is not defined

## Results of traingin

In [19]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6204628944396973  0.6315557956695557
Precision  nan                 nan
Recall     nan                 nan
Accuracy   65.92%              64.05%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.6179744005203247  0.6281192898750305
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.26%              64.48%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.6164440512657166  0.6277697086334229
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.42%              64.46%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.6157306432723999  0.6264545321464539
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.46% 

In [13]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6102012395858765  0.6209103465080261
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.76%              65.18%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5948296785354614  0.6062929034233093
Precision  nan                 nan
Recall     nan                 nan
Accuracy   68.64%              67.05%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5827310085296631  0.5942636132240295
Precision  nan                 nan
Recall     nan                 nan
Accuracy   69.76%              68.22%
EPOCH 4
           Train               Test
---------  ------------------  ----------------
Loss       0.5741562843322754  0.58530193567276
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.34% 

In [13]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  -----------------
Loss       0.6070567965507507  0.618075966835022
Precision  nan                 nan
Recall     nan                 nan
Accuracy   67.12%              65.64%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5833998918533325  0.5958794951438904
Precision  nan                 nan
Recall     nan                 nan
Accuracy   69.69%              68.03%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5701068043708801  0.5803307890892029
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.67%              69.39%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5587959885597229  0.5666363835334778
Precision  nan                 nan
Recall     nan                 nan
Accuracy   7

In [16]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6034354567527771  0.6142628788948059
Precision  nan                 nan
Recall     nan                 nan
Accuracy   67.42%              66.08%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5760378241539001  0.5863416790962219
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.27%              69.00%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5583397746086121  0.5652486681938171
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.71%              70.89%
EPOCH 4
           Train              Test
---------  -----------------  ------------------
Loss       0.541077733039856  0.5462588667869568
Precision  nan                nan
Recall     nan                nan
Accuracy   

In [49]:
# model = DenseNetwork(3, [768, 768, 256, 128, 1])
model = DenseNetwork(35 [768, 768, 512, 256, 128, 64, 1])
model.load_state_dict(torch.load("models/3K/", map_location="cpu"))

  model = DenseNetwork(35 [768, 768, 512, 256, 128, 64, 1])


TypeError: 'int' object is not subscriptable

In [14]:
def save_weights_to_csv(model: nn.Module, directory: str):
    if not os.path.exists(directory):
        os.makedirs(directory)

    for name, param in model.named_parameters():
        layer_name, param_type = name.rsplit('.', 1)
        param_data = param.detach().cpu().numpy()
        df = pd.DataFrame(param_data)
        filename = f"{layer_name}_{param_type}.csv"
        df.to_csv(os.path.join(directory, filename), header=None, index=False)

    print(f"All weights and biases have been saved to the '{directory}' directory.")

save_weights_to_csv(model, 'models/weights/3K/5l/')

All weights and biases have been saved to the 'models/weights/3K/5l/' directory.


In [29]:
def unpack_to_bits(dataset):
    dataset.map(lambda x: ";".join(np.char.mod('%d', np.unpackbits(np.array([x]).view(np.uint8))))).to_csv("dataset_bits.csv", sep=";", index=False)
    bity = pd.read_csv("dataset_bits.csv", dtype="uint64", sep=";", header=None)
    dataset.rename(columns={"draw": 768})
    pd.concat([bity, dataset.rename(columns={"draw": 768})], axis=1).to_csv("dataset_bits.csv", sep=";", index=False)