In [1]:
import os
import math
from datetime import datetime
from glob import glob
from tqdm import tqdm
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

from lib.service import SamplesService
from lib.model import NnueModel
from lib.model import decode_int64_bitset
from lib.serialize import NnueWriter
from lib.puzzles import PuzzleAccuracy

2024-03-28 22:41:21.529708: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-28 22:41:21.529915: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-28 22:41:21.548596: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-28 22:41:21.602784: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Divisor applied to raw network outputs before they are fed to a sigmoid.
# EvalLoss additionally uses it to convert centipawn targets (cp * SCALING / 100).
SCALING = 356.0

class PQRLoss(torch.nn.Module):
    """Loss over triplets of network outputs named (p, q, r).

    The model output is regrouped into rows of three values and every value is
    divided by ``SCALING``. The loss is the sum of two terms:

    * ``-mean(log(sigmoid(r - q)))`` -- decreases as ``r`` grows above ``q``;
    * ``mean((p + q) ** 2)`` -- decreases as ``q`` approaches ``-p``.

    NOTE(review): presumably p/q/r are the evaluations of a position, the
    position after the played move and the position after a random move --
    confirm against the sample generator.
    """

    def __init__(self):
        super().__init__()

    def forward(self, output, _target):
        """Compute the scalar PQR loss.

        Args:
            output: tensor whose element count is a multiple of 3; it is
                reshaped to ``(-1, 3)`` with columns p, q, r.
            _target: ignored; present so the call signature matches EvalLoss.

        Returns:
            A 0-dim tensor holding the loss.
        """
        triplets = output.reshape(-1, 3)

        p = triplets[:, 0] / SCALING
        q = triplets[:, 1] / SCALING
        r = triplets[:, 2] / SCALING

        # logsigmoid is the numerically stable form of log(sigmoid(x)): the
        # naive composition underflows to log(0) = -inf when r - q is very
        # negative, which makes the loss infinite and the gradients NaN.
        ranking_term = -torch.mean(torch.nn.functional.logsigmoid(r - q))
        symmetry_term = torch.mean(torch.square(p + q))

        return ranking_term + symmetry_term

class EvalLoss(torch.nn.Module):
    """Loss comparing model output and evaluation targets in sigmoid space.

    Targets arrive in UCI centipawns and are first rescaled by
    ``SCALING / 100`` (Stockfish's internal engine units, see
    https://github.com/official-stockfish/Stockfish/blob/fb07281f5590bc216ecbacd468aa0d06fdead70c/src/uci.cpp#L341).
    Both prediction and rescaled target are then divided by ``SCALING`` and
    squashed through a sigmoid into [0, 1]; the loss is the mean of the
    absolute difference raised to the power 2.5.
    """

    def __init__(self):
        super().__init__()

    def forward(self, output, target):
        """Return the mean ``|sigmoid(output/S) - sigmoid(target_units/S)| ** 2.5``.

        Args:
            output: raw model output tensor.
            target: evaluation targets in centipawns; combined element-wise
                with ``output``.

        Returns:
            A 0-dim tensor holding the loss.
        """
        # Centipawns -> internal engine units
        target_units = target * SCALING / 100.0

        # Map both sides into the [0, 1] win/draw/loss-like space
        predicted_wdl = torch.sigmoid(output / SCALING)
        expected_wdl = torch.sigmoid(target_units / SCALING)

        abs_error = torch.abs(predicted_wdl - expected_wdl)
        return torch.pow(abs_error, 2.5).mean()

In [3]:
# --- Run configuration -------------------------------------------------------
EPOCHS = 100000            # upper bound; the run is expected to be stopped manually
BATCHES_PER_EPOCH = 1000   # number of batches averaged into one logged "epoch"
BATCH_SIZE = 4096

FEATURE_SET = "half-king-piece"
NUM_FEATURES = 40960 # 192 768
METHOD = "eval"            # "pqr" or "eval"; selects data layout, dataset and loss

# X holds the input features packed as bitsets (NUM_FEATURES // 64 words per
# feature vector; they are expanded later with decode_int64_bitset).
# NOTE(review): the axis of size 2 presumably holds the two perspectives of a
# position, and the axis of size 3 the (p, q, r) triplet -- confirm.
if METHOD == "pqr":
    X_SHAPE = (BATCH_SIZE, 3, 2, NUM_FEATURES // 64)
    Y_SHAPE = (BATCH_SIZE, 0)
    INPUTS = glob("/mnt/d/datasets/pqr-1700/*.csv")
    loss_fn = PQRLoss()
elif METHOD == "eval":
    X_SHAPE = (BATCH_SIZE, 2, NUM_FEATURES // 64)
    Y_SHAPE = (BATCH_SIZE, 1)
    INPUTS = glob("/mnt/d/datasets/eval/*.csv")
    loss_fn = EvalLoss()
else:
    # Fail here with a clear message instead of a NameError on X_SHAPE below.
    raise ValueError(f"Unsupported METHOD {METHOD!r}; expected 'pqr' or 'eval'")

# --- Output folder for this run (TensorBoard logs + model checkpoints) --------
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
folder = f'runs/{timestamp}_{METHOD}_{FEATURE_SET}_{BATCH_SIZE}'
os.makedirs(f'{folder}/models', exist_ok=True)

# --- Data source, metrics and model -------------------------------------------
puzzles = PuzzleAccuracy('/mnt/c/Users/mlomb/Desktop/Tesis/cs-master-thesis/puzzles.csv')
samples_service = SamplesService(x_shape=X_SHAPE, y_shape=Y_SHAPE, inputs=INPUTS, feature_set=FEATURE_SET, method=METHOD)
chessmodel = NnueModel(num_features=NUM_FEATURES)
chessmodel.cuda()

# Disabled snippet that only pulls batches from the sample service
# (looks like a throughput check of the data pipeline).
#for i in tqdm(range(1000000)):
#    a = samples_service.next_batch()

# --- Optimisation -------------------------------------------------------------
optimizer = torch.optim.Adam(chessmodel.parameters(), lr=0.0015)
# Multiply the LR by 0.7 when the monitored loss has not improved for 100 scheduler steps
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', threshold=0.00001, factor=0.7, patience=100)
writer = SummaryWriter(folder)
# Decorating with @torch.compile is noted as giving a 30% speedup (left disabled)
def train_step(X, y):
    """Run a single optimisation step on one batch.

    Uses the module-level ``optimizer``, ``chessmodel`` and ``loss_fn``.

    Args:
        X: input batch passed directly to ``chessmodel``.
        y: targets passed to ``loss_fn`` together with the model output.

    Returns:
        The loss tensor computed for this batch (before the weight update).
    """
    # Start from clean gradients
    optimizer.zero_grad()

    # Forward pass and loss, then back-propagate
    batch_loss = loss_fn(chessmodel(X), y)
    batch_loss.backward()

    # Apply the update, then let the model clamp its own weights
    optimizer.step()
    chessmodel.clip_weights()

    return batch_loss

# Put the model in training mode
chessmodel.train()

# Each "epoch" is BATCHES_PER_EPOCH batches drawn from the sample service,
# followed by an LR-scheduler step, a checkpoint, a puzzle benchmark and
# TensorBoard logging.
for epoch in range(EPOCHS):
    avg_loss = 0.0

    for _ in tqdm(range(BATCHES_PER_EPOCH), desc=f'Epoch {epoch}'):
        X, y = samples_service.next_batch()

        # expand bitset
        X = decode_int64_bitset(X)
        X = X.reshape(-1, 2, NUM_FEATURES)

        loss = train_step(X, y)
        avg_loss += loss.item()

        # A NaN batch loss poisons the running sum, so this aborts on the first one
        if math.isnan(avg_loss):
            raise Exception("Loss is NaN, exiting")

    avg_loss /= BATCHES_PER_EPOCH

    # Step the scheduler on the epoch's mean training loss
    scheduler.step(avg_loss)

    # save model: PyTorch state dict (.pth) plus the serialized network (.nn)
    # that is handed to the engine below
    model_path = f'{folder}/models/{epoch}'
    model_pth = f'{model_path}.pth'
    model_nn = f'{model_path}.nn'
    torch.save(chessmodel.state_dict(), model_pth)
    nn_writer = NnueWriter(chessmodel, FEATURE_SET)
    with open(model_nn, "wb") as f:
        f.write(nn_writer.buf)

    # run metrics: benchmark the engine binary on puzzles using the network just saved
    puzzles_ratings, puzzles_accuracy = puzzles.measure(["/mnt/c/Users/mlomb/Desktop/Tesis/cs-master-thesis/engine/target/release/engine", f"--nn={model_nn}"])

    # log to tensorboard
    writer.add_scalar('Train/loss', avg_loss, epoch)
    # Read the current LR from the optimizer (public API) rather than the
    # scheduler's private `_last_lr`; after scheduler.step() both hold the same value.
    writer.add_scalar('Train/lr', optimizer.param_groups[0]['lr'], epoch)
    writer.add_scalar('Params/mean-f1', torch.mean(chessmodel.ft.weight), epoch)
    writer.add_scalar('Params/mean-l1', torch.mean(chessmodel.linear1.weight), epoch)
    writer.add_scalar('Params/mean-l2', torch.mean(chessmodel.linear2.weight), epoch)
    writer.add_scalar('Params/mean-out', torch.mean(chessmodel.output.weight), epoch)
    for name, param in chessmodel.named_parameters():
        writer.add_histogram(name, param, epoch)
    writer.add_scalar('Puzzles/accuracy', puzzles_accuracy, epoch)
    for rating_min, rating_max, accuracy in puzzles_ratings:
        writer.add_scalar(f'Puzzles/{rating_min}-{rating_max}/accuracy', accuracy, epoch)

    # Make this epoch's data visible on disk even if the run is interrupted later
    writer.flush()


Epoch 0: 100%|██████████| 1000/1000 [03:04<00:00,  5.41it/s]
100%|██████████| 1000/1000 [00:20<00:00, 48.84it/s]
Epoch 1: 100%|██████████| 1000/1000 [03:04<00:00,  5.42it/s]
100%|██████████| 1000/1000 [00:20<00:00, 48.76it/s]
Epoch 2: 100%|██████████| 1000/1000 [03:01<00:00,  5.52it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.37it/s]
Epoch 3: 100%|██████████| 1000/1000 [03:01<00:00,  5.50it/s]
100%|██████████| 1000/1000 [00:20<00:00, 49.66it/s]
Epoch 4: 100%|██████████| 1000/1000 [03:00<00:00,  5.54it/s]
100%|██████████| 1000/1000 [00:20<00:00, 48.62it/s]
Epoch 5: 100%|██████████| 1000/1000 [03:06<00:00,  5.38it/s]
100%|██████████| 1000/1000 [00:20<00:00, 49.46it/s]
Epoch 6: 100%|██████████| 1000/1000 [03:15<00:00,  5.13it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.14it/s]
Epoch 7: 100%|██████████| 1000/1000 [03:12<00:00,  5.20it/s]
100%|██████████| 1000/1000 [00:20<00:00, 48.98it/s]
Epoch 8: 100%|██████████| 1000/1000 [03:13<00:00,  5.16it/s]
100%|██████████| 1000/1000 [00:20<0

KeyboardInterrupt: 