In [None]:
import sys
sys.path.append('../')

import torch
from torch import nn
import numpy as np
import json
from tqdm import tqdm
from lib.service import SamplesService
from lib.model import ChessModel

In [None]:
class PQRLoss(torch.nn.Module):
    """Loss over triplets of model outputs.

    The prediction tensor is viewed as rows of three values ``(p, q, r)`` and
    the loss is ``mean(-log(sigmoid(r - q))) + mean((p + q) ** 2)``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, pred, target=None):
        """Compute the loss.

        Args:
            pred: tensor whose total number of elements is a multiple of 3;
                it is reshaped to ``(-1, 3)`` so consecutive values form one
                ``(p, q, r)`` row.
            target: ignored. Accepted only so this loss can be called as
                ``loss_fn(outputs, y)`` like the other loss in this notebook.

        Returns:
            Scalar tensor with the summed ranking and squared-sum terms.
        """
        pred = pred.reshape(-1, 3)

        p = pred[:, 0]
        q = pred[:, 1]
        r = pred[:, 2]

        # logsigmoid is the numerically stable form of log(sigmoid(x)):
        # sigmoid underflows to 0 for very negative x, which would give -inf.
        a = -torch.mean(torch.nn.functional.logsigmoid(r - q))
        b = torch.mean(torch.square(p + q))

        return a + b

class EvalLoss(torch.nn.Module):
    """Loss comparing model output and target score after a sigmoid squash.

    Both values are mapped through ``sigmoid(x / scaling)`` and the loss is the
    mean of ``|wdl_model - wdl_target| ** exponent``.

    Args:
        output_scale: factor applied to the raw model output before squashing.
        scaling: divisor used inside the sigmoid; also used to rescale the target.
        exponent: power applied to the absolute difference.

    The defaults reproduce the constants previously hard-coded in ``forward``.
    """

    def __init__(self, output_scale=600.0, scaling=356.0, exponent=2.5):
        super().__init__()
        self.output_scale = output_scale
        self.scaling = scaling
        self.exponent = exponent

    def forward(self, output, target):
        """Return the mean loss for a batch.

        Args:
            output: raw model output tensor.
            target: target score tensor (the original comments describe it as a
                centipawn score); must broadcast against ``output``.

        Returns:
            Scalar tensor with the mean loss.
        """
        # since we are clipping the layers, we need to scale the output so it can reach higher values
        output = output * self.output_scale

        # rescale the target from centipawns (per the original comment) to the same units as output
        target = target * self.scaling / 100.0

        # squash both into [0, 1] with a sigmoid before comparing
        wdl_model = torch.sigmoid(output / self.scaling)
        wdl_target = torch.sigmoid(target / self.scaling)

        loss = torch.pow(torch.abs(wdl_model - wdl_target), self.exponent)

        return loss.mean()

In [None]:
import os
import math
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from glob import glob

# --- Training configuration -------------------------------------------------
EPOCHS = 100000
BATCHES_PER_EPOCH = 1000
BATCH_SIZE = 4096

FEATURE_SET = "basic"
NUM_FEATURES = 768
METHOD = "eval"

# Per-method batch shapes, dataset files and loss.
# NUM_FEATURES // 64: presumably the features arrive packed into 64-bit words
# (the training loop expands them with decode_int64_bitset) — TODO confirm.
if METHOD == "pqr":
    X_SHAPE = (BATCH_SIZE, 3, 2, NUM_FEATURES // 64)
    Y_SHAPE = (BATCH_SIZE, 0)
    INPUTS = glob("/mnt/d/datasets/pqr-1700/*.csv")
    loss_fn = PQRLoss()
elif METHOD == "eval":
    X_SHAPE = (BATCH_SIZE, 2, NUM_FEATURES // 64)
    Y_SHAPE = (BATCH_SIZE, 1)
    INPUTS = glob("/mnt/d/datasets/eval/*.csv")
    loss_fn = EvalLoss()
else:
    # Fail here instead of with a NameError on X_SHAPE / loss_fn further down.
    raise ValueError(f"Unknown METHOD {METHOD!r}; expected 'pqr' or 'eval'")

# Each run gets its own timestamped folder holding TensorBoard logs and model snapshots.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
folder = f'runs/{timestamp}_{METHOD}_{FEATURE_SET}_{BATCH_SIZE}'
os.makedirs(f'{folder}/models', exist_ok=True)

samples_service = SamplesService(x_shape=X_SHAPE, y_shape=Y_SHAPE, inputs=INPUTS, feature_set=FEATURE_SET, method=METHOD)
chessmodel = ChessModel(num_features=NUM_FEATURES)
chessmodel.cuda()

# Disabled snippet that only pulls batches from the samples service, without training:
#for i in tqdm(range(1000000)):
#    a = samples_service.next_batch()

optimizer = torch.optim.Adam(chessmodel.parameters(), lr=0.0001)
# Multiply the learning rate by 0.7 once the epoch loss has not improved for 10 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', threshold=0.0001, factor=0.7, patience=10)
writer = SummaryWriter(folder)

# @torch.compile # 30% speedup
def train_step(X, y):
    """Run one optimisation step on a single batch.

    Uses the module-level ``optimizer``, ``chessmodel`` and ``loss_fn``.

    Args:
        X: model input for the batch.
        y: targets handed to ``loss_fn`` together with the model output.

    Returns:
        The loss tensor computed for this batch.
    """
    # Start from zeroed gradients
    optimizer.zero_grad()

    # Forward pass and loss in one go, then back-propagate
    batch_loss = loss_fn(chessmodel(X), y)
    batch_loss.backward()

    # Apply the update, then let the model clip its weights
    optimizer.step()
    chessmodel._clip_weights()

    return batch_loss

# Put the model in training mode (this switches module behaviour; it does not toggle autograd)
chessmodel.train()

for epoch in range(EPOCHS):
    avg_loss = 0.0

    for _ in tqdm(range(BATCHES_PER_EPOCH), desc=f'Epoch {epoch}'):
        X, y = samples_service.next_batch()

        # expand bitset
        # NOTE(review): decode_int64_bitset is not defined or imported in the visible
        # cells; confirm where it comes from so the notebook survives Restart & Run All.
        X = decode_int64_bitset(X)
        X = X.reshape(-1, 2, NUM_FEATURES)

        loss = train_step(X, y)
        avg_loss += loss.item()

        # Abort as soon as the running sum is poisoned; further training would be wasted.
        if math.isnan(avg_loss):
            raise RuntimeError("Loss is NaN, exiting")

    avg_loss /= BATCHES_PER_EPOCH

    # Step the scheduler on the mean epoch loss
    scheduler.step(avg_loss)

    # Read the learning rate from the optimizer rather than the scheduler's private _last_lr.
    current_lr = optimizer.param_groups[0]['lr']

    writer.add_scalar('Train/loss', avg_loss, epoch)
    writer.add_scalar('Train/lr', current_lr, epoch)
    writer.add_scalar('Params/mean-f1', torch.mean(chessmodel.ft.weight).item(), epoch)
    writer.add_scalar('Params/mean-l1', torch.mean(chessmodel.linear1.weight).item(), epoch)
    writer.add_scalar('Params/mean-l2', torch.mean(chessmodel.linear2.weight).item(), epoch)
    writer.add_scalar('Params/mean-out', torch.mean(chessmodel.output.weight).item(), epoch)
    for name, param in chessmodel.named_parameters():
        writer.add_histogram(name, param, epoch)
    writer.flush()

    # save model: a .pth state dict plus a .json with the config, training stats and layers
    model_path = f'{folder}/models/{epoch}'
    torch.save(chessmodel.state_dict(), f'{model_path}.pth')
    with open(f'{model_path}.json', 'w') as f:
        model = {
            "config": {
                "batches_per_epoch": BATCHES_PER_EPOCH,
                "batch_size": BATCH_SIZE,

                "feature_set": FEATURE_SET,
                "num_features": NUM_FEATURES,
                "method": METHOD,
            },
            "train": {
                "epoch": epoch,
                "loss": avg_loss
            },
            "layers": chessmodel.to_json()
        }

        json.dump(model, f)

In [None]:
import seaborn as sns

# Take the first parameter tensor of the output layer and copy it to a NumPy array on the CPU
output_params = list(chessmodel.output.parameters())
a = output_params[0].cpu().detach().numpy()

# plot distribution of the individual values, with a KDE overlay
sns.histplot(a.flatten(), kde=True)


In [None]:
# Checkpoint to inspect. This is a machine-specific absolute path; point it at your own run.
CHECKPOINT_PATH = '/mnt/c/Users/mlomb/Desktop/Tesis/cs-master-thesis/notebooks/runs/20240310_220627_eval_basic_4096/models/0.pth'

testmodel = ChessModel(768)
# map_location='cpu': the training cell moves the model to CUDA before saving, so without
# remapping torch.load would need a GPU; testmodel itself is kept on the CPU here.
testmodel.load_state_dict(torch.load(CHECKPOINT_PATH, map_location='cpu'))

# Hand-written 0/1 feature vector used to probe the loaded model.
# It is reshaped to (-1, 768) where it is used below, so it is expected to hold 768 entries.
# NOTE(review): the name `input` shadows the Python builtin for the rest of the kernel session.
input = np.array([
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,
1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0,
])

# Feed the hand-written feature vector through a single layer of the loaded model.
# NOTE(review): the rows are 768 wide and go into `linear1`, while the training cell
# also logs a separate `ft` layer — confirm linear1 is the layer intended here.
fl = testmodel.linear1
layer_input = torch.tensor(input, dtype=torch.float32).reshape(-1, 768)
r = fl(layer_input)

# Print arrays in full and without scientific notation
np.set_printoptions(threshold=sys.maxsize, suppress=True)

# Bias scaled by 127 * 64 and rounded; layer output scaled by 100
scaled_bias = fl.bias.detach().numpy() * 127 * 64
print(np.round(scaled_bias))
print(r.detach().numpy() * 100)
