In [1]:
import torch
import time
import gc
import os
import io
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import ceil
from tqdm.notebook import tqdm
from copy import deepcopy 
from tabulate import tabulate
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset
from sklearn.metrics import ConfusionMatrixDisplay
from collections import namedtuple
from collections import defaultdict as dd
import plotly.express as px
import chess
import chess.pgn

In [2]:
### Basic structure

DATA_DIR = "data/"
STOCKFISH_DIR = 'stockfish/'
ARCHIVE_DIR = DATA_DIR + 'archives/'

###
##
###

### Stockfish

STOCKFISH_AVX512_TAR = "stockfish-ubuntu-x86-64-avx512.tar"
STOCKFISH_AVX512 = "stockfish-ubuntu-x86-64-avx512"
STOCKFISH_AVX512_EXE = STOCKFISH_DIR + STOCKFISH_AVX512


###
##
###

### URLs

ELITE_DATABASE_URL  = "https://database.nikonoel.fr/lichess_elite_2021-11.zip"
STOCKFISH_DOWNSTREAM = "https://github.com/official-stockfish/Stockfish/releases/latest/download/"
STOCKFISH_AVX512_URL = STOCKFISH_DOWNSTREAM + STOCKFISH_AVX512_TAR

### 
##
###

### Datasets 

ELITE_DATASET_ARCHIVE = "lichess_elite_2021-11.zip"
ELITE_DATASET_FILENAME = "lichess_elite_2021-11.pgn"


LICHESS_EVAL_ARCHIVE = ARCHIVE_DIR + "lichess_db_eval.jsonl.zst"
LICHESS_EVAL_FILENAME = DATA_DIR + "lichess_db_eval.jsonl"


In [3]:
BITBOARD_DIR = DATA_DIR + 'bitboards/'
ELITE_DATA_BASE_URL  = "https://database.nikonoel.fr/"
STOCKFISH_DOWNSTREAM = "https://github.com/official-stockfish/Stockfish/releases/latest/download/"

SAMPLE_ZIP = "lichess_elite_2021-11.zip"
SAMPLE_PGN = "lichess_elite_2021-11.pgn"
SAMPLE_BITBOARD = "elite_bitboard.csv"
BITBOARD_1M = "1M.csv"
BITBOARD_10M = "10M.csv"
ELITE_DATA_SAMPLE_URL = ELITE_DATA_BASE_URL + SAMPLE_ZIP
SAMPLE_ZIP_FILE = ARCHIVE_DIR + SAMPLE_ZIP
SAMPLE_PGN_FILE  = DATA_DIR + SAMPLE_PGN

SAMPLE_BITBOARD_FILE = BITBOARD_DIR + SAMPLE_BITBOARD
BITBOARD_10M_FILE = BITBOARD_DIR + BITBOARD_10M
BITBOARD_1M_FILE = BITBOARD_DIR + BITBOARD_1M

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

# Dataset and models

In [4]:
# def sizeof_fmt(num, suffix="B"):
#     for unit in ("", "Ki", "Mi", "Gi"):
#         if abs(num) < 1024.0:
#             return f"{num:3.1f}{unit}{suffix}"
#         num /= 1024.0
#     return f"{num:.1f}Yi{suffix}"
    
def sizeof_fmt(num):
    for unit in ("", "K", "M", "G"):
        if abs(num) < 1000.0:
            return f"{num:.0f}{unit}"
        num /= 1000.0
    return f"{num:.0f}"


print(f"Allocated: {sizeof_fmt(torch.cuda.memory_allocated())}") 
print(f"Reserved: {sizeof_fmt(torch.cuda.memory_reserved())}") 

Allocated: 0
Reserved: 0


In [5]:
class BitboardDrawDatasetSimple(Dataset):
    def __init__(self, bitboard_file):
        bitboards_df = pd.read_csv(bitboard_file, dtype="uint64", usecols=range(12))
        metadata_df = pd.read_csv(bitboard_file)

        self.bitboards = self.bitboards_to_layers(bitboards_df)
        # self.meta_features = self.binary_features_to_layers(metadata_df[["white", "cK", "cQ", "ck", "cq"]])
        self.is_draw = metadata_df['draw'].to_numpy(dtype=np.single)
        self.length = self.is_draw.size

        # self.bitboards = np.hstack((self.bitboards, self.meta_features))

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        return self.bitboards[idx], self.is_draw[idx]

    def bitboards_to_layers(self, bitboards):
        cont = np.ascontiguousarray(np.expand_dims(bitboards.to_numpy(), 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

    def binary_features_to_layers(self, features):
        i = (features.to_numpy(dtype=np.uint64) - 1) ^ 0xffffffffffffffff
        cont = np.ascontiguousarray(np.expand_dims(i, 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

In [6]:
class BitboardDrawDataset(Dataset):

    @classmethod
    def from_dataset_info(cls, dataset_info):
        return cls(dataset_info.source_file, dataset_info.chunk_size, dataset_info.shuffle, dataset_info.in_memory)
    
    def __init__(self, bitboard_file, chunk_size, shuffle = True, in_memory = False):
        self.curr_batch = 0
        self.shuffle = shuffle
        self.bitboard_file = bitboard_file
        self.chunk_size = chunk_size
        self.datasamples = self.calculate_dataset_size()
        
        self.batches = ceil(self.datasamples / chunk_size)
        self.splits = np.arange(chunk_size, self.datasamples, chunk_size)
        self.indices = np.arange(1, self.datasamples+1)
        self.splits = np.append(self.splits, [0])

        self.in_memory = in_memory or self.chunk_size == self.datasamples
        
        self.load_data()
    
    def __len__(self):
        return self.datasamples
        
    def __getitem__(self, idx):
        batch = idx // self.chunk_size
        # Laod new data 
        if self.curr_batch != batch and not self.in_memory:
            self.curr_batch = batch
            self.load_data()            

        idx -= self.chunk_size * self.curr_batch
        
        return self.bitboards[idx], self.is_draw[idx]

    def bitboards_to_layers(self, bitboards):
        cont = np.ascontiguousarray(np.expand_dims(bitboards.to_numpy(), 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)

    def binary_features_to_layers(self, features):
        i = (features.to_numpy(dtype=np.uint64) - 1) ^ 0xffffffffffffffff
        cont = np.ascontiguousarray(np.expand_dims(i, 2)).view(np.uint8)
        return np.unpackbits(np.flip(cont, axis=2), axis=2).astype(np.single).reshape(-1, 768)
        
    def load_data(self):
        if self.curr_batch == 0 and self.shuffle:
            np.random.shuffle(self.indices)
            
        ignore = set(self.indices)

        if self.curr_batch == self.batches-1:
            ignore.difference_update(self.indices[self.splits[self.curr_batch-1]:])
        else:
            ignore.difference_update(self.indices[self.splits[self.curr_batch-1]:self.splits[self.curr_batch]])

        df = pd.read_csv(self.bitboard_file, dtype="uint64", usecols=range(18), skiprows=ignore)
        self.bitboards = self.bitboards_to_layers(df.iloc[:, range(12)])
        # self.meta_features = self.binary_features_to_layers(df[["white", "cK", "cQ", "ck", "cq"]])
        self.is_draw = df['draw'].to_numpy(dtype=np.single)

        # self.bitboards = np.hstack((self.bitboards, self.meta_features))
    
    def calculate_dataset_size(self):
        with open(self.bitboard_file) as f:
            return sum(1 for line in f) - 1

    def dataloader(self, batch_size):
        return DataLoader(self, batch_size=batch_size, shuffle=self.in_memory)

DatasetInfo = namedtuple("DatasetInfo", ["source_file", "chunk_size", "shuffle", "in_memory"])

In [7]:
class DenseNetwork(nn.Module):

    def __init__(self, layers, sizes, normalization=False):
        super().__init__()
        self.sizes = sizes
        self.layers = layers
        ll = []

        assert self.layers == len(sizes) - 2, "Wrong layers to sizes number."
        
        for i in range(layers+1):
            ll.append(nn.Linear(self.sizes[i], self.sizes[i+1]))   
            if i < layers:
                if layers > 1 and normalization:
                    ll.append(nn.BatchNorm1d(self.sizes[i+1]))
                ll.append(nn.ReLU())
                
        self.model = nn.Sequential(*ll)

    def forward(self, x):
        return self.model.forward(x)

    def __str__(self):
        return f"Dense_{'_'.join(map(str, self.sizes))}"

    def get_hidden_layer_count(self):
        return self.layers

In [8]:
class ConvLayer(nn.Module):

    def __init__(self, in_layers, out_layers, ksize=3, stride=1, padding=1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_layers, out_layers, kernel_size=ksize, stride=stride, padding=padding),
            nn.BatchNorm2d(out_layers),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.model.forward(x)

In [9]:
class ResLayer(nn.Module):

    def __init__(self, in_layers, out_layers, ksize=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_layers, out_layers, kernel_size=ksize, stride=stride, padding=padding),
            nn.BatchNorm2d(out_layers),
            nn.ReLU(),
            nn.Conv2d(out_layers, out_layers, kernel_size=ksize, stride=stride, padding=padding),
            nn.BatchNorm2d(out_layers),
        )

    def forward(self, x):
        return nn.functional.relu(self.conv(x) + x)

In [10]:
class ConvolutionNetwork(nn.Module):

    def __init__(self, neurons=256):
        super().__init__()
        self.neurons = neurons
        self.model = nn.Sequential(
            nn.Unflatten(1, (12, 8, 8)),
            ConvLayer(12, 32),
            # nn.MaxPool2d(kernel_size=3, stride=1),

            ConvLayer(32, 64),
            # nn.MaxPool2d(kernel_size=3, stride=1),

            ConvLayer(64, 128),
            nn.AvgPool2d(kernel_size=2, stride=2),

            ConvLayer(128, 256),
            nn.AvgPool2d(kernel_size=2, stride=2),
            
            nn.Flatten(1),

            DenseNetwork(3, [256 * 2 * 2, 128, 64, 32, 1], normalization=True),
        )

    def forward(self, x):
        return self.model.forward(x)

    def get_hidden_layer_count(self):
        return 3

    def __str__(self):
        return f"ConvNet"

In [11]:
class MiniAlphaZeroNetwork(nn.Module):

    def __init__(self, res_layers, layers):
        super().__init__()
        ll = []

        for i in range(res_layers):
            ll.append(ResLayer(layers, layers))

        self.model = nn.Sequential(
            nn.Unflatten(1, (12, 8, 8)),
            ConvLayer(12, layers),
            *ll,
            nn.Flatten(1),
            DenseNetwork(3, [layers*8*8, 1024, 256, 128, 1])
        )

    def forward(self, x):
        return self.model.forward(x)

    def get_hidden_layer_count(self):
        return 3

    def __str__(self):
        return f"MiniAlphaZero"

# Training

In [25]:
def timeit(f):

    def timed(*args, **kw):

        ts = time.time()
        result = f(*args, **kw)
        te = time.time()

        print(f"Took: {te-ts:.2f}s")
        return result
    return timed

class Train:
    
    def __init__(self, train_dataset, validate_dataset, batch_size):
        self.batch_size = batch_size
        
        self.train_dataset = train_dataset
        self.validate_dataset = validate_dataset
        
        self.train_dataloader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True)
        self.validate_dataloader = DataLoader(self.validate_dataset, batch_size=batch_size, shuffle=True)

        self.loss_fn = nn.BCEWithLogitsLoss()
        self.total_batches = len(self.train_dataloader)
        self.print_every = 100
        self.epoch_print_interval = 1
        
    def train_one_epoch(self, model, optimizer, p=False) -> int:
        running_loss = 0.
        last_loss = 0.
    
        for i, data in enumerate(self.train_dataloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)
    
            optimizer.zero_grad()

            outputs = model(inputs)
            
            loss = self.loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
    
            running_loss += loss.item()
            if i % self.print_every == self.print_every - 1:
                last_loss = running_loss / self.print_every
                if p: print(f"  batch {i+1} loss: {last_loss}")
                running_loss = 0.
            elif i == self.total_batches - 1:
                last_loss = running_loss / (i % self.print_every + 1)
                if p: print(f"  batch {i+1} loss: {last_loss}")
            
        return last_loss
    
    @timeit
    def train(self, model, optimizer, epochs, p=True):
        best_result = dd(None)
        try:
            best_result['vloss'] = np.inf
        
            for epoch in range(1, epochs + 1):
                if p and epoch % self.epoch_print_interval == 0 or epoch == 1: print(f'EPOCH {epoch}')
            
                # Make sure gradient tracking is on, and do a pass over the data
                model.train(True)
                avg_loss = self.train_one_epoch(model, optimizer)
            
                model.eval()
    
                with torch.no_grad():
                    train_acc, train_loss, train_prec, train_recall = self.test(model, self.train_dataloader)
                    # train_acc, train_loss, train_prec, train_recall = 0, 0, 0, 0
                    validate_acc, validate_loss, validate_prec, validate_recall = self.test(model, self.validate_dataloader)
    
                    if p and epoch % self.epoch_print_interval == 0 or epoch == 1: 
                        print(tabulate([["Loss", train_loss, validate_loss], 
                                        ["Precision", train_prec, validate_prec],
                                        ["Recall", train_recall, validate_recall],
                                        ["Accuracy", f"{train_acc:.2f}%", f"{validate_acc:.2f}%"]],
                                       headers=["", "Train", "Test"]))
            
                if validate_loss < best_result['vloss']:
                    best_result['vloss'] = validate_loss
                    best_result['model'] = deepcopy(model.state_dict())
                    best_result['epoch'] = epoch
                    best_result['acc']   = validate_acc
                    
        except KeyboardInterrupt:
            self.save_model(model, best_result['model'], best_result['epoch'], optimizer.param_groups[0]['lr'], optimizer.param_groups[0]['momentum'], best_result['acc'])

        return best_result

    def test(self, model, dataloader):
        acc, loss = 0, 0
        conf_mat = torch.zeros(2, 2) 
        for i, (vinputs, vlabels) in enumerate(dataloader):
            vinputs, vlabels = vinputs.to(device), vlabels.unsqueeze(1).to(device)
            voutputs = model(vinputs)
            pred = nn.functional.sigmoid(voutputs).round()
            
            acc += (pred == vlabels).sum() / self.batch_size
            loss += self.loss_fn(voutputs, vlabels) 
            # conf_mat += confusion_matrix(vlabels.to('cpu'), pred.to('cpu'))

        acc = acc / (i+1) * 100
        loss /= (i+1)

        prec = conf_mat[1, 1] / (conf_mat[1, 1] + conf_mat[0, 1])
        recall = conf_mat[1, 1] / (conf_mat[1, 1] + conf_mat[1, 0])
        
        return acc, loss, prec, recall
    
    def find_best(self, model, epochs, lr=1e-3, momentum=0.9):
        print(str(model))
        model.to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=0.001)
        # optimizer = optim.Adam(model.parameters())
        best_results = self.train(model, optimizer, epochs)
        self.save_model(model, best_results['model'], best_results['epoch'], lr, momentum, best_results['acc'])
        model.to('cpu')
        self.cleanup()
        
        return best_results

    def save_model(self, model, state_dict, epoch, lr, momentum, acc):
        base_path = f"models/{sizeof_fmt(len(self.train_dataset))}/{str(model.get_hidden_layer_count())}l/"
        if(not os.path.isdir(base_path)):
            !mkdir -p {base_path}
        
        model_path = base_path + f"{str(model)}_b{self.batch_size}_e{epoch}_lr{lr}_m{momentum}_acc{acc:.2f}"
        torch.save(state_dict, model_path)
            
    def cleanup(self):  
        gc.collect()
        torch.cuda.empty_cache()
        

In [13]:
def k_fold_cross_validation(dataset_info, k, new_model, epochs=50, lr=1e-2):
    def result_avg(key):
        return sum([r[key] for r in results]) / k
    
    dataset = BitboardDrawDataset.from_dataset_info(dataset_info)
    folds = random_split(dataset, [ 1/k ] * k)
    results = []
    
    for i in range(k):
        print(f"RUNNING ITERATION {i+1}.")
        train = ConcatDataset([x for j,x in enumerate(folds) if j != k])
        test  = folds[i]
        model = new_model()

        trainer = Train(train, test, 512)

        results.append(trainer.find_best(model, epochs, lr=lr))

    print("DONE")
    avg_acc = result_avg('acc')
    print(f"AVG ACC: {avg_acc}")
    

In [14]:
def standard_training(train_dataset_info, test_dataset_info, model, epochs=50, lr=1e-2):
    train = BitboardDrawDataset.from_dataset_info(train_dataset_info)
    test  = BitboardDrawDataset.from_dataset_info(test_dataset_info)
    trainer = Train(train, test, 512)

    return trainer.find_best(model, epochs, lr=lr)

In [26]:
eval_dataset_info = DatasetInfo("data/eval_dataset/bitboards/6000000_0.csv", 6000000, False, True)
validate_dataset_info = DatasetInfo("data/eval_dataset/bitboards/1000000_30000000.csv", 1000000, False, True)

In [52]:
test_dataset = DatasetInfo("data/eval_dataset/bitboards/100000_1509579_19_24.csv", 100000, False, True)

In [49]:
k_fold_cross_validation(eval_dataset_info, 5, lambda: DenseNetwork(1, [768, 768, 1]), epochs=50)

RUNNING ITERATION 1.
Dense_768_768_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.5995900630950928  0.5993191003799438
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.95%              66.98%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5815632343292236  0.5812456011772156
Precision  nan                 nan
Recall     nan                 nan
Accuracy   68.59%              68.60%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5676711797714233  0.5673450231552124
Precision  nan                 nan
Recall     nan                 nan
Accuracy   69.83%              69.84%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5551223158836365  0.5547881722450256
Precision  nan                 nan
Recall     nan              

In [27]:
# model = DenseNetwork(3, [768, 10_000, 256, 128, 1])
# model = DenseNetwork(3, [768, 768, 256, 128, 1])
# model = DenseNetwork(2, [768, 256, 128, 1])
# model = DenseNetwork(1, [768, 768, 1])
# model = DenseNetwork(0, [768, 1])
# model = ConvolutionNetwork()
model = MiniAlphaZeroNetwork(6, 32)
# model.load_state_dict(torch.load("models/1M/3l/Dense_768_768_256_128_1_b512_e50_lr0.01_m0.9_acc92.27", map_location="cpu"))

In [28]:
standard_training(eval_dataset_info, validate_dataset_info, model, epochs=10)

MiniAlphaZero
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.5921809077262878  0.6279385685920715
Precision  nan                 nan
Recall     nan                 nan
Accuracy   67.07%              64.80%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5368253588676453  0.5750608444213867
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.81%              69.01%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5397118926048279  0.5827426910400391
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.94%              68.48%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5417366027832031  0.5880905985832214
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.43

defaultdict(None,
            {'vloss': tensor(0.5597, device='cuda:0'),
             'model': OrderedDict([('model.1.model.0.weight',
                           tensor([[[[-2.6960e-02, -3.7865e-03, -2.8808e-02],
                                     [-2.3475e-03,  4.6759e-02, -2.3275e-02],
                                     [-2.0079e-03, -1.3963e-02, -2.7925e-02]],
                           
                                    [[ 1.0891e-01,  6.1010e-02,  1.3627e-02],
                                     [-1.4610e-02,  2.2349e-01, -2.0620e-02],
                                     [-1.9177e-02,  5.6077e-02, -2.4855e-03]],
                           
                                    [[ 6.5802e-02,  5.6134e-02,  1.0835e-01],
                                     [ 7.0924e-03,  1.1770e-01, -4.1149e-02],
                                     [-2.4105e-02,  4.1086e-04, -5.3843e-03]],
                           
                                    ...,
                           
       

In [58]:
standard_training(eval_dataset_info, validate_dataset_info, model, epochs=100)

Dense_768_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.5788663625717163  0.6263464093208313
Precision  nan                 nan
Recall     nan                 nan
Accuracy   68.66%              64.30%
EPOCH 2
           Train               Test
---------  ------------------  -----------------
Loss       0.5405199527740479  0.617194414138794
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.81%              65.51%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5102490186691284  0.6108804941177368
Precision  nan                 nan
Recall     nan                 nan
Accuracy   74.01%              66.55%
EPOCH 4
           Train                Test
---------  -------------------  ------------------
Loss       0.49315857887268066  0.6155732870101929
Precision  nan                  nan
Recall     nan                  nan
Acc

defaultdict(None,
            {'vloss': tensor(0.6109, device='cuda:0'),
             'model': OrderedDict([('model.0.weight',
                           tensor([[-0.0237, -0.0210, -0.0029,  ...,  0.0209, -0.0342,  0.0159],
                                   [-0.0137, -0.0030,  0.0268,  ..., -0.0234, -0.0301, -0.0103],
                                   [-0.0097, -0.0236,  0.0174,  ..., -0.0175, -0.0432, -0.0219],
                                   ...,
                                   [-0.0011,  0.0006, -0.0051,  ..., -0.0499, -0.0113,  0.0017],
                                   [ 0.0109,  0.0178,  0.0126,  ...,  0.0217,  0.0074,  0.0221],
                                   [ 0.0277,  0.0275,  0.0270,  ...,  0.0150, -0.0142,  0.0136]],
                                  device='cuda:0')),
                          ('model.0.bias',
                           tensor([-4.3279e-02, -8.5282e-02,  3.5466e-02, -4.2082e-03, -7.4021e-02,
                                   -4.3472e-02, -3.559

In [26]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.5408589243888855  0.5241131782531738
Precision  nan                 nan
Recall     nan                 nan
Accuracy   72.37%              73.87%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5238837003707886  0.5175482630729675
Precision  nan                 nan
Recall     nan                 nan
Accuracy   73.47%              74.14%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5144701600074768  0.5142543911933899
Precision  nan                 nan
Recall     nan                 nan
Accuracy   74.13%              74.55%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5009475946426392  0.5277002453804016
Precision  nan                 nan
Recall     nan                 nan
Accura

NameError: name 'lr' is not defined

## Results of traingin

In [19]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6204628944396973  0.6315557956695557
Precision  nan                 nan
Recall     nan                 nan
Accuracy   65.92%              64.05%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.6179744005203247  0.6281192898750305
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.26%              64.48%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.6164440512657166  0.6277697086334229
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.42%              64.46%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.6157306432723999  0.6264545321464539
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.46% 

In [13]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6102012395858765  0.6209103465080261
Precision  nan                 nan
Recall     nan                 nan
Accuracy   66.76%              65.18%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5948296785354614  0.6062929034233093
Precision  nan                 nan
Recall     nan                 nan
Accuracy   68.64%              67.05%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5827310085296631  0.5942636132240295
Precision  nan                 nan
Recall     nan                 nan
Accuracy   69.76%              68.22%
EPOCH 4
           Train               Test
---------  ------------------  ----------------
Loss       0.5741562843322754  0.58530193567276
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.34% 

In [13]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  -----------------
Loss       0.6070567965507507  0.618075966835022
Precision  nan                 nan
Recall     nan                 nan
Accuracy   67.12%              65.64%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5833998918533325  0.5958794951438904
Precision  nan                 nan
Recall     nan                 nan
Accuracy   69.69%              68.03%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5701068043708801  0.5803307890892029
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.67%              69.39%
EPOCH 4
           Train               Test
---------  ------------------  ------------------
Loss       0.5587959885597229  0.5666363835334778
Precision  nan                 nan
Recall     nan                 nan
Accuracy   7

In [16]:
pipe.find_best(model, 50, lr=1e-2)

Dense_768_768_256_128_1
EPOCH 1
           Train               Test
---------  ------------------  ------------------
Loss       0.6034354567527771  0.6142628788948059
Precision  nan                 nan
Recall     nan                 nan
Accuracy   67.42%              66.08%
EPOCH 2
           Train               Test
---------  ------------------  ------------------
Loss       0.5760378241539001  0.5863416790962219
Precision  nan                 nan
Recall     nan                 nan
Accuracy   70.27%              69.00%
EPOCH 3
           Train               Test
---------  ------------------  ------------------
Loss       0.5583397746086121  0.5652486681938171
Precision  nan                 nan
Recall     nan                 nan
Accuracy   71.71%              70.89%
EPOCH 4
           Train              Test
---------  -----------------  ------------------
Loss       0.541077733039856  0.5462588667869568
Precision  nan                nan
Recall     nan                nan
Accuracy   

In [24]:
def save_weights_to_csv(model: nn.Module, directory: str):
    if not os.path.exists(directory):
        os.makedirs(directory)

    for name, param in model.named_parameters():
        layer_name, param_type = name.rsplit('.', 1)
        param_data = param.detach().cpu().numpy()
        df = pd.DataFrame(param_data)
        filename = f"{layer_name}_{param_type}.csv"
        df.to_csv(os.path.join(directory, filename), header=None, index=False)

    print(f"All weights and biases have been saved to the '{directory}' directory.")

save_weights_to_csv(model, 'models/weights/3l/92/')

All weights and biases have been saved to the 'models/weights/3l/92/' directory.


In [29]:
def unpack_to_bits(dataset):
    dataset.map(lambda x: ";".join(np.char.mod('%d', np.unpackbits(np.array([x]).view(np.uint8))))).to_csv("dataset_bits.csv", sep=";", index=False)
    bity = pd.read_csv("dataset_bits.csv", dtype="uint64", sep=";", header=None)
    dataset.rename(columns={"draw": 768})
    pd.concat([bity, dataset.rename(columns={"draw": 768})], axis=1).to_csv("dataset_bits.csv", sep=";", index=False)