In [14]:
import chess
from random import choice

from torch.utils.data import Dataset,DataLoader,SubsetRandomSampler
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
import numpy as np 
from model import * 
from tqdm import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'

Create multiple dataset classes for different training tasks:

 -- learn the next word: an autoregressive task (if the model is to be decoder-based)


 For each of these we may have to modify the last linear layer of the model (to match the dimensionality of the output), so maybe it should not be part of ChessEncoder but live outside of it.

The model needs to be changed to a decoder-only model: https://datascience.stackexchange.com/questions/65241/why-is-the-decoder-not-a-part-of-bert-architecture

https://arxiv.org/pdf/2204.05832.pdf#:~:text=Decoder%2Donly%20models%20process%20a,decoder%20processes%20only%20the%20target.


Google PaLM: https://arxiv.org/pdf/2204.02311.pdf


In [2]:
class LegalMovesBase(object):
  """Sliding-window next-token dataset over randomly played legal chess games.

  All generated games are concatenated into one flat token list
  (``games_arr``); every game is followed by an ``'<END>'`` token. Item ``i``
  is the pair ``(x, y)`` where ``x`` is the window of ``max_len`` tokens
  starting at ``i`` and ``y`` is the same window shifted one token to the
  right, i.e. the next-token targets for ``x``.
  """

  def __init__(self,num_games:int=int(1e3),max_len:int=100):
    """Store the settings; no games are generated until ``generate_games``.

    Args:
      num_games: number of random games to play in ``generate_games``.
      max_len: number of tokens in each input/target window.
    """
    self.num_games = num_games
    self.max_len = max_len
    self.games_arr = []

  def generate_games(self):
    """Play ``num_games`` random games and store their moves in ``games_arr``.

    Each move is picked uniformly from the legal moves and recorded in the
    notation returned by ``board.lan`` with any ``'='`` removed. A game stops
    at checkmate, insufficient material or stalemate. Any previously stored
    games are replaced.
    """
    games_arr = []
    print('This may take a while...Please wait.')
    for _ in tqdm(range(self.num_games)):
      board = chess.Board()
      game = []
      while True:
        # Strip '=' so promotions are written e.g. 'e7-e8Q' rather than 'e7-e8=Q'.
        move = board.lan(choice(list(board.legal_moves))).replace('=', '')
        board.push_san(move)
        game.append(move)
        if board.is_checkmate() or board.is_insufficient_material() or board.is_stalemate():
          break
      game.append('<END>')
      games_arr.extend(game)
    self.games_arr = games_arr

  def __getitem__(self, i):
    """Return the ``(x, y)`` window pair starting at position ``i``.

    Negative indices count from the end, as for a list.

    Raises:
      IndexError: if ``i`` is outside ``[-len(self), len(self))``. Raising
        here is also what lets plain iteration over the dataset terminate.
    """
    n = len(self)
    if i < 0:
      i += n
    if not 0 <= i < n:
      raise IndexError('dataset index out of range')
    x = self.games_arr[i: i + self.max_len]
    y = self.games_arr[i + 1: i + self.max_len + 1]
    return x, y

  def __len__(self):
    """Number of full windows that also have a complete shifted target."""
    return max((len(self.games_arr) - self.max_len),0)


In [3]:
# Build the move dataset with the class defaults spelled out, then fill it
# with randomly played games (slow: one full random game per `num_games`).
dataset = LegalMovesBase(num_games=int(1e3), max_len=100)
dataset.generate_games()

  0%|          | 1/1000 [00:00<01:52,  8.91it/s]

This may take a while...Please wait.


100%|██████████| 1000/1000 [02:06<00:00,  7.91it/s]


In [12]:
valid_test_split = 0.4
random_seed= 31
batch_size = 100
data_size = len(dataset)

# Training set: 0.6 of the total dataset. `random_state` makes the shuffled
# split reproducible across kernel restarts (the seed was previously unused).
train_idx, valid_test_idx = train_test_split(
    np.arange(data_size),
    test_size=valid_test_split,
    shuffle=True,
    random_state=random_seed,
)

# Validation and test sets: equal halves of the remainder, i.e. 0.2 of the
# total dataset each.
valid_idx, test_idx = train_test_split(
    valid_test_idx,
    test_size=0.5,
    shuffle=True,
    random_state=random_seed,
)


# Each sampler draws, in random order, only from its own index subset.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
validation_loader = DataLoader(dataset, batch_size=batch_size,sampler=valid_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size,sampler=test_sampler)



In [5]:

def accuracy_score(predictions, labels,thresh=0.5):
    """Fraction of positions whose arg-max prediction equals the label.

    Args:
        predictions: tensor of scores whose last dimension indexes the classes.
        labels: integer tensor with the shape of ``predictions`` minus its
            last dimension.
        thresh: unused; kept so existing callers keep working.

    Returns:
        The accuracy as a 0-d numpy array.
    """
    pred_labels = predictions.argmax(dim=-1)
    # Compare on the predictions' device so CPU labels work with GPU outputs.
    corrects = (pred_labels == labels.to(pred_labels.device))
    # Normalise by the number of labelled positions, not only the batch size;
    # dividing by size(0) gave values above 1 for (batch, seq) labels.
    accuracy = corrects.sum().float() / float(labels.numel())
    return accuracy.cpu().detach().numpy()


def run_epoch(train_mode,loader,epoch, model, optimizer, loss_fnc):
    """Run one pass over ``loader`` and return the averaged metrics.

    Args:
        train_mode: if true, put the model in training mode and update its
            weights; otherwise put it in eval mode and only measure.
        loader: iterable of ``(X, target)`` batches that supports ``len``.
        epoch: epoch number, used for the progress-bar label and the result.
        model: callable on a batch of tokens; must expose
            ``embedding.translate_wti`` to map target tokens to indices.
        optimizer: optimizer stepped once per batch (training mode only).
        loss_fnc: loss called as ``loss_fnc(output.transpose(1, 2), target)``.

    Returns:
        dict with keys ``'epoch'``, ``'loss'`` (mean batch loss),
        ``'n_batches'`` and ``'running_accuracy'`` (mean batch accuracy).
    """
    num_batches = len(loader)
    epoch_metrics = {
        'epoch': epoch,
        'loss': 0.0,
        'n_batches': num_batches,
        'running_accuracy':0.0
    }

    if train_mode:
        model.train()
    else:
        model.eval()

    msg= 'Training' if train_mode else 'Validation'

    # Skip autograd bookkeeping entirely when only evaluating.
    with torch.set_grad_enabled(bool(train_mode)):
        for (X,target) in tqdm(loader, desc=f'{msg} epoch {epoch}', total=num_batches,position=0,leave=True):

            # presumably the default collate yields (seq, batch) token lists, so
            # the transpose gives (batch, seq) -- TODO confirm against the loader
            X,target = np.array(X).T,np.array(target).T
            target = model.embedding.translate_wti(target)

            if train_mode:
                optimizer.zero_grad()

            # Call the module itself (not .forward) so registered hooks run.
            output = model(X)

            # Targets must be on the output's device, otherwise the loss
            # raises a device mismatch when the model runs on CUDA.
            target = torch.from_numpy(target).long().to(output.device)

            # The loss gets the class dimension second, hence the transpose.
            loss = loss_fnc(output.transpose(1,2), target)

            # No-op for a scalar loss; reduces a per-element loss to a scalar.
            total_loss = loss.mean()

            if train_mode:
                total_loss.backward()
                optimizer.step()

            epoch_metrics['running_accuracy'] += accuracy_score(output,target)

            epoch_metrics['loss'] += float(total_loss.cpu().detach().numpy())

    # An empty loader leaves both metrics at 0.0 instead of dividing by zero.
    if num_batches:
        epoch_metrics['loss'] = epoch_metrics['loss'] / num_batches
        epoch_metrics['running_accuracy'] = epoch_metrics['running_accuracy'] / num_batches

    return epoch_metrics

In [7]:
N_epochs=10

model = ChessTransformer(embed_path='d:/chess/chess_embedding/fasttext/fasttext_chess2vec.model')
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters())
loss_fnc = nn.CrossEntropyLoss()


# One row per epoch: column 0 is the mean loss, column 1 the mean accuracy.
train_log = np.zeros([N_epochs,2])
valid_log = np.zeros([N_epochs,2])
train_acc_curve = []
valid_acc_curve = []
for epoch in range(N_epochs):
  train_metrics = run_epoch(train_mode=True,loader=train_loader,epoch=epoch,model=model, optimizer=optim,loss_fnc = loss_fnc)
  torch.cuda.empty_cache()
  valid_metrics = run_epoch(train_mode=False,loader=validation_loader,epoch=epoch,model=model, optimizer=optim,loss_fnc = loss_fnc)
  torch.cuda.empty_cache()

  print("\n Metrics after epoch:{}".format(epoch))
  print('\n Training: loss: {}; accuracy: {}'.format(round(train_metrics['loss'],2),round(train_metrics['running_accuracy'],2)))
  print('\n Validation: loss: {}; accuracy: {}'.format(round(valid_metrics['loss'],2),round(valid_metrics['running_accuracy'],2)))

  train_log[epoch,:]=(train_metrics['loss'],train_metrics['running_accuracy'])
  # Record the validation metrics and accuracy curves too; they were
  # allocated above but previously never filled in.
  valid_log[epoch,:]=(valid_metrics['loss'],valid_metrics['running_accuracy'])
  train_acc_curve.append(train_metrics['running_accuracy'])
  valid_acc_curve.append(valid_metrics['running_accuracy'])

Training epoch 0:   0%|          | 3/2255 [06:45<84:31:03, 135.11s/it] 


KeyboardInterrupt: 

In [6]:
# Fresh, untrained model placed on the selected device.
# assumes ChessTransformer.to returns the module itself, as torch.nn.Module.to does
model = ChessTransformer(
    embed_path='d:/chess/chess_embedding/fasttext/fasttext_chess2vec.model'
).to(device)



In [13]:
# Smoke test: push only the first training batch through the model.
for X, target in train_loader:
    # Turn the collated token lists into arrays and swap their two axes.
    X = np.array(X).T
    target = np.array(target).T

    # Map target tokens to indices, then to a CPU LongTensor.
    target = model.embedding.translate_wti(target)
    target = Variable(torch.from_numpy(target)).type(torch.LongTensor)

    output = model.forward(X)
    break  # one batch is enough


embed 0.30812573432922363
position_encoding 0.0062596797943115234
masking 0.0009968280792236328
forward 1.5510761737823486
forward 1.5897037982940674
forward 1.8883414268493652
forward 1.7434546947479248
forward 1.9937310218811035
forward 6.094274520874023
