

Three stages of training:
- Stage 1: initial training on arbitrary game sequences, so that the model learns legal moves and the alternation of moves between the two players
- Stage 2: the algorithm learns winning moves for the black player (trained on the subset of the data containing games won by Black)
- Stage 3: the algorithm learns winning moves for the white player




In [None]:
# !pip install chess
import pandas as pd 
#from google.colab import files,drive
import IPython
import chess
import chess.svg
import numpy as np 
from gensim.models import fasttext
from tqdm import tqdm

In [None]:
"""
zstd -d lichess_db_standard_rated_2018-06.pgn.zst
"""

"""
pgn-extract.exe lichess_db_standard_rated_2018-06.pgn -otrain_20_million.pgn -w1000 -Wxolalg -t WhiteElo >= "1500" BlackElo >= "1500" -bl20 --noresults --notags --nomovenumbers --nocomments --nonags --nosetuptags --quiet
"""


In [None]:
#drive.mount('/content/drive')

In [None]:
#!unzip 'drive/MyDrive/chess/train_10_million.zip' -d 'train'

In [None]:
class ChessDatabase(object):
    """Re-iterable corpus of games read from a text file, one game per line.

    Each line is split on whitespace into tokens and wrapped in
    ``'<START>'`` / ``'<END>'`` markers. The file is re-opened on every
    pass, so the object can be iterated any number of times.
    """

    def __init__(self, filename):
        # Path of the text file; it is only opened when iterating or counting.
        self.filename = filename

    def __len__(self):
        """Return the number of lines in the file (reads the whole file)."""
        # The context manager closes the handle deterministically instead of
        # leaving it to the garbage collector.
        with open(self.filename, 'r') as handle:
            return sum(1 for _ in handle)

    def __iter__(self):
        """Yield each line as ``['<START>', token, ..., '<END>']``."""
        with open(self.filename, 'r') as handle:
            for line in handle:
                # A blank line still yields the two markers.
                yield ['<START>', *line.split(), '<END>']

In [None]:
class WhiteMoves(object):
    """Re-iterable view of a game file that keeps every second token, starting with the first.

    Each line is split on whitespace and the tokens at positions 0, 2, 4, ...
    are yielded as a list. Assumes each line alternates White/Black moves
    beginning with White -- TODO confirm against the data file.
    """

    def __init__(self, filename):
        # Path of the text file; it is only opened when iterating or counting.
        self.filename = filename

    def __len__(self):
        """Return the number of lines in the file (reads the whole file)."""
        # Close the handle deterministically rather than relying on the GC.
        with open(self.filename, 'r') as handle:
            return sum(1 for _ in handle)

    def __iter__(self):
        """Yield, for each line, the list of tokens at even positions."""
        with open(self.filename, 'r') as handle:
            for line in handle:
                yield line.split()[::2]

class BlackMoves(object):
    """Re-iterable view of a game file that keeps every second token, starting with the second.

    Each line is split on whitespace and the tokens at positions 1, 3, 5, ...
    are yielded as a list. Assumes each line alternates White/Black moves
    beginning with White -- TODO confirm against the data file.
    """

    def __init__(self, filename):
        # Path of the text file; it is only opened when iterating or counting.
        self.filename = filename

    def __len__(self):
        """Return the number of lines in the file (reads the whole file)."""
        # Close the handle deterministically rather than relying on the GC.
        with open(self.filename, 'r') as handle:
            return sum(1 for _ in handle)

    def __iter__(self):
        """Yield, for each line, the list of tokens at odd positions."""
        with open(self.filename, 'r') as handle:
            for line in handle:
                yield line.split()[1::2]
            

In [None]:
# Text file that the three corpus classes below read line by line.
# NOTE(review): presumably the pgn-extract output described earlier in this
# notebook (that command writes train_20_million.pgn) -- confirm the file name.
filepath = 'D:/chess/train_10_million.pgn'
# Full games wrapped in '<START>' / '<END>' markers.
chess_games = ChessDatabase(filepath)
# Per-line tokens at odd positions (1, 3, 5, ...).
black_moves = BlackMoves(filepath)
# Per-line tokens at even positions (0, 2, 4, ...).
white_moves = WhiteMoves(filepath)

In [None]:
# First game of the corpus, including the '<START>' and '<END>' markers.
game_example = next(iter(chess_games))

# Number of lines in the file, in millions (shown as the cell output).
# Note that len() reads the whole file.
len(chess_games)/1e6

In [None]:
# Replay the example game on a fresh board to check that python-chess can
# parse the move tokens. The slice drops the '<START>' / '<END>' markers.
partie = game_example[1:-1]

board = chess.Board()

# `i` counts the moves applied successfully; on failure it is the index of
# the offending move.
i = 0
for zug in partie:
    try:
        board.push_san(zug)
    except ValueError:
        # python-chess reports invalid, illegal and ambiguous SAN moves with
        # ValueError (or subclasses of it). Catching only that keeps
        # KeyboardInterrupt and genuine bugs from being swallowed.
        print(i)
        break
    i += 1

# Render the final position as SVG and show it in the notebook.
board_svg = chess.svg.board(board, size=350)
IPython.display.display(IPython.display.HTML(board_svg))

In [None]:
# !mkdir drive/MyDrive/chess/chess_embedding_fasttext_model/

In [None]:
from gensim.test.utils import get_tmpfile
from gensim.models.callbacks import CallbackAny2Vec


class EpochSaver(CallbackAny2Vec):
    """Training callback that writes the model to disk whenever an epoch ends.

    Every save goes to the same ``output_path``, so each epoch overwrites the
    file written by the previous one.
    """

    def __init__(self, output_path):
        # Number of epochs completed so far.
        self.epoch = 0
        # Destination handed to ``model.save`` after every epoch.
        self.output_path = output_path

    def on_epoch_end(self, model):
        """Save ``model`` to ``self.output_path``, then count the finished epoch."""
        destination = self.output_path
        model.save(destination)
        self.epoch = self.epoch + 1


class EpochLogger(CallbackAny2Vec):
    """Training callback that prints progress at training start and around each epoch."""

    def __init__(self):
        # Zero-based index of the epoch currently being reported.
        self.epoch = 0

    def on_train_begin(self, model):
        """Announce that training is starting."""
        print("Training starts now.")

    def on_epoch_begin(self, model):
        """Print the index of the epoch that is starting."""
        print(f"Epoch #{self.epoch} start")

    def on_epoch_end(self, model):
        """Print the index of the epoch that just ended, then advance the counter."""
        finished = self.epoch
        print(f"Epoch #{finished} end")
        self.epoch = finished + 1


class tqdmEveryIteration(object):
    """Re-iterable wrapper around ``ChessDatabase`` that creates a new tqdm bar on every pass."""

    def __init__(self, filename):
        super().__init__()
        # The wrapped corpus; it re-opens ``filename`` each time it is iterated.
        self.inner_iterable = ChessDatabase(filename)

    def __iter__(self):
        """Return an iterator over the games, wrapped in a fresh progress bar."""
        progress = tqdm(self.inner_iterable, position=0, leave=True)
        return iter(progress)

# Callbacks passed to the FastText training call: one prints per-epoch
# progress, the other saves the model to the given path after every epoch.
epoch_logger = EpochLogger()
epoch_saver = EpochSaver(output_path="drive/MyDrive/chess/chess_embedding_fasttext_model/chess2vec.model")

# Corpus wrapped so that each pass over the data shows a tqdm progress bar.
tqdm_chess_games=tqdmEveryIteration(filepath)

In [None]:
# Train FastText embeddings on the full games: 500-dimensional vectors,
# window of 7, 5 epochs, 4 workers, with the logging and saving callbacks.
# NOTE(review): per the gensim docs, sg=0 selects CBOW and min_n=1 is the
# minimum character n-gram length -- confirm against the installed version.
fastembed = fasttext.FastText(sentences=tqdm_chess_games, vector_size=500, window=7,epochs=5, workers=4,min_n=1,
                              sg=0,callbacks=[epoch_logger,epoch_saver])

In [None]:
# Clear the callbacks, then write the final model to the same path the
# per-epoch saver used.
# NOTE(review): presumably the callbacks are cleared so the callback objects
# are not stored with the saved model -- confirm.
fastembed.callbacks = ()
fastembed.save("drive/MyDrive/chess/chess_embedding_fasttext_model/chess2vec.model")

In [None]:
# Word2Vec is not imported in the visible import cells; import it here so
# this cell (and the later Word2Vec.load calls) can run.
from gensim.models import Word2Vec

# Train a 500-dimensional Word2Vec embedding on the even-position (White)
# tokens only. gensim 4 renamed `size` to `vector_size`; the FastText call in
# this notebook already uses the gensim 4 keyword names.
modelWhiteOnly = Word2Vec(sentences=white_moves, vector_size=500, window=7, workers=8)

In [None]:
# Extend the White-only vocabulary with the tokens from Black's moves, then
# continue training the same model on Black's moves.
modelWhiteOnly.build_vocab(black_moves, update=True)
# NOTE(review): gensim's train() appears to return training statistics rather
# than a model, so `model_sep_trained` is probably not a model; the trained
# model would remain `modelWhiteOnly` -- confirm.
model_sep_trained = modelWhiteOnly.train(black_moves, total_examples=modelWhiteOnly.corpus_count, epochs=modelWhiteOnly.epochs)
#!mkdir drive/MyDrive/chess/chess_embedding_sep_model/
#modelWhiteOnly.save("drive/MyDrive/chess/chess_embedding_sep_model/chess2vec.model")

In [None]:
# Load two previously saved Word2Vec models from disk.
# NOTE(review): no visible cell writes either file (the save call for the
# "sep" model is commented out) -- both files must already exist.
model500=Word2Vec.load("drive/MyDrive/chess/chess_embedding_500_model/chess2vec.model")
modelsep=Word2Vec.load("drive/MyDrive/chess/chess_embedding_sep_model/chess2vec.model")

In [None]:
# Print the tokens most similar to the move 'Ng1-f3' in each of the two
# loaded models, for a side-by-side comparison.
print(list(modelsep.wv.similar_by_word('Ng1-f3')))

print(model500.wv.similar_by_word('Ng1-f3'))


In [None]:
# Show the trained FastText model object as the cell output.
fastembed