In [102]:
import torch.nn as nn
import torch

class rl(nn.Module):
    """Fully connected network mapping one input tensor to ``output_size`` values.

    The network is ``Flatten -> [Linear -> Dropout -> ReLU] * len(layer_sizes)
    -> Linear -> Tanh``, so every output lies in ``[-1, 1]``.  ``forward`` adds
    the batch dimension itself, which means the model always processes exactly
    one sample per call.

    Args:
        input_size: Number of elements in one flattened input sample.
        output_size: Number of output values.
        layer_sizes: Width of each hidden layer, in order.
        dropout: Dropout probability applied after every hidden ``Linear``.
    """

    def __init__(
            self,
            input_size: int,
            output_size: int,
            layer_sizes: list[int],
            dropout: float = 0.1):
        super().__init__()

        # Flatten everything except the batch dimension added in ``forward``.
        layers = [nn.Flatten(start_dim=1)]
        old_size = input_size
        for layer_size in layer_sizes:
            layers.append(nn.Linear(old_size, layer_size))
            layers.append(nn.Dropout(dropout))
            layers.append(nn.ReLU())
            old_size = layer_size

        layers.append(nn.Linear(old_size, output_size))
        layers.append(nn.Tanh())
        self.model = nn.Sequential(*layers)
        self.criterion = nn.MSELoss()

    def forward(self, x):
        """Run the network on a single, unbatched sample.

        Returns:
            Tensor of shape ``(1, output_size)``.
        """
        x = torch.unsqueeze(x, 0)
        return self.model(x)

    def fit(self, tensor, score, optimalizer):
        """Perform one optimisation step pulling ``forward(tensor)`` towards ``score``.

        Args:
            tensor: Single unbatched input sample.
            score: Scalar target (number or 0-dim tensor).
            optimalizer: Optimizer built over this module's parameters.
        """
        score = torch.tensor([[score]], dtype=torch.float32)
        # Drop any gradients left over from earlier backward passes so that
        # this step is driven by the current sample only.
        optimalizer.zero_grad()
        out = self.forward(tensor)
        loss = self.criterion(out, score)
        loss.backward()
        optimalizer.step()
        optimalizer.zero_grad()

    def predict(self, tensor):
        """Return the network output for ``tensor`` without training side effects.

        Inference runs in eval mode (dropout disabled) and without autograd, so
        repeated calls on the same input give the same, graph-free result.  The
        module's previous train/eval mode is restored afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self.forward(tensor)
        finally:
            self.train(was_training)



In [103]:
import chess

class Game:
    """Chess position kept both as a python-chess board and as a tensor.

    The tensor has shape ``(6, 8, 8)``: one plane per piece type (plane index is
    ``piece_type - 1``), rows running from rank 8 (row 0) down to rank 1
    (row 7) and columns from file a (column 0) to file h (column 7).  A white
    piece is stored as ``1``, a black piece as ``-1`` and an empty square as
    ``0``.  The tensor carries piece placement only.
    """

    # Signed piece code (plane number, negated for black) -> piece object.
    layer_to_piece = {
        1: chess.Piece(chess.PAWN, chess.WHITE),
        2: chess.Piece(chess.KNIGHT, chess.WHITE),
        3: chess.Piece(chess.BISHOP, chess.WHITE),
        4: chess.Piece(chess.ROOK, chess.WHITE),
        5: chess.Piece(chess.QUEEN, chess.WHITE),
        6: chess.Piece(chess.KING, chess.WHITE),
        -1: chess.Piece(chess.PAWN, chess.BLACK),
        -2: chess.Piece(chess.KNIGHT, chess.BLACK),
        -3: chess.Piece(chess.BISHOP, chess.BLACK),
        -4: chess.Piece(chess.ROOK, chess.BLACK),
        -5: chess.Piece(chess.QUEEN, chess.BLACK),
        -6: chess.Piece(chess.KING, chess.BLACK),
        0: None
    }

    @staticmethod
    def from_tensor(tensor: torch.Tensor) -> "Game":
        """Build a game from a ``(6, 8, 8)`` tensor.

        The board is reconstructed from piece placement only; everything the
        tensor does not encode (side to move, castling rights, ...) keeps the
        value ``chess.Board.empty()`` starts with.
        """
        game = Game()
        game.tensor = tensor
        game.board = Game.create_board(tensor)
        return game

    @staticmethod
    def from_board(board: chess.Board) -> "Game":
        """Build a game around ``board`` (not copied) and its tensor encoding."""
        game = Game()
        game.board = board
        game.tensor = Game.create_tensor(board)
        return game

    @staticmethod
    def create_tensor(board: chess.Board) -> torch.Tensor:
        """Encode the piece placement of ``board`` as a ``(6, 8, 8)`` tensor."""
        matrix_board = torch.zeros((6, 8, 8))
        for square, piece in board.piece_map().items():
            row = 7 - chess.square_rank(square)
            col = chess.square_file(square)
            sign = 1 if piece.color == chess.WHITE else -1
            matrix_board[piece.piece_type - 1, row, col] = sign
        return matrix_board

    @staticmethod
    def _signed_piece_codes(tensor: torch.Tensor) -> list:
        """Collapse the six planes into an 8x8 nested list of signed piece codes."""
        plane_numbers = torch.arange(1, 7, device=tensor.device).view(-1, 1, 1)
        collapsed = torch.sum(tensor * plane_numbers, dim=0)
        return [[int(value) for value in row] for row in collapsed.tolist()]

    @staticmethod
    def create_board(tensor: torch.Tensor) -> chess.Board:
        """Decode a ``(6, 8, 8)`` tensor into a board holding the same pieces.

        Raises:
            KeyError: If a square decodes to a code missing from
                ``layer_to_piece`` (e.g. two planes set on one square).
        """
        board = chess.Board.empty()
        for row_idx, row in enumerate(Game._signed_piece_codes(tensor)):
            for col_idx, code in enumerate(row):
                piece = Game.layer_to_piece[code]
                if piece is not None:
                    square = chess.square(col_idx, 7 - row_idx)
                    board.set_piece_at(square, piece)
        return board

    def state(self):
        """Return the tensor encoding of the current position."""
        return self.tensor

    def over(self):
        """Return True when the game has ended for a reason that is not ignored.

        The board's outcome is cached on ``self.outcome``.  Draws by move count
        or repetition are deliberately not treated as the end of the game.
        """
        self.outcome = self.board.outcome()
        ignored = [
            chess.Termination.FIFTY_MOVES,
            chess.Termination.SEVENTYFIVE_MOVES,
            chess.Termination.FIVEFOLD_REPETITION,
            chess.Termination.THREEFOLD_REPETITION,
            ]
        if self.outcome is None:
            return False
        # Compare the termination reason; the Outcome object itself is never
        # equal to a Termination member.
        return self.outcome.termination not in ignored

    def score(self):
        """Return 1 if White won, -1 if Black won, 0 for a draw or unfinished game."""
        # Read the outcome from the board so the result is valid even when
        # over() was not called on this instance (e.g. right after copy()).
        outcome = self.board.outcome()
        if outcome is None or outcome.winner is None:
            return 0
        return 1 if outcome.winner == chess.WHITE else -1

    def valid_moves(self):
        """Return the legal moves of the current position."""
        return self.board.legal_moves

    def make_move(self, move):
        """Play ``move``, updating tensor and board together."""
        # The tensor must be derived first: simulate_move inspects the board
        # as it is before the move is pushed.
        self.tensor = self.simulate_move(move)
        self.board.push(move)

    def equal_boards(self):
        """Check that tensor and board describe the same piece placement.

        Raises:
            AssertionError: If any square differs.
        """
        for row_idx, row in enumerate(Game._signed_piece_codes(self.tensor)):
            for col_idx, code in enumerate(row):
                square = chess.square(col_idx, 7 - row_idx)
                piece_tensor = self.layer_to_piece[code]
                piece_board = self.board.piece_at(square)
                if piece_tensor != piece_board:
                    raise AssertionError(
                        f"tensor has {piece_tensor} but board has {piece_board} "
                        f"on {chess.square_name(square)}")

    def simulate_move(self, move: chess.Move):
        """Return the tensor that results from ``move`` without changing this game.

        Raises:
            ValueError: If there is no piece on the move's origin square.
        """
        # En passant and castling touch squares besides from/to, so the result
        # is re-encoded from a scratch board instead of patched in place.
        if self.board.is_en_passant(move) or self.board.is_castling(move):
            new_board = self.board.copy()
            new_board.push(move)
            return Game.create_tensor(new_board)

        piece = self.board.piece_at(move.from_square)
        if piece is None:
            raise ValueError(f"no piece on the origin square of move {move}")

        new_tensor = self.tensor.clone()
        idx_beg = piece.piece_type - 1
        # A promoting pawn lands on the plane of the piece it becomes.
        idx_end = idx_beg if move.promotion is None else int(move.promotion) - 1
        rank_beg = 7 - chess.square_rank(move.from_square)
        file_beg = chess.square_file(move.from_square)
        rank_end = 7 - chess.square_rank(move.to_square)
        file_end = chess.square_file(move.to_square)
        # Take the sign from the moving piece itself rather than from the side
        # to move, so the encoding stays right even if the two disagree.
        value = 1 if piece.color == chess.WHITE else -1

        new_tensor[:, rank_end, file_end] = 0  # clears a captured piece, if any
        new_tensor[idx_beg, rank_beg, file_beg] = 0
        new_tensor[idx_end, rank_end, file_end] = value
        return new_tensor

    def copy(self):
        """Return an independent copy (tensor cloned, board copied with its move stack)."""
        duplicate = self.__class__.__new__(self.__class__)
        duplicate.tensor = self.tensor.clone()
        duplicate.board = self.board.copy(stack=True)
        return duplicate


In [104]:
import torch.optim as optim

# Value network: 6 planes of 8x8 squares in, a single score out.
model = rl(
    input_size=6 * 8 * 8,
    output_size=1,
    layer_sizes=[384, 400, 300, 200, 100, 50],
)
# Adam with its default hyper-parameters over all network weights.
optimalizer = optim.Adam(model.parameters())


def record(game, score):
    """Train the module-level ``model`` one step so ``game``'s state maps to ``score``."""
    position = game.state()
    model.fit(position, score, optimalizer)

def heuristic_value(tensor):
    """Return the module-level ``model``'s prediction for a position tensor."""
    estimate = model.predict(tensor)
    return estimate

def playout_value(game: Game):
    """Play ``game`` out greedily and train on every position that was visited.

    From each position the move with the best heuristic value for the side to
    move is played (the heuristic is from White's point of view, so it is
    negated when Black is to move).  Once the playout ends, the final score is
    recorded for the final position first and then for each earlier position,
    walking back to ``game``.  ``game`` itself is not modified; every move is
    played on a copy.

    The playout is a loop rather than a recursion so that long games cannot
    exhaust Python's recursion limit.

    Returns:
        0-dim tensor with the final score of the playout.
    """
    visited = [game]
    current = game
    while not current.over():
        action_heuristic_dict = {}
        for move in current.valid_moves():
            tempTensor = current.simulate_move(move)
            heu = heuristic_value(tempTensor).item()
            action_heuristic_dict[move] = heu if current.board.turn else -heu
        if not action_heuristic_dict:
            # No legal move although over() said the game continues: stop
            # here instead of calling max() on an empty mapping.
            break
        move = max(action_heuristic_dict, key=action_heuristic_dict.get)

        current = current.copy()
        current.make_move(move)
        visited.append(current)

    print(current.board)
    score = torch.tensor(current.score())
    record(current, score)
    print(f"--------------: {score.item()}")

    # Propagate the result from the last position before the end back to the
    # position the playout started from.
    for earlier in reversed(visited[:-1]):
        record(earlier, score)

    return score

def monte_carlo_value(game, N=100):
    """Run ``N`` playouts from ``game``; used for their training effect only.

    Nothing is returned: each playout's value is discarded.
    """
    playouts_done = 0
    for _ in range(N):
        playout_value(game)
        playouts_done += 1
    return None



In [105]:
# Collect every line of the puzzle file that looks like a FEN string
# (contains both a '/' and a space); all other lines are skipped.
fens = []
local_filename = "m8n2.txt"
with open(local_filename, 'r') as file:
    stripped_lines = (raw_line.strip() for raw_line in file)
    fens.extend(
        candidate for candidate in stripped_lines
        if '/' in candidate and ' ' in candidate
    )

# Train on each starting position with 20 playouts.
for fen in fens:
    board = chess.Board(fen)
    game = Game.from_board(board)
    monte_carlo_value(game, 20)