In [1]:
import torch.nn as nn
import torch

class rlClassifier(nn.Module):
    """Fully connected policy network that outputs a categorical distribution.

    The input is flattened and passed through a stack of
    ``Linear -> BatchNorm1d -> Dropout -> ReLU`` blocks followed by a final
    ``Linear -> BatchNorm1d`` head. ``forward`` turns the head's output into
    probabilities with a single softmax.

    Args:
        input_size: Number of features per sample after flattening.
        output_size: Number of discrete actions (size of the distribution).
        layer_sizes: Width of each hidden layer, in order.
        dropout: Dropout probability used in every hidden block.
    """

    def __init__(
            self,
            input_size: int,
            output_size: int,
            layer_sizes: list[int],
            dropout: float = 0.1):
        super().__init__()

        layers = [nn.Flatten()]
        old_size = input_size
        for layer in layer_sizes:
            layers.append(nn.Linear(old_size, layer))
            layers.append(nn.BatchNorm1d(layer))
            layers.append(nn.Dropout(dropout))
            layers.append(nn.ReLU())
            old_size = layer

        layers.append(nn.Linear(old_size, output_size))
        layers.append(nn.BatchNorm1d(output_size))
        # Deliberately no nn.Softmax here: forward() already normalises the
        # output. Applying softmax twice squashes the distribution towards
        # uniform and weakens the gradient signal. Removing the parameter-free
        # layer from the end keeps all state_dict keys unchanged.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return action probabilities of shape ``(batch, output_size)``."""
        logits = self.model(x)
        return torch.softmax(logits, dim=-1)

    def get_action(self, state):
        """Sample one action index per sample from the current policy.

        Returns:
            Integer tensor of shape ``(batch,)`` with the sampled indices.
        """
        # Sampling is not differentiable, so building a graph here is wasted work.
        with torch.no_grad():
            probs = self.forward(state)
        return torch.multinomial(probs, 1).squeeze(1)

    @staticmethod
    def _stack_steps(values):
        """Stack a list/tuple of per-step tensors into one tensor; pass tensors through."""
        if isinstance(values, (list, tuple)):
            return torch.stack(list(values))
        return values

    def reinforce_update(self, batch_log_probs, batch_rewards, optimizer):
        """Perform one REINFORCE gradient step.

        Args:
            batch_log_probs: One entry per episode. Each entry is either a
                tensor of log-probabilities (e.g. shape ``(batch_size,)``) or a
                list/tuple of such tensors, one per step.
            batch_rewards: Rewards laid out exactly like ``batch_log_probs``.
            optimizer: Optimizer holding this model's parameters.

        The loss of an episode is ``sum(-log_prob * reward)``; the episode
        losses are averaged before back-propagation.
        """
        batch_losses = []

        for log_probs, rewards in zip(batch_log_probs, batch_rewards):
            # Accept per-step lists as well as ready-made tensors; negating a
            # plain Python list would raise a TypeError.
            log_probs = self._stack_steps(log_probs)
            rewards = self._stack_steps(rewards)
            episode_loss = -log_probs * rewards
            batch_losses.append(episode_loss.sum())

        # Average the episode losses over the batch.
        loss = torch.stack(batch_losses).mean()

        # Update the model.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [2]:
# Policy network: 6 piece planes of 8x8 squares in, one output per
# (from-square, to-square) pair out (64 * 64 = 4096).
rl = rlClassifier(
    input_size=6 * 8 * 8,
    output_size=4096,
    layer_sizes=[512, 1024, 2048],
)

In [3]:
import torch
# Load the serialized position tensor from disk.
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
tensor = torch.load('X_all.pt')
# Show the raw values and the overall shape as a quick sanity check.
print("Wczytany tensor:", tensor)
print("Wymiary wczytanego tensora:", tensor.shape)

Wczytany tensor: tensor([[[[ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          ...,
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 1.,  1.,  1.,  ...,  1.,  1.,  1.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.]],

         [[ 0., -1.,  0.,  ...,  0., -1.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          ...,
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  1.,  0.,  ...,  0.,  1.,  0.]],

         [[ 0.,  0., -1.,  ..., -1.,  0.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          ...,
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
          [ 0.,  0.,  1.,  ...,  1.,  0.,  0.]],

         [[-1.,  0.,  0.,  ...,  0.,  0., -1.],
          [ 0.,  0.,

In [4]:
# Inspect the first sample: a stack of 8x8 planes with entries in {-1, 0, 1}.
print(tensor[0])

tensor([[[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [-1., -1., -1., -1., -1., -1., -1., -1.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]],

        [[ 0., -1.,  0.,  0.,  0.,  0., -1.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.]],

        [[ 0.,  0., -1.,  0.,  0., -1.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0

In [5]:
# flat = nn.Flatten()
# flatten = flat(tensor[:10])
# print(flatten.shape)
# out = rl(flatten)
# print(out.shape)
# print(out[0])
# print(max(out[0]))
# arg = torch.argmax(out[0])
# print(arg)
# print(out[0][arg])

In [6]:
# Work on the first 1024 samples only.
input_tensor = tensor[:1024]
input_tensor.shape
# 1.0 where a plane has a piece, 0.0 elsewhere.
nonzero_mask = (input_tensor != 0).float()
# Plane numbers 1..6 shaped (6, 1, 1) so they broadcast over the plane axis (dim 1).
multiplied_indices = torch.arange(1, 7, device=input_tensor.device).unsqueeze(-1).unsqueeze(-1)
# Scale every plane by its number: an entry of +/-1 in plane k becomes +/-(k + 1).
result_tensor = input_tensor * multiplied_indices * nonzero_mask
# Collapse the plane axis into one signed code per square
# (presumably at most one plane is non-zero per square — TODO confirm).
result_tensor_summed = torch.sum(result_tensor, dim=1)
print(result_tensor_summed.shape) 
result_tensor_summed[0]

torch.Size([1024, 8, 8])




tensor([[-4., -2., -3., -5., -6., -3., -2., -4.],
        [-1., -1., -1., -1., -1., -1., -1., -1.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 4.,  2.,  3.,  5.,  6.,  3.,  2.,  4.]])

In [7]:
import chess

boards = []

# Signed square code -> piece. The magnitude is the plane number (1..6), the
# sign is the colour (positive = white, negative = black), 0 is an empty square.
layer_to_piece = {
    1: chess.Piece(chess.PAWN, chess.WHITE),
    2: chess.Piece(chess.KNIGHT, chess.WHITE),
    3: chess.Piece(chess.BISHOP, chess.WHITE),
    4: chess.Piece(chess.ROOK, chess.WHITE),
    5: chess.Piece(chess.QUEEN, chess.WHITE),
    6: chess.Piece(chess.KING, chess.WHITE),
   -1: chess.Piece(chess.PAWN, chess.BLACK),
   -2: chess.Piece(chess.KNIGHT, chess.BLACK),
   -3: chess.Piece(chess.BISHOP, chess.BLACK),
   -4: chess.Piece(chess.ROOK, chess.BLACK),
   -5: chess.Piece(chess.QUEEN, chess.BLACK),
   -6: chess.Piece(chess.KING, chess.BLACK),
    0: None
}

# The loop variable is deliberately NOT called `tensor`: that name holds the
# loaded dataset, and overwriting it here would break re-running earlier cells.
# One .tolist() call replaces 64 .item() calls per board.
for board_codes in result_tensor_summed.tolist():
    board = chess.Board.empty()

    for row_idx, row in enumerate(board_codes):
        for col_idx, val in enumerate(row):
            piece = layer_to_piece[int(val)]

            if piece is not None:
                # Row 0 of the grid is rank 8, so flip the row index to get the rank.
                square = chess.square(col_idx, 7 - row_idx)
                board.set_piece_at(square, piece)

    boards.append(board)


In [8]:
from functools import lru_cache

@lru_cache(maxsize=None)
def number_to_move(number) -> chess.Move:
    """Decode an action index into a ``chess.Move``.

    The index is ``from_cell * 64 + to_cell``, where a cell is numbered
    row-major over the 8x8 grid with row 0 being rank 8 (the same orientation
    the board tensors use).

    Args:
        number: Action index in ``range(4096)``. Pass a plain ``int``: results
            are memoised by argument, and tensor elements hash by identity, so
            they would never hit the cache.

    Returns:
        The move from the decoded source square to the decoded target square
        (no promotion piece is set).

    Raises:
        ValueError: If ``number`` is outside ``range(4096)``.
    """
    if number < 0 or number > 4095:
        # 4095 itself is valid, so the accepted range is range(4096).
        raise ValueError("Number not from range(4096).")
    start_index = number // 64
    end_index = number % 64
    # file = column, rank = 7 - row (row 0 is rank 8).
    from_square = chess.square(start_index % 8, 7 - start_index // 8)
    to_square = chess.square(end_index % 8, 7 - end_index // 8)
    return chess.Move(from_square, to_square)


In [9]:
class ChessEnv:
    """Batched chess environment that rewards legal moves.

    Each position is a ``(6, 8, 8)`` tensor: one plane per piece type
    (pawn .. king), ``+1`` for white pieces, ``-1`` for black pieces, with
    row 0 corresponding to rank 8.

    Args:
        device: Device for the tensors returned by ``step``. When ``None``
            (default) the device of the batch passed to ``reset`` is used.
    """

    def __init__(self, device=None) -> None:
        self._device = device
        # Signed square code -> piece (magnitude = plane number, sign = colour).
        self.layer_to_piece = {
            1: chess.Piece(chess.PAWN, chess.WHITE),
            2: chess.Piece(chess.KNIGHT, chess.WHITE),
            3: chess.Piece(chess.BISHOP, chess.WHITE),
            4: chess.Piece(chess.ROOK, chess.WHITE),
            5: chess.Piece(chess.QUEEN, chess.WHITE),
            6: chess.Piece(chess.KING, chess.WHITE),
            -1: chess.Piece(chess.PAWN, chess.BLACK),
            -2: chess.Piece(chess.KNIGHT, chess.BLACK),
            -3: chess.Piece(chess.BISHOP, chess.BLACK),
            -4: chess.Piece(chess.ROOK, chess.BLACK),
            -5: chess.Piece(chess.QUEEN, chess.BLACK),
            -6: chess.Piece(chess.KING, chess.BLACK),
            0: None
        }

    def reset(self, batch):
        """Rebuild the internal boards from a ``(N, 6, 8, 8)`` batch of positions."""
        # Remember where step() should place its outputs instead of assuming CUDA.
        self._output_device = self._device if self._device is not None else batch.device
        self.boards = self.generate_boards(batch)

    def generate_boards(self, input_tensor) -> list[chess.Board]:
        """Convert a ``(N, 6, 8, 8)`` tensor into a list of ``chess.Board`` objects."""
        nonzero_mask = (input_tensor != 0).float()
        multiplied_indices = torch.arange(1, 7, device=input_tensor.device).unsqueeze(-1).unsqueeze(-1)
        # Entry +/-1 in plane k becomes +/-(k + 1); summing the planes yields
        # one signed piece code per square.
        result_tensor = input_tensor * multiplied_indices * nonzero_mask
        result_tensor_summed = torch.sum(result_tensor, dim=1)

        boards: list[chess.Board] = []
        # One .tolist() call instead of a .item() call per square.
        for board_codes in result_tensor_summed.tolist():
            board = chess.Board.empty()
            for row_idx, row in enumerate(board_codes):
                for col_idx, val in enumerate(row):
                    # Use the instance mapping, not a notebook-level global.
                    piece = self.layer_to_piece[int(val)]
                    if piece is not None:
                        square = chess.square(col_idx, 7 - row_idx)
                        board.set_piece_at(square, piece)

            boards.append(board)
        return boards

    @staticmethod
    def generate_tensor(board: chess.Board):
        """Encode a board as a ``(6, 8, 8)`` float tensor (inverse of ``generate_boards``)."""
        matrix_board = torch.zeros((6, 8, 8))

        for square, piece in board.piece_map().items():
            file_idx = chess.square_file(square)
            rank_idx = chess.square_rank(square)
            sign = 1 if piece.color == chess.WHITE else -1
            # Row 0 of the tensor is rank 8, hence the flipped rank index.
            matrix_board[piece.piece_type - 1, 7 - rank_idx, file_idx] = sign

        return matrix_board

    def step(self, actions):
        """Apply one action per board and return the new states and rewards.

        Args:
            actions: Iterable (or 1-D tensor) of action indices in
                ``range(4096)``, one per board, decoded by ``number_to_move``.

        Returns:
            ``(states, rewards)``: ``states`` is an ``(N, 6, 8, 8)`` tensor of
            the boards after the move, and ``rewards`` holds ``5`` for a legal
            move and ``-2`` otherwise. Boards that receive an illegal move, or
            whose game is already over, are left unchanged.
        """
        if torch.is_tensor(actions):
            actions = actions.tolist()
        # Plain ints keep the lru_cache on number_to_move effective.
        moves = [number_to_move(int(action)) for action in actions]
        assert len(moves) == len(self.boards)
        corrects: list[bool] = [move in board.legal_moves for board, move in zip(self.boards, moves)]

        # Board.push mutates the board and returns None, so its result must not
        # be stored in place of the board itself.
        for board, move, correct in zip(self.boards, moves, corrects):
            if correct and not board.is_game_over():
                board.push(move)

        # Encode this environment's boards, not a same-named global list.
        tensors = [ChessEnv.generate_tensor(board) for board in self.boards]

        rewards = [
            5 if correct else -2
            for correct in corrects
            ]
        device = self._output_device
        return torch.stack(tensors).to(device), torch.tensor(rewards).to(device)
        



In [10]:
def train_model(env, model, optimizer, batch, num_episodes=2, max_steps_per_episode=2):
    """Train ``model`` with REINFORCE on a fixed batch of start positions.

    Every episode restarts the environment from ``batch``, rolls the policy out
    for ``max_steps_per_episode`` steps and then performs one policy-gradient
    update.

    Args:
        env: Environment with ``reset(batch)`` and ``step(actions)``, the
            latter returning ``(next_states, rewards)``.
        model: Policy whose ``forward(state)`` returns action probabilities of
            shape ``(batch, n_actions)`` and which provides
            ``reinforce_update(batch_log_probs, batch_rewards, optimizer)``.
        optimizer: Optimizer over the model's parameters.
        batch: Tensor of start states.
        num_episodes: Number of rollouts (and updates) to perform.
        max_steps_per_episode: Number of environment steps per rollout.
    """
    for episode in range(num_episodes):

        env.reset(batch)
        # The environment restarts from `batch`, so the policy input must too.
        state = batch
        log_probs = []
        rewards = []

        for step in range(max_steps_per_episode):
            # One forward pass both samples the action and provides its
            # log-probability. A second pass would see different dropout and
            # batch-norm noise and score the action under another distribution.
            probs = model.forward(state)
            action = torch.multinomial(probs.detach(), 1)
            # forward() already returns probabilities: take their log directly
            # instead of pushing them through another softmax.
            log_probs.append(torch.log(probs.gather(1, action)).squeeze(1))

            next_states, reward = env.step(action.squeeze(1))
            rewards.append(reward)

            state = next_states

        if log_probs:
            # One (steps, batch) tensor per episode; reinforce_update multiplies
            # log-probs and rewards element-wise, which plain lists do not support.
            model.reinforce_update([torch.stack(log_probs)], [torch.stack(rewards)], optimizer)

        # Report progress every few episodes.
        if (episode + 1) % 10 == 0:
            print(f"Episode {episode + 1}/{num_episodes}")

    print("Training finished.")

In [11]:
from torch import optim
# Use the GPU when present and fall back to the CPU otherwise, instead of
# hard-coding 'cuda'. Note that the environment must return its tensors on the
# same device as the model.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
tensor = torch.load('X_all.pt')
rl = rlClassifier(6*8*8, 4096, [512, 1024, 2048]).to(device)
# Train on a small slice of 64 positions.
tensor = tensor[:64]
tensor = tensor.to(device)
optimizer = optim.Adam(rl.parameters(), lr=0.001)
env = ChessEnv()

train_model(env, rl, optimizer, tensor)