<a href="https://colab.research.google.com/github/ryankkien/ChessAI/blob/main/ChessAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
%pip install swig
%pip install gymnasium[box2d]

  and should_run_async(code)


Collecting swig
  Downloading swig-4.1.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.1.1
Collecting box2d-py==2.3.5 (from gymnasium[box2d])
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp310-cp310-linux_x86_64.whl size=2373077 sha256=20fff3c9dc973986ea13658ff344a01e6a0a177212fffccf358fe9514fc1b32e
  Stored in directory: /root/.cache/pip/wheels/db/8f/6a/eaaadf056fba10a98d986f6dce954e6201ba3126926fc5ad9e
Successfully built box2d-py
Installin

In [28]:
import chess
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import numpy as np

In [42]:
# Choose the compute device for training: prefer CUDA, then Apple MPS, and
# fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected network mapping a one-hot board to move probabilities.

    The input is flattened to 8*8*13 features per sample, passed through three
    hidden Linear+ReLU layers (832, 1024, 2048 units) and projected to 64*64
    outputs, which `forward` normalises with a softmax.
    """

    def __init__(self):
        super().__init__()
        input_features = 8 * 8 * 13
        output_features = 64 * 64
        widths = [input_features, 832, 1024, 2048]

        # Hidden layers: every consecutive pair of widths becomes Linear -> ReLU.
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Output projection has no activation here; softmax is applied in forward().
        layers.append(nn.Linear(widths[-1], output_features))

        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return softmax probabilities over the 64*64 outputs for each sample in `x`."""
        flat = self.flatten(x)
        return F.softmax(self.linear_relu_stack(flat), dim=1)

# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=832, out_features=832, bias=True)
    (1): ReLU()
    (2): Linear(in_features=832, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=2048, bias=True)
    (5): ReLU()
    (6): Linear(in_features=2048, out_features=4096, bias=True)
  )
)


In [48]:
import gymnasium as gym
from gymnasium import spaces

In [49]:
def onehot(board):
    """One-hot encode the text rendering of `board` into an (8, 8, 13) array.

    `str(board)` is expected to yield eight newline-separated rows of eight
    space-separated symbols. Channel 0 marks an empty square ('.'), channels
    1-6 the upper-case pieces P, R, N, B, Q, K and channels 7-12 the
    lower-case pieces p, r, n, b, q, k. Rows keep the order of the text
    rendering.
    """
    # Channel index for each symbol, in the order listed in the docstring.
    channel_of = {symbol: channel for channel, symbol in enumerate(".PRNBQKprnbqk")}

    # Strip the separating spaces so each row is exactly the square symbols.
    rows = [list(line.replace(" ", "")) for line in str(board).strip().split("\n")]

    planes = np.zeros((8, 8, 13), dtype=int)
    for row, col in np.ndindex(8, 8):
        planes[row, col, channel_of[rows[row][col]]] = 1
    return planes

In [54]:
class CustomEnv(gym.Env):
    """Chess environment in which one agent supplies the moves for both sides.

    Actions are integers in [0, 4096) encoding a (from_square, to_square) pair
    as ``from_square * 64 + to_square``. Observations are the (8, 8, 13)
    one-hot planes produced by `onehot`.

    `reset` returns only the observation and `step` returns the 4-tuple
    ``(obs, reward, done, info)``; these shapes are what the training loop in
    this notebook unpacks.
    """

    # Material value of a captured piece, keyed by its symbol. Capturing a
    # lower-case (black) piece is rewarded, losing an upper-case (white) piece
    # is penalised. The king entries are kept from the original table; kings
    # can never be captured by a legal move, so they are not reachable.
    PIECE_VALUES = {
        '.': 0,
        'P': -1, 'p': 1,
        'R': -5, 'r': 5,
        'N': -3, 'n': 3,
        'B': -3, 'b': 3,
        'Q': -9, 'q': 9,
        'K': 999, 'k': 999
    }

    # Reward returned when the requested move is not legal in the current position.
    ILLEGAL_MOVE_REWARD = -9999

    def __init__(self):
        super().__init__()
        self.board = chess.Board()
        # One discrete action per ordered (from_square, to_square) pair.
        self.action_space = spaces.Discrete(64 * 64)
        # One-hot board planes: 8 x 8 squares, 13 channels (empty + 12 piece kinds).
        self.observation_space = spaces.Box(low=0, high=1, shape=(8, 8, 13), dtype=np.uint8)

    def reset(self, seed=None, options=None):
        """Reset the board to the starting position and return its observation.

        `seed` is forwarded to the base class; `options` is accepted for API
        compatibility and ignored.
        """
        super().reset(seed=seed)
        self.board.reset()
        return onehot(self.board)

    def step(self, action):
        """Play `action` if it is legal and report the outcome.

        Returns:
            obs: one-hot encoding of the board after the step.
            reward: capture value of the move, or `ILLEGAL_MOVE_REWARD` when
                the move is illegal (the board is then left unchanged and the
                episode is NOT ended, so callers should restrict themselves
                to legal actions).
            done: True once the game is over (checkmate, stalemate,
                insufficient material, seventy-five-move rule or fivefold
                repetition).
            info: empty dict of auxiliary diagnostics.
        """
        move = self.action_to_move(action)

        if move in self.board.legal_moves:
            # The reward must be computed BEFORE pushing: after the push the
            # target square holds the moving piece and the captured piece is gone.
            reward = self.compute_reward(move)
            self.board.push(move)
        else:
            reward = self.ILLEGAL_MOVE_REWARD

        obs = onehot(self.board)
        done = self.board.is_game_over()
        info = {}
        return obs, reward, done, info

    def action_to_move(self, action):
        """Convert a discrete action (0 to 4095) into a `chess.Move`.

        A pawn move onto the first or last rank is turned into a queen
        promotion; without a promotion piece such a move is never legal.
        """
        from_square = chess.SQUARES[action // 64]
        to_square = chess.SQUARES[action % 64]

        piece = self.board.piece_at(from_square)
        reaches_back_rank = chess.square_rank(to_square) in (0, 7)
        if piece is not None and piece.piece_type == chess.PAWN and reaches_back_rank:
            return chess.Move(from_square, to_square, promotion=chess.QUEEN)
        return chess.Move(from_square, to_square)

    def render(self, mode='human'):
        """Print the text rendering of the current board."""
        print(self.board)

    def close(self):
        """Nothing to release."""
        return

    def is_capture_move(self, board, move):
        """Return True if `move` captures a piece on `board`, including en passant.

        `board` must be the position BEFORE `move` is played.
        """
        return board.is_capture(move)

    def compute_reward(self, move):
        """Return the material reward for `move`, which has not been pushed yet.

        Non-capturing moves score 0; captures score the `PIECE_VALUES` entry
        of the captured piece.
        """
        if not self.is_capture_move(self.board, move):
            return 0

        if self.board.is_en_passant(move):
            # The captured pawn is not on the destination square; it is always
            # a pawn belonging to the side that is not to move.
            captured = chess.Piece(chess.PAWN, not self.board.turn)
        else:
            captured = self.board.piece_at(move.to_square)
        return self.PIECE_VALUES[captured.symbol()]

In [46]:
import torch.optim as optim

# Initialize environment and neural network
env = CustomEnv()
policy_net = NeuralNetwork().to(device)  # run on the device selected earlier
optimizer = optim.Adam(policy_net.parameters(), lr=0.001)

num_episodes = 1000
# Safety cap so a single episode can never run unbounded.
max_steps_per_episode = 500


def executable_actions(env):
    """Return the sorted action indices that `env.step` will play as legal moves.

    Every legal move is mapped to its ``from_square * 64 + to_square`` index
    and kept only if the environment's own decoding of that index is legal in
    the current position.
    """
    candidates = {move.from_square * 64 + move.to_square for move in env.board.legal_moves}
    return sorted(a for a in candidates if env.action_to_move(a) in env.board.legal_moves)


for episode in range(num_episodes):
    state = env.reset()
    done = False
    steps = 0

    while not done and steps < max_steps_per_episode:
        # The environment leaves the board untouched on an illegal move without
        # ending the episode, so an unmasked greedy choice can repeat the same
        # illegal action forever. Only choose among actions it can execute.
        actions = executable_actions(env)
        if not actions:
            break

        # Convert state to a float tensor with a batch dimension
        state_tensor = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)

        # Get action values from the neural network
        with torch.no_grad():
            action_values = policy_net(state_tensor).squeeze(0)

        # Select an action (e.g., using epsilon-greedy)
        # This is a basic greedy selection restricted to the executable actions
        legal_index = torch.tensor(actions, device=action_values.device)
        action = actions[torch.argmax(action_values[legal_index]).item()]

        # Step the environment
        next_state, reward, done, _ = env.step(action)

        # Here, add your RL training code (e.g., compute loss and backpropagate)

        state = next_state
        steps += 1


KeyboardInterrupt: ignored