In [None]:
import chess

class ChessGame:
    def __init__(self):
        self.board = chess.Board()
        self.player = chess.WHITE

    def play(self):
        while not self.board.is_game_over():
            print(self.board)
            try:
                move = input("Enter move (e.g. e2e4): ")
                move = chess.Move.from_uci(move)
                if move in self.board.legal_moves:
                    self.board.push(move)
                    self.player = not self.player
                else:
                    print("Invalid move. Please try again.")
            except KeyboardInterrupt:
                print("Game interrupted.")
                break

        print(self.board.result())

game = ChessGame()
game.play()


In [2]:
import numpy as np
import chess
import random

class QLearningAgent:
    def __init__(self, alpha=0.5, epsilon=0.1, discount=0.9):
        self.alpha = alpha # learning rate
        self.epsilon = epsilon # exploration rate
        self.discount = discount # discount factor
        self.q_values = {}

    def get_q_value(self, state, action):
        if (state, action) not in self.q_values:
            self.q_values[(state, action)] = 0.0
        return self.q_values[(state, action)]

    def choose_action(self, state, legal_actions):
        if np.random.uniform(0, 1) < self.epsilon:
            return random.choice(legal_actions)
        else:
            q_values = [self.get_q_value(state, action) for action in legal_actions]
            max_q_value = max(q_values)
            best_actions = [action for action, q_value in zip(legal_actions, q_values) if q_value == max_q_value]
            return random.choice(best_actions)

    def update(self, state, action, reward, next_state):
        q_value = self.get_q_value(state, action)
        next_q_values = [self.get_q_value(next_state, next_action) for next_action in chess.Board(next_state).legal_moves]
        if next_q_values:
            max_next_q_value = max(next_q_values)
        else:
            max_next_q_value = 0.0
        new_q_value = q_value + self.alpha * (reward + self.discount * max_next_q_value - q_value)
        self.q_values[(state, action)] = new_q_value

class ChessGame:
    def __init__(self, agent):
        self.board = chess.Board()
        self.agent = agent

    def play(self):
        while not self.board.is_game_over():
            state = self.board.fen()
            legal_actions = [move.uci() for move in self.board.legal_moves]
            action = self.agent.choose_action(state, legal_actions)
            move = chess.Move.from_uci(action)
            self.board.push(move)
            reward = self.get_reward()
            next_state = self.board.fen()
            self.agent.update(state, action, reward, next_state)

    def get_reward(self):
        result = self.board.result()
        if result == "1-0":
            return 1.0
        elif result == "0-1":
            return -1.0
        else:
            return 0.0

agent = QLearningAgent()
game = ChessGame(agent)
game.play()


In [3]:
import numpy as np
import chess
import random

class QLearningAgent:
    def __init__(self, alpha=0.5, epsilon=0.1, discount=0.9):
        self.alpha = alpha # learning rate
        self.epsilon = epsilon # exploration rate
        self.discount = discount # discount factor
        self.q_values = {}

    def get_q_value(self, state, action):
        if (state, action) not in self.q_values:
            self.q_values[(state, action)] = 0.0
        return self.q_values[(state, action)]

    def choose_action(self, state, legal_actions):
        if np.random.uniform(0, 1) < self.epsilon:
            return random.choice(legal_actions)
        else:
            q_values = [self.get_q_value(state, action) for action in legal_actions]
            max_q_value = max(q_values)
            best_actions = [action for action, q_value in zip(legal_actions, q_values) if q_value == max_q_value]
            return random.choice(best_actions)

    def update(self, state, action, reward, next_state):
        q_value = self.get_q_value(state, action)
        next_q_values = [self.get_q_value(next_state, next_action) for next_action in chess.Board(next_state).legal_moves]
        if next_q_values:
            max_next_q_value = max(next_q_values)
        else:
            max_next_q_value = 0.0
        new_q_value = q_value + self.alpha * (reward + self.discount * max_next_q_value - q_value)
        self.q_values[(state, action)] = new_q_value

class ChessGame:
    def __init__(self, agent):
        self.board = chess.Board()
        self.agent = agent

    def play(self):
        while not self.board.is_game_over():
            print(self.board)
            if self.board.turn == chess.WHITE:
                state = self.board.fen()
                legal_actions = [move.uci() for move in self.board.legal_moves]
                action = self.agent.choose_action(state, legal_actions)
                move = chess.Move.from_uci(action)
                self.board.push(move)
                print("Agent plays: " + str(move))
                reward = self.get_reward()
                next_state = self.board.fen()
                self.agent.update(state, action, reward, next_state)
            else:
                move = input("Enter move (e.g. e2e4): ")
                move = chess.Move.from_uci(move)
                if move in self.board.legal_moves:
                    self.board.push(move)
                    print("Player plays: " + str(move))
                else:
                    print("Invalid move. Please try again.")

        print(self.board.result())

    def get_reward(self):
        result = self.board.result()
        if result == "1-0":
            return 1.0
        elif result == "0-1":
            return -1.0
        else:
            return 0.0

agent = QLearningAgent()
game = ChessGame(agent)
game.play()


r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R
Agent plays: a2a4
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
P . . . . . . .
. . . . . . . .
. P P P P P P P
R N B Q K B N R
Invalid move. Please try again.
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
P . . . . . . .
. . . . . . . .
. P P P P P P P
R N B Q K B N R


InvalidMoveError: expected uci string to be of length 4 or 5: ''

In [27]:
import numpy as np
import chess
import random
import pickle

class QLearningAgent:
    def __init__(self, alpha=0.4, epsilon=1, discount=0.9):
        self.alpha = alpha # learning rate
        self.epsilon = epsilon # exploration rate
        self.discount = discount # discount factor
        self.q_values = {}

    def get_q_value(self, state, action):
        if (state, action) not in self.q_values:
            self.q_values[(state, action)] = 0.0
        return self.q_values[(state, action)]

    def choose_action(self, state, legal_actions):
        if np.random.uniform(0, 1) < self.epsilon:
            return random.choice(legal_actions)
        else:
            q_values = [self.get_q_value(state, action) for action in legal_actions]
            max_q_value = max(q_values)
            best_actions = [action for action, q_value in zip(legal_actions, q_values) if q_value == max_q_value]
            return random.choice(best_actions)

    def update(self, state, action, reward, next_state):
        q_value = self.get_q_value(state, action)
        next_q_values = [self.get_q_value(next_state, next_action) for next_action in chess.Board(next_state).legal_moves]
        if next_q_values:
            max_next_q_value = max(next_q_values)
        else:
            max_next_q_value = 0.0
        new_q_value = q_value + self.alpha * (reward + self.discount * max_next_q_value - q_value)
        self.q_values[(state, action)] = new_q_value

    def save_model(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self.q_values, f)

    def load_model(self, filename):
        with open(filename, 'rb') as f:
            self.q_values = pickle.load(f)

# class ChessGame:
#     def __init__(self, white_agent, black_agent):
#         self.board = chess.Board()
#         self.white_agent = white_agent
#         self.black_agent = black_agent

#     def play(self):
#         while not self.board.is_game_over():
#             print(self.board)
#             if self.board.turn == chess.WHITE:
#                 state = self.board.fen()
#                 legal_actions = [move.uci() for move in self.board.legal_moves]
#                 action = self.white_agent.choose_action(state, legal_actions)
#                 move = chess.Move.from_uci(action)
#                 self.board.push(move)
#                 print("White agent plays: " + str(move))
#                 reward = self.get_reward()
#                 next_state = self.board.fen()
#                 self.white_agent.update(state, action, reward, next_state)
#             else:
#                 state = self.board.fen()
#                 legal_actions = [move.uci() for move in self.board.legal_moves]
#                 action = self.black_agent.choose_action(state, legal_actions)
#                 move = chess.Move.from_uci(action)
#                 self.board.push(move)
#                 print("Black agent plays: " + str(move))
#                 reward = self.get_reward()
#                 next_state = self.board.fen()
#                 self.black_agent.update(state, action, reward, next_state)

#         print(self.board.result())
#         self.white_agent.save_model('white_agent_model.pkl')
#         self.black_agent.save_model('black_agent_model.pkl')
class ChessGame:
    def __init__(self, white_agent, black_agent):
        self.board = chess.Board()
        self.white_agent = white_agent
        self.black_agent = black_agent

    def play(self, num_rounds):
        black_n=0
        white_n=0
        draw_n=0
        for i in range(num_rounds):
            print(f"Round {i+1}/{num_rounds}")

            self.board.reset()

            while not self.board.is_game_over():
                if self.board.turn == chess.WHITE:
                    current_agent = self.white_agent
                else:
                    current_agent = self.black_agent

                legal_moves = list(self.board.legal_moves)
                current_state = self.board.fen()
                action = current_agent.choose_action(current_state, legal_moves)
                self.board.push(action)
                next_state = self.board.fen()

                if self.board.is_checkmate():
                    if self.board.turn == chess.WHITE:
                        reward = -1.0
                        winner = "black"
                        black_n=black_n+1
                    else:
                        reward = 1.0
                        winner = "white"
                        white_n=white_n+1
                elif self.board.is_stalemate() or self.board.is_insufficient_material() : #or self.board.is_seventyfive_moves():
                    reward = 0.0
                    winner = "draw"
                    draw_n=draw_n+1
                    
                else:
                    reward = None
                    winner = None

                if reward is not None:
                    current_agent.update(current_state, action, reward, next_state)

            self.white_agent.save_model(f"white_agent_round.pkl")
            self.black_agent.save_model(f"black_agent_round.pkl")

            print(f"Winner: {winner}")
        print(f"black: {black_n} White: {white_n} Draw: {draw_n}")



    def get_reward(self):
        result = self.board.result()
        if result == "1-0":
            return 1.0
        elif result == "0-1":
            return -1.0
        else:
            return 0.0
       
white_agent = QLearningAgent()
black_agent = QLearningAgent()

game = ChessGame(white_agent, black_agent)
game.play(500)

Round 1/500
Winner: None
Round 2/500
Winner: None
Round 3/500
Winner: draw
Round 4/500
Winner: white
Round 5/500
Winner: draw
Round 6/500
Winner: white
Round 7/500
Winner: draw
Round 8/500
Winner: draw
Round 9/500
Winner: draw
Round 10/500
Winner: draw
Round 11/500
Winner: draw
Round 12/500
Winner: draw
Round 13/500
Winner: None
Round 14/500
Winner: draw
Round 15/500
Winner: draw
Round 16/500
Winner: draw
Round 17/500
Winner: draw
Round 18/500
Winner: draw
Round 19/500
Winner: draw
Round 20/500
Winner: white
Round 21/500
Winner: draw
Round 22/500
Winner: black
Round 23/500
Winner: draw
Round 24/500
Winner: draw
Round 25/500
Winner: draw
Round 26/500
Winner: draw
Round 27/500
Winner: white
Round 28/500
Winner: draw
Round 29/500
Winner: draw
Round 30/500
Winner: black
Round 31/500
Winner: draw
Round 32/500
Winner: draw
Round 33/500
Winner: draw
Round 34/500
Winner: draw
Round 35/500
Winner: draw
Round 36/500
Winner: draw
Round 37/500
Winner: black
Round 38/500
Winner: draw
Round 39/500
W

In [23]:
import chess
import pickle


import random

class RLAgent:
    def __init__(self, alpha=0.4, epsilon=1, discount=0.9):
        self.Q = {}
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = discount
        self.prev_state = None
        self.prev_action = None

    def get_Q(self, state, action):
        if state not in self.Q:
            self.Q[state] = {}
        if action not in self.Q[state]:
            self.Q[state][action] = 0.0
        return self.Q[state][action]

    def choose_action(self, state, legal_moves):
        if random.uniform(0, 1) < self.epsilon:
            action = random.choice(legal_moves)
        else:
            q_values = [self.get_Q(state, a) for a in legal_moves]
            max_q = max(q_values)
            if q_values.count(max_q) > 1:
                best_moves = [m for m in legal_moves if self.get_Q(state, m) == max_q]
                action = random.choice(best_moves)
            else:
                index = q_values.index(max_q)
                action = legal_moves[index]

        self.prev_state = state
        self.prev_action = action
        return action

    def update_Q(self, reward, new_state):
        if self.prev_state is not None:
            old_q = self.get_Q(self.prev_state, self.prev_action)
            max_q = max([self.get_Q(new_state, a) for a in self.get_legal_moves(new_state)])
            self.Q[self.prev_state][self.prev_action] += self.alpha * (reward + self.gamma * max_q - old_q)

    def get_legal_moves(self, state):
        board = chess.Board(state)
        return list(board.legal_moves)


class ChessGame:
    def __init__(self):
        self.board = chess.Board()

    def play_game(self, white_model_file, black_model_file):
        # Load models
        with open(white_model_file, "rb") as f:
            white_model = pickle.load(f)
        with open(black_model_file, "rb") as f:
            black_model = pickle.load(f)

        while not self.board.is_game_over():
            if self.board.turn == chess.WHITE:
                agent=RLAgent()
                agent.Q=white_model
                current_agent = agent
            else:
                agent=RLAgent()
                agent.Q=black_model
                current_agent = agent

            legal_moves = list(self.board.legal_moves)
            current_state = self.board.fen()

            action = current_agent.choose_action(current_state, legal_moves)
            self.board.push(action)

            print(self.board)
            print("")

        if self.board.result() == "1-0":
            print("White wins!")
        elif self.board.result() == "0-1":
            print("Black wins!")
        else:
            print("Draw!")
game = ChessGame( )
game.play_game('white_agent_round.pkl',  'black_agent_round.pkl'               )

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. P . . . . . .
. . . . . . . .
P . P P P P P P
R N B Q K B N R

r n b q k b n r
p p p . p p p p
. . . p . . . .
. . . . . . . .
. P . . . . . .
. . . . . . . .
P . P P P P P P
R N B Q K B N R

r n b q k b n r
p p p . p p p p
. . . p . . . .
. . . . . . . .
. P . . . . . .
. . . P . . . .
P . P . P P P P
R N B Q K B N R

r n b q k b n r
. p p . p p p p
p . . p . . . .
. . . . . . . .
. P . . . . . .
. . . P . . . .
P . P . P P P P
R N B Q K B N R

r n b q k b n r
. p p . p p p p
p . . p . . . .
. . . . . . . .
. P . . . . . .
. . P P . . . .
P . . . P P P P
R N B Q K B N R

r n b q k b . r
. p p . p p p p
p . . p . n . .
. . . . . . . .
. P . . . . . .
. . P P . . . .
P . . . P P P P
R N B Q K B N R

r n b q k b . r
. p p . p p p p
p . . p . n . .
. . . . . . . .
. P . . . . . .
. . P P . N . .
P . . . P P P P
R N B Q K B . R

r n b q k b . r
. p p . . p p p
p . . p . n . .
. . . . p . . .
. P . . . . . .
. . P P . N . .
P