In [None]:
class BaghchalBoard:
    """State and move rules for a game of Bagh-Chal on a 5x5 grid.

    Each cell of ``board`` holds ``'T'`` (tiger), ``'G'`` (goat) or ``'.'``
    (empty).  Goats move first.  While fewer than ``TOTAL_GOATS`` goats have
    been placed, a goat turn is a *placement*; afterwards goats move like
    pieces.

    Attributes:
        board: 5x5 list of lists of single-character cell markers.
        valid_move: whether the most recent ``make_move`` call was legal.
        goat_killed: whether the most recent ``make_move`` call captured a goat.
        goats_placed: number of goats placed on the board so far.
        goats_remaining: ``TOTAL_GOATS`` minus the number of captured goats.
        turn: ``'G'`` or ``'T'``, the side to move.
        state_history: the most recent board states seen by ``check_loop``.
        n: maximum number of states kept in ``state_history``.
    """

    SIZE = 5               # board is SIZE x SIZE
    TOTAL_GOATS = 20       # goats available for placement
    GOATS_TO_CAPTURE = 5   # captures needed for the tigers to win

    def __init__(self):
        """Set up the opening position: four tigers in the corners, goat to move."""
        size = self.SIZE
        self.board = [['.' for _ in range(size)] for _ in range(size)]
        last = size - 1
        for row, col in ((0, 0), (0, last), (last, 0), (last, last)):
            self.board[row][col] = 'T'

        self.valid_move = False
        self.goat_killed = False
        self.goats_placed = 0
        self.goats_remaining = self.TOTAL_GOATS
        self.turn = 'G'  # 'T' for Tiger, 'G' for Goat
        self.state_history = []
        self.n = 5

    def display_board(self):
        """Print the board, one row per line, followed by a blank line."""
        for row in self.board:
            print(' '.join(row))
        print()

    def _in_bounds(self, row, col):
        """Return True when (row, col) addresses a cell of the board."""
        return 0 <= row < self.SIZE and 0 <= col < self.SIZE

    def is_valid_move(self, start_row, start_col, end_row, end_col):
        """Return True when the side to move may play start -> end.

        The destination must be an empty cell on the board.  On a tiger turn
        the start cell must hold a tiger.  On a goat turn during placement
        the start cell must be empty (a placement is encoded either as
        ``start == end`` or as an adjacent empty start cell, and the goat is
        put on ``end``); once all goats are placed the start cell must hold
        a goat.
        """
        # Reject off-board coordinates up front: negative indices would
        # otherwise wrap around and large ones would raise IndexError.
        if not (self._in_bounds(start_row, start_col)
                and self._in_bounds(end_row, end_col)):
            return False

        if self.board[end_row][end_col] != '.':
            return False

        piece = self.board[start_row][start_col]

        if self.turn == 'T':
            return piece == 'T' and self.validate_tiger_move(
                start_row, start_col, end_row, end_col)

        if self.turn == 'G':
            placing = self.goats_placed < self.TOTAL_GOATS
            required = '.' if placing else 'G'
            return piece == required and self.validate_goat_move(
                start_row, start_col, end_row, end_col)

        return False

    def is_even(self, number):
        """Return True when ``number`` is even."""
        return number % 2 == 0

    def validate_tiger_move(self, start_row, start_col, end_row, end_col):
        """Return True when start -> end is a legal tiger step or jump shape.

        Tigers step one cell orthogonally, or diagonally from cells whose
        ``row + col`` is even.  They may also jump two cells in the same
        directions when the cell in between holds a goat.  This method does
        not check that the destination is empty; ``is_valid_move`` does.
        """
        row_dist = abs(start_row - end_row)
        col_dist = abs(start_col - end_col)
        # Diagonal lines only pass through cells with an even coordinate sum.
        diagonal_allowed = self.is_even(start_row + start_col)

        if (row_dist, col_dist) in ((1, 0), (0, 1)):
            return True
        if (row_dist, col_dist) == (1, 1):
            return diagonal_allowed

        is_jump = (row_dist, col_dist) in ((2, 0), (0, 2)) or (
            (row_dist, col_dist) == (2, 2) and diagonal_allowed)
        if is_jump:
            # For a two-cell jump the midpoint is exact.
            jump_row = (start_row + end_row) // 2
            jump_col = (start_col + end_col) // 2
            return self.board[jump_row][jump_col] == 'G'

        return False

    def validate_goat_move(self, start_row, start_col, end_row, end_col):
        """Return True when start -> end is a legal goat step or placement.

        Goats step one cell orthogonally, or diagonally from cells whose
        ``row + col`` is even.  During the placement phase on a goat turn,
        ``start == end`` is also accepted (placing a goat on that cell).
        """
        row_dist = abs(start_row - end_row)
        col_dist = abs(start_col - end_col)

        if (row_dist, col_dist) in ((1, 0), (0, 1)):
            return True
        if (row_dist, col_dist) == (1, 1):
            return self.is_even(start_row + start_col)

        placing = self.goats_placed < self.TOTAL_GOATS and self.turn == "G"
        return placing and (row_dist, col_dist) == (0, 0)

    def make_move(self, start_row, start_col, end_row, end_col):
        """Play start -> end for the side to move, if it is legal.

        Sets ``valid_move`` to report legality and ``goat_killed`` to report
        whether this move captured a goat.  An illegal move leaves the board
        and the turn unchanged.
        """
        # Both flags describe this call only, never an earlier move.
        self.goat_killed = False
        if not self.is_valid_move(start_row, start_col, end_row, end_col):
            self.valid_move = False
            return
        self.valid_move = True

        if self.turn == 'G' and self.goats_placed < self.TOTAL_GOATS:
            # Placement phase: a new goat appears on the destination cell.
            self.board[end_row][end_col] = 'G'
            self.goats_placed += 1
        else:
            piece = self.board[start_row][start_col]
            self.board[start_row][start_col] = '.'
            self.board[end_row][end_col] = self.turn

            # Only a two-cell tiger jump captures.  For a one-cell diagonal
            # step the floor-divided "midpoint" is an unrelated neighbouring
            # cell, so it must not be inspected.
            jumped = max(abs(start_row - end_row), abs(start_col - end_col)) == 2
            if piece == 'T' and jumped:
                jump_row = (start_row + end_row) // 2
                jump_col = (start_col + end_col) // 2
                self.board[jump_row][jump_col] = '.'
                self.goat_killed = True
                self.goats_remaining -= 1

        # Switch turn to the other player
        self.turn = 'T' if self.turn == 'G' else 'G'

    def check_loop(self):
        """Return True when the current position is in the recent history.

        If the position is new it is appended to ``state_history``, which is
        trimmed to its ``n`` most recent entries, and False is returned.
        """
        current_state = tuple(tuple(row) for row in self.board)

        if current_state in self.state_history:
            return True

        self.state_history.append(current_state)
        if len(self.state_history) > self.n:
            self.state_history.pop(0)

        return False

    def _tiger_can_move(self):
        """Return True when at least one tiger has a legal step or jump."""
        size = self.SIZE
        for row in range(size):
            for col in range(size):
                if self.board[row][col] != 'T':
                    continue
                for new_row in range(max(0, row - 2), min(size, row + 3)):
                    for new_col in range(max(0, col - 2), min(size, col + 3)):
                        if (self.board[new_row][new_col] == '.'
                                and self.validate_tiger_move(
                                    row, col, new_row, new_col)):
                            return True
        return False

    def _goat_can_move(self):
        """Return True when the goats can place a goat or move one."""
        size = self.SIZE
        if self.goats_placed < self.TOTAL_GOATS:
            # During placement any empty cell is a legal placement.
            return any(cell == '.' for row in self.board for cell in row)

        for row in range(size):
            for col in range(size):
                if self.board[row][col] != 'G':
                    continue
                for new_row in range(max(0, row - 1), min(size, row + 2)):
                    for new_col in range(max(0, col - 1), min(size, col + 2)):
                        if (self.board[new_row][new_col] == '.'
                                and self.validate_goat_move(
                                    row, col, new_row, new_col)):
                            return True
        return False

    def check_win_conditions(self):
        """Return ``'Tiger'``, ``'Goat'`` or ``None`` while the game is ongoing.

        Tigers win once ``GOATS_TO_CAPTURE`` goats have been captured, or when
        the goats have no legal placement or move.  Goats win when no tiger
        can move.  The mobility checks do not depend on whose turn it is.
        """
        goats_on_board = sum(cell == 'G' for row in self.board for cell in row)
        # Captured goats are the placed ones that are no longer on the board,
        # which also detects a tiger win during the placement phase.
        if self.goats_placed - goats_on_board >= self.GOATS_TO_CAPTURE:
            return 'Tiger'

        if not self._tiger_can_move():
            return 'Goat'

        if not self._goat_can_move():
            return 'Tiger'

        # If no win condition is met, the game is still in progress.
        return None






In [None]:
!pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1


In [None]:
import  gymnasium as gym

from gym import spaces
import numpy as np
# initially we are considering to train tiger as the agent
# Define the BaghchalEnv class that inherits from gym.Env
class BaghchalEnv(gym.Env):
    """Gymnasium environment in which the learning agent plays the tigers.

    Observation: 25 integers, the 5x5 board flattened row by row, with
    0 = empty, 1 = tiger, 2 = goat.

    Action: one integer in ``[0, 625)`` encoding a move as
    ``((start_row * 5 + start_col) * 5 + end_row) * 5 + end_col``.

    The goats are played by a depth-limited minimax search.
    """

    def __init__(self):
        super().__init__()

        # Initialize the BaghchalBoard
        self.board = BaghchalBoard()

        # Both spaces come from gymnasium (imported as ``gym``) so that they
        # match the base class; mixing in spaces from the legacy ``gym``
        # package makes the spaces unrecognisable to gymnasium-based tooling.
        self.observation_space = gym.spaces.MultiDiscrete([3] * 25)
        self.action_space = gym.spaces.Discrete(625)

        # Initialize the game
        self.reset()

    def reset(self, *, seed=None, options=None):
        """Start a new game and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.board.__init__()  # Reset the BaghchalBoard
        return self.get_observation(), {}

    @staticmethod
    def _decode_action(action):
        """Convert an action into ``(start_row, start_col, end_row, end_col)``.

        Accepts the integer encoding described on the class, or (for backward
        compatibility) a sequence of four coordinates.
        """
        flat = np.asarray(action).reshape(-1)
        if flat.size == 4:
            return tuple(int(value) for value in flat)
        if flat.size != 1:
            raise ValueError(f"Cannot decode action {action!r}")
        rest, end_col = divmod(int(flat[0]), 5)
        rest, end_row = divmod(rest, 5)
        start_row, start_col = divmod(rest, 5)
        return start_row, start_col, end_row, end_col

    def step(self, action):
        """Advance the game by one ply.

        Returns ``(observation, reward, terminated, truncated, info)``.

        NOTE(review): as in the original design, a call made while it is the
        goats' turn plays the minimax goat move and ignores ``action``; only
        calls made on the tigers' turn apply ``action``.
        """
        if self.board.turn == 'G':  # If it's the Goat's turn
            goat_move = self.minimax('G', depth=3)  # Adjust depth as needed
            # The search returns no move when the goats have none.
            if goat_move[0] is not None:
                self.board.make_move(*goat_move[:4])
        else:
            self.board.make_move(*self._decode_action(action))

        observation = self.get_observation()
        reward = self.get_reward()
        terminated = self.is_game_over()

        return observation, reward, terminated, False, {}

    def render(self, mode='human'):
        """Print the current board."""
        self.board.display_board()

    def get_observation(self):
        """Return the board as a flat int64 array (0 empty, 1 tiger, 2 goat)."""
        codes = {'.': 0, 'T': 1, 'G': 2}
        return np.array(
            [codes.get(cell, 0) for row in self.board.board for cell in row],
            dtype=np.int64,
        )

    def get_reward(self):  # these are hyperparameters, need to assign wisely
        """Return the reward, from the tigers' point of view, for the last move.

        -1 for an illegal move, +100 / -100 for a tiger / goat win, +10 for a
        capture, -5 for revisiting a recent position, otherwise 0.
        """
        win_condition = self.board.check_win_conditions()
        if not self.board.valid_move:
            return -1
        if win_condition == 'Tiger':
            return 100
        if win_condition == 'Goat':
            return -100
        if self.board.goat_killed:
            return 10
        if self.board.check_loop():
            return -5
        # Ordinary move: always return a number, never None.
        return 0

    def is_game_over(self):
        """Return True once either side has won."""
        return self.board.check_win_conditions() in ['Tiger', 'Goat']

    def _snapshot_board(self):
        """Capture the board object's attributes so a search can be undone."""
        saved = dict(vars(self.board))
        saved['board'] = [row[:] for row in self.board.board]
        return saved

    def _restore_board(self, saved):
        """Restore attributes captured by ``_snapshot_board``."""
        vars(self.board).update(saved)
        # Copy the rows again so later moves cannot modify the snapshot.
        self.board.board = [row[:] for row in saved['board']]

    def _legal_moves(self):
        """List the legal moves for the side whose turn it is on the board."""
        board = self.board
        cells = [(row, col) for row in range(5) for col in range(5)]
        if board.turn == 'G' and board.goats_placed < 20:
            # Placement phase: the board accepts start == end as "place here".
            # Using only that encoding avoids searching duplicate placements.
            return [(row, col, row, col) for row, col in cells
                    if board.is_valid_move(row, col, row, col)]
        return [(start_row, start_col, end_row, end_col)
                for start_row, start_col in cells
                for end_row, end_col in cells
                if board.is_valid_move(start_row, start_col, end_row, end_col)]

    def minimax(self, player, depth, alpha=-float('inf'), beta=float('inf')):
        """Alpha-beta search from the current position.

        ``player`` must be the side to move on the board.  The score is
        ``evaluate_board()``, which is higher when better for the tigers, so
        tigers maximise and goats minimise.

        Returns ``(start_row, start_col, end_row, end_col, score)``.  The four
        coordinates are ``None`` at depth 0, in a finished game, or when the
        side to move has no legal move.
        """
        if depth == 0 or self.board.check_win_conditions() is not None:
            return None, None, None, None, self.evaluate_board()

        moves = self._legal_moves()
        if not moves:
            return None, None, None, None, self.evaluate_board()

        maximizing = player == 'T'
        opponent = 'G' if maximizing else 'T'
        best_score = -float('inf') if maximizing else float('inf')
        best_move = (None, None, None, None)
        saved = self._snapshot_board()

        for move in moves:
            self.board.make_move(*move)
            score = self.minimax(opponent, depth - 1, alpha, beta)[4]
            # Undo the trial move, including turn, counters and flags.
            self._restore_board(saved)

            if maximizing:
                if score > best_score:
                    best_score, best_move = score, move
                alpha = max(alpha, best_score)
            else:
                if score < best_score:
                    best_score, best_move = score, move
                beta = min(beta, best_score)
            if beta <= alpha:
                break

        return (*best_move, best_score)

    def evaluate_board(self):
        """Score the position as tigers on board minus goats on board."""
        cells = [cell for row in self.board.board for cell in row]
        return cells.count('T') - cells.count('G')



In [None]:
pip install 'shimmy>=0.2.1'

  and should_run_async(code)


Collecting shimmy>=0.2.1
  Downloading Shimmy-1.2.1-py3-none-any.whl (37 kB)
Installing collected packages: shimmy
Successfully installed shimmy-1.2.1


In [None]:
pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/178.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m92.2/178.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.7/178.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: stable_baselines3
Successfully installed stable_baselines3-2.1.0


In [None]:
import stable_baselines3

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# Keep the raw environment under its own name: the factory lambda looks up
# its variable when it is called, so it must not close over ``env``, which
# is rebound to the vectorised wrapper on the next line.
base_env = BaghchalEnv()
env = DummyVecEnv([lambda: base_env])

# DQN agent with a multilayer-perceptron policy acting on the wrapped env.
model = DQN("MlpPolicy", env, verbose=1)


  if not hasattr(tensorboard, "__version__") or LooseVersion(


Using cuda device


NotImplementedError: ignored

In [None]:
# Train the DQN agent created above (``fmodel`` was a typo for ``model``).
model.learn(total_timesteps=10000)


In [None]:
# Persist the trained agent; a later cell reloads it with DQN.load("baghchal_dqn_model").
model.save("baghchal_dqn_model")


In [None]:
# Reload the saved agent and let it play ten episodes, rendering every step.
loaded_model = DQN.load("baghchal_dqn_model")

for _ in range(10):
    obs = env.reset()
    while True:
        # The second value returned by predict() is not needed here.
        action, _ = loaded_model.predict(obs)
        obs, reward, done, _ = env.step(action)
        env.render()
        if done:
            break


In [None]:
import gym
import numpy as np
from stable_baselines3 import DQN

# Create the Baghchal environment
env = BaghchalEnv()

# Create a DQN model
model = DQN("MlpPolicy", env, verbose=1)

# Train the agent
total_timesteps = 100000
model.learn(total_timesteps=total_timesteps)

# Save the trained model
model.save("dqn_baghchal")

# Test the trained agent through the vectorised wrapper the model built
# around ``env``.  A VecEnv's reset() returns only the observations, its
# step() returns (obs, rewards, dones, infos), and it resets a finished
# episode automatically, so this loop does not depend on the raw
# environment's reset()/step() return shapes.
vec_env = model.get_env()
obs = vec_env.reset()
for _ in range(10):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    # Render the wrapped environment directly so the board is printed.
    env.render()

# Close the environment
vec_env.close()


In [None]:
!pip install tensor
!pip install matplotlib
!pip install stable_baselines3

In [None]:
import tensorflow
import stable_baselines3
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv

from stable_baselines3 import DQN


from stable_baselines3.common.monitor import Monitor
# Create the Baghchal environment
env = BaghchalEnv()
env = Monitor(env, "./logs", info_keywords=("episode_reward",))  # Wrap with Monitor
env = DummyVecEnv([lambda: env])
# Hyperparameters
hyperparameters = {
    "learning_rate": 0.001,
    "buffer_size": 1000,
    "batch_size": 64,
    "gamma": 0.95,
    "target_update_freq": 100,
    "exploration_fraction": 0.1,
    "exploration_initial_eps": 1.0,
    "exploration_final_eps": 0.05,
    "learning_starts": 1000,
    "total_timesteps": int(1e5),
    "policy": "MlpPolicy",
    "optimizer_class": "Adam",
    "target_update_tau": 0.995,
}

# Track rewards
episode_rewards = []

# Train and track rewards
for episode in range(hyperparameters["total_timesteps"]):

    # Create and train the DQN agent with TensorBoard logging
    model = DQN(hyperparameters["policy"], env, verbose=1,
                learning_rate=hyperparameters["learning_rate"],
                buffer_size=hyperparameters["buffer_size"],
                batch_size=hyperparameters["batch_size"],
                gamma=hyperparameters["gamma"],
                target_update_interval=hyperparameters["target_update_tau"],
                exploration_fraction=hyperparameters["exploration_fraction"],
                exploration_initial_eps=hyperparameters["exploration_initial_eps"],
                exploration_final_eps=hyperparameters["exploration_final_eps"],
                learning_starts=hyperparameters["learning_starts"],
                tensorboard_log="./logs")  # Enable TensorBoard logging

    # Train the agent
    model.learn(total_timesteps=hyperparameters["total_timesteps"])

    # After the episode, append the episode reward to the list
    episode_rewards.append(episode_rewards)

    # Plot rewards
    plt.plot(episode_rewards)
    plt.xlabel("Episode")
    plt.ylabel("Reward")
    plt.title("Training Performance")
    plt.show()

    # Use TensorBoard to visualize training performance
    # Run this command in a code cell to launch TensorBoard:
    # %tensorboard --logdir ./logs
    %tensorboard --logdir ./logs

mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward:.2f}")
