# **Introduction to AI - Lab 9**

## **Monte Carlo Tree Search (MCTS) and Connect Four**

### Motivation
In this lab, we will explore the Monte Carlo Tree Search (MCTS) algorithm and apply it to the Connect Four game. MCTS is a heuristic search algorithm for decision processes, especially useful in games and simulations.

### Components of MCTS
- **Selection:** Traverse the tree from the root to a leaf node.
- **Expansion:** Expand the leaf node by adding one or more child nodes.
- **Simulation:** Simulate the game from the new node to a terminal state.
- **Backpropagation:** Propagate the simulation results back up the tree to update the nodes.

### Connect Four Game
Connect Four is a two-player game with a 6x7 board where players drop their pieces to form a sequence of 4 horizontally, vertically, or diagonally. The first player to achieve this wins.

## **Task: Implementing MCTS for Connect Four**

### MCTS Algorithm

In [None]:
import numpy as np
import random

class Node:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.value = 0

def selection(node):
    while node.children:
        node = max(node.children, key=lambda child: child.value / child.visits + np.sqrt(2 * np.log(node.visits) / child.visits))
    return node

def expansion(node):
    state = node.state
    for action in available_actions(state):
        new_state = apply_action(state, action)
        child_node = Node(new_state, parent=node)
        node.children.append(child_node)

def simulation(node):
    state = node.state
    while not is_terminal(state):
        action = random.choice(available_actions(state))
        state = apply_action(state, action)
    return evaluate(state)

def backpropagation(node, reward):
    while node is not None:
        node.visits += 1
        node.value += reward
        node = node.parent

def mcts(root, iterations):
    for _ in range(iterations):
        node = selection(root)
        if not is_terminal(node.state):
            expansion(node)
            reward = simulation(node)
            backpropagation(node, reward)
    return max(root.children, key=lambda child: child.visits)

def available_actions(state):
    return [i for i in range(9) if state[i] == 0]

def apply_action(state, action):
    new_state = state.copy()
    new_state[action] = 1 if state.count(1) <= state.count(-1) else -1
    return new_state

def is_terminal(state):
    for i in range(3):
        if abs(sum(state[i*3:(i+1)*3])) == 3 or abs(sum(state[i::3])) == 3:
            return True
    if abs(state[0] + state[4] + state[8]) == 3 or abs(state[2] + state[4] + state[6]) == 3:
        return True
    return all(s != 0 for s in state)

def evaluate(state):
    for i in range(3):
        if sum(state[i*3:(i+1)*3]) == 3 or sum(state[i::3]) == 3:
            return 1
        if sum(state[i*3:(i+1)*3]) == -3 or sum(state[i::3]) == -3:
            return -1
    if state[0] + state[4] + state[8] == 3 or state[2] + state[4] + state[6] == 3:
        return 1
    if state[0] + state[4] + state[8] == -3 or state[2] + state[4] + state[6] == -3:
        return -1
    return 0

def get_initial_state():
    return [0] * 9

# Example usage
initial_state = get_initial_state()
root = Node(initial_state)
best_child = mcts(root, iterations=1000)
print("Best action:", best_child.state)

### Connect Four Environment

In [None]:
import numpy as np

class ConnectFour:
    def __init__(self):
        self.board = np.zeros((6, 7), dtype=int)
        self.current_player = 1

    def available_actions(self):
        return [c for c in range(7) if self.board[0, c] == 0]

    def apply_action(self, action):
        for row in range(5, -1, -1):
            if self.board[row, action] == 0:
                self.board[row, action] = self.current_player
                break
        self.current_player = 3 - self.current_player

    def is_terminal(self):
        # Check for a win
        for row in range(6):
            for col in range(7):
                if self.board[row, col] == 0:
                    continue
                if col + 3 < 7 and all(self.board[row, col + i] == self.board[row, col] for i in range(4)):
                    return True
                if row + 3 < 6 and all(self.board[row + i, col] == self.board[row, col] for i in range(4)):
                    return True
                if row + 3 < 6 and col + 3 < 7 and all(self.board[row + i, col + i] == self.board[row, col] for i in range(4)):
                    return True
                if row + 3 < 6 and col - 3 >= 0 and all(self.board[row + i, col - i] == self.board[row, col] for i in range(4)):
                    return True
        # Check for a draw (no more moves available)
        if all(self.board[0, c] != 0 for c in range(7)):
            return True
        return False

    def evaluate(self):
        # Evaluate the board state
        for row in range(6):
            for col in range(7):
                if self.board[row, col] == 0:
                    continue
                # Horizontal
                if col + 3 < 7 and all(self.board[row, col + i] == self.board[row, col] for i in range(4)):
                    return 1 if self.board[row, col] == 1 else -1
                # Vertical
                if row + 3 < 6 and all(self.board[row + i, col] == self.board[row, col] for i in range(4)):
                    return 1 if self.board[row, col] == 1 else -1
                # Diagonal /
                if row + 3 < 6 and col + 3 < 7 and all(self.board[row + i, col + i] == self.board[row, col] for i in range(4)):
                    return 1 if self.board[row, col] == 1 else -1
                # Diagonal \
                if row + 3 < 6 and col - 3 >= 0 and all(self.board[row + i, col - i] == self.board[row, col] for i in range(4)):
                    return 1 if self.board[row, col] == 1 else -1
        # No win, no loss -> return 0
        return 0

def get_initial_state():
    return ConnectFour()

# Example usage
game = get_initial_state()
print("Initial board:\n", game.board)

# Simulate a game (you can replace this with your MCTS implementation)
game.apply_action(3)  # Player 1
game.apply_action(3)  # Player 2
game.apply_action(2)  # Player 1
game.apply_action(2)  # Player 2
game.apply_action(1)  # Player 1
game.apply_action(1)  # Player 2
game.apply_action(0)  # Player 1
game.apply_action(0)  # Player 2
print("Board after a few moves:\n", game.board)

if game.is_terminal():
    print("Game over!")
    result = game.evaluate()
    if result == 1:
        print("Player 1 wins!")
    elif result == -1:
        print("Player 2 wins!")
    else:
        print("It's a draw!")

## **Conclusion**
In this lab, we implemented the Monte Carlo Tree Search algorithm and applied it to the Connect Four game. MCTS is a powerful algorithm used in decision-making processes for games, allowing us to explore and evaluate possible future moves effectively.