In [None]:
First, create the Connect 4 board.

In [30]:
from collections import Counter
import numpy as np
# Board geometry: boards are created with shape (NUM_COLUMNS, COLUMN_HEIGHT)
# and indexed as board[column, row].
NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
# Number of aligned discs a player needs (see four_in_a_row).
FOUR = 4

In [31]:
def valid_moves(board):
    """Returns the list of column indices whose topmost cell is still empty."""
    top_row = COLUMN_HEIGHT - 1
    open_columns = []
    for column in range(NUM_COLUMNS):
        # A column accepts a disc as long as its highest cell holds 0.
        if board[column, top_row] == 0:
            open_columns.append(column)
    return open_columns


def play(board, column, player):
    """Updates `board` in place as `player` drops a disc in `column`.

    The disc lands in the lowest-indexed empty cell of `board[column]`.

    Raises:
        ValueError: if `column` has no empty cell left. (Previously a bare
            StopIteration escaped from `next()`, which can silently terminate
            an enclosing iteration or surface as RuntimeError in a generator.)
    """
    free_slots = np.flatnonzero(board[column] == 0)
    if free_slots.size == 0:
        raise ValueError(f"column {column} is full")
    board[column, free_slots[0]] = player


def take_back(board, column):
    """Updates `board` in place, clearing the highest occupied cell of `column`.

    Raises IndexError when the column holds no disc.
    """
    occupied = np.flatnonzero(board[column])
    # The last non-zero position is the most recently dropped disc.
    topmost = occupied[-1]
    board[column, topmost] = 0


def four_in_a_row(board, player):
    """Checks if `player` owns FOUR consecutive cells in any direction.

    `board` is indexed as board[column, row]. Vertical, horizontal and both
    diagonal directions are scanned; returns True on the first line found,
    False otherwise.
    """
    column_starts = range(NUM_COLUMNS - FOUR + 1)
    row_starts = range(COLUMN_HEIGHT - FOUR + 1)

    # Vertical: FOUR stacked cells inside a single column.
    for column in range(NUM_COLUMNS):
        for bottom in row_starts:
            if np.all(board[column, bottom:bottom + FOUR] == player):
                return True

    # Horizontal: FOUR adjacent columns on the same row.
    for row in range(COLUMN_HEIGHT):
        for left in column_starts:
            if np.all(board[left:left + FOUR, row] == player):
                return True

    # Diagonals: walk FOUR columns to the right while the row either
    # increases (rising) or decreases (falling) by one per step.
    for left in column_starts:
        columns = [left + step for step in range(FOUR)]
        for bottom in row_starts:
            rising = [bottom + step for step in range(FOUR)]
            if np.all(board[columns, rising] == player):
                return True
            falling = rising[::-1]
            if np.all(board[columns, falling] == player):
                return True

    return False

def _mc(board, player):
    """Plays uniformly random moves on `board` (in place) until the game ends.

    `player` moves first. Returns the id of the player who completes a line,
    or 0 when the board fills up without a winner.
    """
    mover = player
    while True:
        open_columns = valid_moves(board)
        if not open_columns:
            return 0
        column = np.random.choice(open_columns)
        play(board, column, mover)
        if four_in_a_row(board, mover):
            return mover
        mover = -mover


def montecarlo(board, player):
    """Estimates the value of `board` from random playouts started by `player`.

    Runs a fixed number of playouts on copies of `board` and returns
    (wins of player 1 - wins of player -1) / number of playouts.
    """
    montecarlo_samples = 100
    outcomes = Counter()
    for _ in range(montecarlo_samples):
        # Each playout mutates its board, so it gets a private copy.
        outcomes[_mc(np.copy(board), player)] += 1
    return (outcomes[1] - outcomes[-1]) / montecarlo_samples


def eval_board(board, player):
    """Scores `board`: 1 if player 1 (Alice) has won, -1 if player -1 (Bob) has won.

    For any other position the score is the Monte Carlo estimate returned by
    `montecarlo(board, player)`.
    """
    # Alice (1) is checked before Bob (-1).
    for side in (1, -1):
        if four_in_a_row(board, side):
            return side
    # Not terminal, let's simulate...
    return montecarlo(board, player)

In [32]:
# Display symbol for each player id (1 and -1); used when printing the board.
PLAYERS = {1: "A", -1: "B"}
# Total number of cells: a board holding this many discs is full.
MAX_ROUNDS = NUM_COLUMNS * COLUMN_HEIGHT
# Default number of MCTS iterations per move (default of MCTS's num_iterations).
NUM_ITERATIONS = 10
def initialize_board():
    """Returns an empty board: a (NUM_COLUMNS, COLUMN_HEIGHT) byte array of zeros."""
    board_shape = (NUM_COLUMNS, COLUMN_HEIGHT)
    return np.zeros(board_shape, dtype=np.byte)
    
def display(board):
    """Prints the board one row per line, highest row first.

    Cells owned by player 1 / -1 are shown with their PLAYERS symbol,
    everything else as "-"; every symbol is followed by a space.
    """
    for row in range(COLUMN_HEIGHT - 1, -1, -1):
        line = ""
        for column in range(NUM_COLUMNS):
            value = board[column][row]
            if value == 1:
                symbol = PLAYERS[1]
            elif value == -1:
                symbol = PLAYERS[-1]
            else:
                symbol = "-"
            line += f"{symbol} "
        print(line)


def round_number(board):
    """Returns how many discs are on `board` (its count of non-zero cells)."""
    discs_played = np.count_nonzero(board)
    return discs_played

def terminal_state(board):
    """Classifies `board` as finished or still in progress.

    Returns:
        1 or -1 if that player has four in a row, 0 if the board is full
        without a winner (draw), None if the game is still in progress.

    Wins are checked before the full-board test so that a line completed by
    the very last disc is reported as a win rather than a draw.
    """
    for candidate in (1, -1):
        if four_in_a_row(board, candidate):
            return candidate
    if round_number(board) == MAX_ROUNDS:    # draw
        return 0
    return None

In [25]:
# Use Monte Carlo tree search (MCTS) as the default opponent
from __future__ import annotations

class Node:
    """One game state in the Monte Carlo search tree.

    Attributes are plain fields: `board` (private copy of the position),
    `player` (who made the move that produced this position), `parent`,
    `move` (column played to get here), `num_visits`, `num_wins` (credited
    from the point of view of `player`; a draw counts 0.5), `children`,
    and `next_moves` (columns not yet expanded into children).
    """

    def __init__(self, board: np.ndarray, player: int, parent: Node = None, move: int = None):
        # Tree links.
        self.parent = parent
        self.children = []
        # Position and how it was reached.
        self.board = np.copy(board)
        self.player = player    # player who did the previous move
        self.move = move        # previous move that brought in this state
        self.next_moves = valid_moves(board)
        # Statistics filled in by backpropagate().
        self.num_visits = 0
        self.num_wins = 0

    def selection(self):
        """Returns the child with the highest UCB1 score."""
        exploration_weight = np.sqrt(2)

        def ucb1(child):
            mean_reward = child.num_wins / child.num_visits
            bonus = exploration_weight * np.sqrt(np.log(child.parent.num_visits) / child.num_visits)
            return mean_reward + bonus

        return max(self.children, key=ucb1)

    def expand(self, move):
        """Creates, registers and returns the child reached by playing `move`."""
        mover = -self.player
        child_board = np.copy(self.board)
        play(child_board, move, mover)
        # The move is no longer "untried" once its child exists.
        self.next_moves.remove(move)
        child = Node(child_board, mover, parent=self, move=move)
        self.children.append(child)
        return child

    def simulate(self):
        """Random playout from this position; returns the winner id or 0 for a draw."""
        mover = -self.player
        rollout = np.copy(self.board)
        while True:
            open_columns = valid_moves(rollout)
            if not open_columns:
                return 0 # DRAW
            column = np.random.choice(open_columns)
            play(rollout, column, mover)
            if four_in_a_row(rollout, mover):
                return mover
            mover = -mover

    def backpropagate(self, winner):
        """Adds one visit, and the matching win credit, to this node and all ancestors."""
        current = self
        while current is not None:
            current.num_visits += 1
            if winner == 0:   # draw
                current.num_wins += 0.5
            elif winner == current.player:
                current.num_wins += 1
            current = current.parent

In [33]:
def MCTS(board: np.ndarray, player: int, num_iterations: int = NUM_ITERATIONS):
    """Picks a column for `player` with Monte Carlo tree search.

    Args:
        board: current position; it is not modified (nodes work on copies).
        player: id (1 or -1) of the player about to move.
        num_iterations: number of select/expand/simulate/backpropagate rounds.

    Returns:
        The move of the root child with the best win ratio, or None when the
        root has no children (no iteration was run or no column is playable).
    """
    # the player in the node is the one who did the previous move
    root = Node(board, -player, parent=None, move=None)

    for _ in range(num_iterations):
        node = root

        # SELECTION (tree traversal)
        # Descend until reaching a node that has no children yet or still has
        # untried moves.
        while node.children and not node.next_moves:
            node = node.selection()

        # EXPANSION
        # A finished game must not be expanded: previously a won position with
        # open columns still received children, i.e. play continued after the
        # game had ended.
        winner = terminal_state(node.board)
        if winner is None and node.next_moves:
            move = np.random.choice(node.next_moves)
            node = node.expand(move)
            winner = terminal_state(node.board)

        # SIMULATION (ROLLOUT)
        if winner is None:
            winner = node.simulate()

        # BACKPROPAGATION
        node.backpropagate(winner)

    # Nothing was expanded: there is no move to recommend.
    if not root.children:
        return None

    # Return most promising move from root (highest score)
    best_node = max(root.children, key=lambda child: child.num_wins / child.num_visits)
    return best_node.move

In [34]:
# NOTE(review): PLAYERS and MAX_ROUNDS repeat the definitions from an earlier cell.
PLAYERS = {1: "A", -1: "B"}
MAX_ROUNDS = NUM_COLUMNS * COLUMN_HEIGHT
# Default depth limit of the minimax search (default of its max_depth parameter).
MAX_DEPTH = 3
# Number of random playouts run by each mc_simulation call.
MC_ITERATIONS = 20
# Order in which minimax tries the columns: centre first, then outwards.
SEARCH_ORDER = [3, 2, 4, 1, 5, 0, 6]
def initialize_board():
    """Returns an empty board: a (NUM_COLUMNS, COLUMN_HEIGHT) byte array of zeros.

    NOTE(review): an identical function is already defined in an earlier cell.
    """
    board_shape = (NUM_COLUMNS, COLUMN_HEIGHT)
    return np.zeros(board_shape, dtype=np.byte)
    
def display(board):
    """Prints the board one row per line, highest row first.

    Cells owned by player 1 / -1 are shown with their PLAYERS symbol,
    everything else as "-"; every symbol is followed by a space.

    NOTE(review): an identical function is already defined in an earlier cell.
    """
    for row in range(COLUMN_HEIGHT - 1, -1, -1):
        line = ""
        for column in range(NUM_COLUMNS):
            value = board[column][row]
            if value == 1:
                symbol = PLAYERS[1]
            elif value == -1:
                symbol = PLAYERS[-1]
            else:
                symbol = "-"
            line += f"{symbol} "
        print(line)


def round_number(board):
    """Returns how many discs are on `board` (its count of non-zero cells).

    NOTE(review): an identical function is already defined in an earlier cell.
    """
    discs_played = np.count_nonzero(board)
    return discs_played
#minimax
def can_win_next_move(board, player, moves=None, round_num=None):
    """Looks for a move that gives `player` four in a row immediately.

    Args:
        board: position to probe; each candidate disc is played and then
            taken back, so the board is unchanged on return.
        player: id of the player to move.
        moves: columns to try; defaults to `valid_moves(board)`.
        round_num: discs already on the board; defaults to `round_number(board)`.

    Returns:
        (score, move) for the first winning column found, where score is
        (MAX_ROUNDS - (round_num - 1)) // 2, or (None, None) if none exists.
    """
    candidates = valid_moves(board) if moves is None else moves
    discs = round_number(board) if round_num is None else round_num

    # The score only depends on how many discs are on the board.
    winning_score = (MAX_ROUNDS - (discs - 1)) // 2

    for column in candidates:
        play(board, column, player)
        wins = four_in_a_row(board, player)
        take_back(board, column)
        if wins and winning_score:
            return winning_score, column

    return None, None
def mc_simulation(board: np.ndarray, player: int):
    """Runs MC_ITERATIONS random playouts and keeps the best one for `player`.

    Each playout starts with a random valid first move for `player` and then
    alternates random moves on a copy of `board`. A playout scores 0 when the
    board fills up (this is tested before looking for a line), a positive
    value when `player` completes a line and the negated value when the
    opponent does.

    Returns:
        (best_score, best_move): the highest playout score seen and the first
        move of the playout that produced it.
    """
    top_score = -MAX_ROUNDS
    top_move = None

    for _ in range(MC_ITERATIONS):
        first_move = np.random.choice(valid_moves(board))

        rollout = np.copy(board)
        mover = player
        column = first_move
        while True:
            play(rollout, column, mover)
            discs = round_number(rollout)

            # Terminal conditions
            if discs == MAX_ROUNDS:
                score = 0
                break
            if four_in_a_row(rollout, mover):
                magnitude = (MAX_ROUNDS - (discs - 1)) // 2
                score = magnitude if mover == player else -magnitude
                break

            column = np.random.choice(valid_moves(rollout))
            mover = -mover

        if score > top_score:
            top_score, top_move = score, first_move

    return top_score, top_move
def minimax(board: np.ndarray, player: int, depth: int, alpha: int, beta: int, max_depth: int = MAX_DEPTH):
    """Negamax search with alpha-beta pruning and a random-playout leaf evaluation.

    Args:
        board: position to search; moves are played and taken back in place,
            so it is unchanged on return.
        player: id (1 or -1) of the player to move.
        depth: depth of this call (callers in this file start at 1).
        alpha, beta: search window, from `player`'s point of view.
        max_depth: once `depth` exceeds this limit the position is scored
            with `mc_simulation` instead of searching deeper.

    Returns:
        (score, move): score from `player`'s point of view and the column
        achieving it; move is None for a full board, for a window cut-off,
        or when no move raised alpha.
    """
    round_num = round_number(board)

    # Full board: nothing left to play.
    if round_num == MAX_ROUNDS:
        return 0, None

    moves = valid_moves(board)
    score, m = can_win_next_move(board, player, moves, round_num)

    if score is not None:
        return score, m

    # No immediate win exists, which caps the best reachable score.
    max_score = (MAX_ROUNDS - (round_num + 1)) // 2
    beta = min(beta, max_score)

    if alpha >= beta:
        return beta, None

    if depth > max_depth:
        return mc_simulation(board, player)

    best_move = None
    for m in SEARCH_ORDER:
        if m not in moves:
            continue

        play(board, m, player)
        # Forward max_depth: the recursive call used to omit it, so every
        # level below the root silently fell back to MAX_DEPTH.
        score, _ = minimax(board, -player, depth + 1, -beta, -alpha, max_depth)
        score = -score
        take_back(board, m)

        if score >= beta:
            return score, m

        if score > alpha:
            alpha = score
            best_move = m

    return alpha, best_move


In [None]:
import random

class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    `q_table` maps a board's raw bytes to an array of NUM_COLUMNS action
    values, created as zeros the first time a state is seen.

    NOTE(review): `update` bootstraps from the maximum value of the successor
    state; in the self-play loop of this notebook that successor is the
    opponent's turn, and the maximum also ranges over full columns -- confirm
    this is intended.
    """

    def __init__(self, alpha=0.5, gamma=0.9, epsilon=0.1):
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # probability of picking a random action
        self.q_table = {}

    def state_key(self, board):
        """Returns the hashable key for `board` (its raw bytes)."""
        return board.tobytes()

    def _q_values(self, key):
        """Returns the action-value array for `key`, creating it when missing."""
        if key not in self.q_table:
            self.q_table[key] = np.zeros(NUM_COLUMNS)
        return self.q_table[key]

    def select_action(self, board, valid_actions):
        """Picks an action from `valid_actions`.

        With probability `epsilon` the choice is uniformly random; otherwise
        it is a random pick among the valid actions with the highest value.

        Raises:
            ValueError: if `valid_actions` is empty.
        """
        if len(valid_actions) == 0:
            raise ValueError("no valid actions to choose from")

        q_values = self._q_values(self.state_key(board))

        if random.random() < self.epsilon:
            return random.choice(valid_actions)

        # Computed once instead of once per candidate action.
        best_value = max(q_values[valid_actions])
        best_actions = [
            action
            for action, value in enumerate(q_values)
            if action in valid_actions and value == best_value
        ]
        return random.choice(best_actions)

    def update(self, old_board, action, reward, new_board):
        """Applies one Q-learning update for (old_board, action, reward, new_board).

        Both states are added to the table when unseen, so the method no
        longer raises KeyError for an `old_board` that was never passed to
        `select_action`.
        """
        old_values = self._q_values(self.state_key(old_board))
        new_values = self._q_values(self.state_key(new_board))

        old_q_value = old_values[action]
        max_new_q_value = np.max(new_values)

        old_values[action] += self.alpha * (reward + self.gamma * max_new_q_value - old_q_value)


def play_game(agent):
    """Plays one self-play training episode, updating `agent` after every move.

    The same agent chooses the moves of both players. After each move the
    agent is updated with reward 1 if the mover just won and 0 otherwise.

    NOTE(review): a later cell defines a zero-argument function with the same
    name, which replaces this one once that cell has run.
    """
    board = initialize_board()
    player = 1
    # terminal_state scans the whole board, so evaluate it once per ply.
    outcome = terminal_state(board)
    while outcome is None:
        valid_actions = valid_moves(board)
        action = agent.select_action(board, valid_actions)
        old_board = np.copy(board)
        play(board, action, player)
        outcome = terminal_state(board)
        # Only the mover can have completed a line with this disc: had the
        # opponent already owned one, the loop would have ended earlier.
        reward = 1 if outcome == player else 0
        agent.update(old_board, action, reward, board)
        player = -player

# Training the agent
# Each episode is one self-play game in which `agent` picks the moves of both sides.
# NOTE(review): this loop needs the one-argument play_game defined just above;
# a later cell redefines play_game without parameters.
num_episodes = 10000
agent = QLearningAgent()
for i in range(num_episodes):
    play_game(agent)

# You can now use the trained agent to play the Connect 4 game


In [35]:
def choose_move(board: np.ndarray, player: int,max_depth: int = MAX_DEPTH):
    """Chooses a column for `player`, plays it on `board` in place and returns it.

    Opening moves are scripted: the first two discs go to the central column
    and the third to one of the three central columns. Afterwards player 1
    uses MCTS and player -1 uses minimax limited to `max_depth`.

    Returns:
        The column played, or None when no move is available (the board is
        left untouched in that case).

    Raises:
        ValueError: if `player` is neither 1 nor -1 once the opening is over.
    """
    # Full board: report "no move" instead of handing None to play().
    if not valid_moves(board):
        return None

    discs = round_number(board)
    center = NUM_COLUMNS // 2

    if discs <= 1:
        # FIRST/SECOND MOVE -> always play central
        move = center
    elif discs == 2:
        # THIRD MOVE -> always play in one of the 3 cells in the center
        move = np.random.choice([center - 1, center, center + 1])
    elif player == 1:
        # MCTS for 1
        move = MCTS(board, player, NUM_ITERATIONS)
    elif player == -1:
        # Minimax for -1
        _, move = minimax(board, player, depth=1, alpha=-1000, beta=1000, max_depth=max_depth)
    else:
        raise ValueError(f"unknown player id: {player!r}")

    # The search found nothing to play.
    if move is None:
        return None

    play(board, move, player)
    return move


In [36]:
def play_game():
    """Plays one full game between the two automatic players, printing every turn.

    Player 1 starts. After each move the board is printed; the game stops
    with a "WON" message as soon as the mover has four in a row, or with
    "DRAW" when `choose_move` returns None.
    """
    board = initialize_board()
    player = 1

    move = choose_move(board, player, max_depth=1)
    while move is not None:
        # Columns are reported 1-based.
        print(f"{PLAYERS[player]} TURN -> {move + 1}")
        display(board)

        if four_in_a_row(board, player):
            print(f"\nPlayer {PLAYERS[player]} WON")
            return

        print()
        player = -player
        move = choose_move(board, player, max_depth=1)

    print("DRAW")

# Run one demonstration game with the zero-argument play_game defined above.
play_game()


A TURN -> 4
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - A - - - 

B TURN -> 4
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - B - - - 
- - - A - - - 

A TURN -> 3
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - B - - - 
- - A A - - - 

B TURN -> 5
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - B - - - 
- - A A B - - 

A TURN -> 5
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - - - - - 
- - - B A - - 
- - A A B - - 



KeyboardInterrupt: ignored