# Introduction

This notebook is based on the exercise [Exercise: N-Step Lookahead](https://www.kaggle.com/alexisbcook/exercise-n-step-lookahead) from the course [Intro to Game AI and Reinforcement Learning](https://www.kaggle.com/learn/intro-to-game-ai-and-reinforcement-learning) by [Alexis Cook](https://www.kaggle.com/alexisbcook).

It improves on the provided code by implementing the [Alpha-Beta pruning](https://en.wikipedia.org/wiki/Alpha–beta_pruning) algorithm.

**In version 2 of this notebook** I also improved the algorithm that evaluates the board. It now counts, in a single pass, all the combinations of valid windows for both players. As a result it is 37% faster than version 1, and richer and better heuristics were also implemented.

To avoid timeout errors in the ConnectX competition server, the agent starts with a 4-step lookahead and increases to a 5-step and then a 6-step lookahead later in the game.

The 3-step lookahead MiniMax agent is also provided for comparison and testing.

## MiniMax 3-Steps Lookahead Agent

In [None]:
def minimax_agent(obs, config):
    """Choose a ConnectX column using a 3-step minimax lookahead.

    Args:
        obs: Observation exposing `board` (flat, row-major sequence of cells
            where 0 means empty), `mark` (this agent's piece, 1 or 2) and
            `step`.
        config: Game configuration exposing `rows`, `columns` and `inarow`.

    Returns:
        The index of the chosen column. Ties between equally scored columns
        are broken at random.

    Note:
        The heuristic counts windows holding exactly 3 and 4 discs, so it is
        tuned for `config.inarow == 4`.
    """
    # Imports are kept local so the function stays self-contained.
    import random
    import numpy as np

    print("Step {}: MiniMax Agent moving".format(obs.step))

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark, config):
        next_grid = grid.copy()
        # Scan from the bottom row upwards and stop at the first empty cell.
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every window of `config.inarow` cells (as a list), in the order
    # horizontal, vertical, positive diagonal, negative diagonal.
    def iter_windows(grid, config):
        span = config.inarow
        # horizontal
        for row in range(config.rows):
            for col in range(config.columns-(span-1)):
                yield list(grid[row, col:col+span])
        # vertical
        for row in range(config.rows-(span-1)):
            for col in range(config.columns):
                yield list(grid[row:row+span, col])
        # positive diagonal
        for row in range(config.rows-(span-1)):
            for col in range(config.columns-(span-1)):
                yield list(grid[range(row, row+span), range(col, col+span)])
        # negative diagonal
        for row in range(span-1, config.rows):
            for col in range(config.columns-(span-1)):
                yield list(grid[range(row, row-span, -1), range(col, col+span)])

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece, config):
        return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece, config):
        return sum(
            1
            for window in iter_windows(grid, config)
            if check_window(window, num_discs, piece, config)
        )

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark, config):
        opponent = mark%2+1
        num_threes = count_windows(grid, 3, mark, config)
        num_fours = count_windows(grid, 4, mark, config)
        num_threes_opp = count_windows(grid, 3, opponent, config)
        num_fours_opp = count_windows(grid, 4, opponent, config)
        score = num_threes - 1e2*num_threes_opp - 1e4*num_fours_opp + 1e6*num_fours
        return score

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # The agent has just moved, so the opponent (minimizing player) is next.
        score = minimax(next_grid, nsteps-1, False, mark, config)
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window, config):
        return window.count(1) == config.inarow or window.count(2) == config.inarow

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, config):
        # Check for draw: no empty cell left in the top row
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        return any(is_terminal_window(window, config) for window in iter_windows(grid, config))

    # Minimax implementation
    def minimax(node, depth, maximizingPlayer, mark, config):
        is_terminal = is_terminal_node(node, config)
        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]
        if depth == 0 or is_terminal:
            return get_heuristic(node, mark, config)
        if maximizingPlayer:
            # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config))
            return value
        else:
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1, config)
                value = min(value, minimax(child, depth-1, True, mark, config))
            return value

    # Get list of valid moves (columns whose top cell is still empty)
    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]
    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, config, 3) for col in valid_moves]))
    print("Scores:", scores, end=' - ')
    # Get a list of columns (moves) that maximize the heuristic
    best_score = max(scores.values())
    max_cols = [key for key in scores if scores[key] == best_score]
    # Select at random from the maximizing columns
    move = random.choice(max_cols)
    print("Selected move:", move)
    return move

## Alpha-beta pruning 4-Steps Lookahead Agent

In [None]:
def alphabeta_agent(obs, config):
    """Choose a ConnectX column using minimax search with alpha-beta pruning.

    The lookahead depth is 4 while more than two thirds of the cells are
    empty, 5 while more than one third are empty, and 6 afterwards.

    Args:
        obs: Observation exposing `board` (flat, row-major list of cells
            where 0 means empty), `mark` (this agent's piece, 1 or 2) and
            `step`.
        config: Game configuration exposing `rows`, `columns` and `inarow`.

    Returns:
        The index of the chosen column. Ties between equally scored columns
        are broken at random.

    Note:
        The heuristic weights windows holding 1 to 4 pieces, so it requires
        `config.inarow >= 4` (the per-player counters have `inarow + 1`
        slots).
    """
    # Imports are kept local so the function's source is self-contained
    # when it is exported on its own.
    import random
    import numpy as np

    print("Step {}: AphaBeta Agent moving".format(obs.step))

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        # Scan from the bottom row upwards and stop at the first empty cell.
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Get the number of pieces of the same mark in a window; evaluates to 0
    # when the window also contains an opponent piece.
    def pieces_in_window(window, piece):
        return window.count(piece) * (window.count(piece) + window.count(0) == config.inarow)

    # Yields every window of `config.inarow` cells (as a list), in the order
    # horizontal, vertical, positive diagonal, negative diagonal.
    def iter_windows(grid):
        span = config.inarow
        # horizontal
        for row in range(config.rows):
            for col in range(config.columns-(span-1)):
                yield list(grid[row, col:col+span])
        # vertical
        for row in range(config.rows-(span-1)):
            for col in range(config.columns):
                yield list(grid[row:row+span, col])
        # positive diagonal
        for row in range(config.rows-(span-1)):
            for col in range(config.columns-(span-1)):
                yield list(grid[range(row, row+span), range(col, col+span)])
        # negative diagonal
        for row in range(span-1, config.rows):
            for col in range(config.columns-(span-1)):
                yield list(grid[range(row, row-span, -1), range(col, col+span)])

    # Counts number of pieces for both players for every possible window.
    # windows[p][k] is the number of windows where pieces_in_window(..., p) == k.
    def count_windows(grid):
        windows = {piece: [0] * (config.inarow+1) for piece in (1, 2)}
        for window in iter_windows(grid):
            windows[1][pieces_in_window(window, 1)] += 1
            windows[2][pieces_in_window(window, 2)] += 1
        return windows

    # Calculates value of heuristic from precomputed window counts
    def score_windows(windows, mark):
        own = windows[mark]
        opp = windows[mark%2+1]
        return (own[1] + own[2]*3 + own[3]*9 + own[4]*81
                - opp[1] - opp[2]*3 - opp[3]*9 - opp[4]*81)

    # Uses alphabeta to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        score = alphabeta(next_grid, nsteps-1, -np.inf, np.inf, False, mark)
        return score

    # Alpha Beta pruning implementation
    def alphabeta(node, depth, a, b, maximizingPlayer, mark):
        # Count the windows once and reuse them for both the terminal test
        # and the heuristic.
        windows = count_windows(node)
        someone_won = windows[1][config.inarow] + windows[2][config.inarow] > 0
        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]
        # A full board (no valid moves) is a draw and must be scored here;
        # otherwise the loops below would not run and +/-inf would leak out
        # as the value of a drawn position.
        if depth == 0 or someone_won or not valid_moves:
            return score_windows(windows, mark)
        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, a, b, False, mark))
                a = max(a, value)
                if a >= b:
                    break # β cutoff
            return value
        else:
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, a, b, True, mark))
                b = min(b, value)
                if b <= a:
                    break # α cutoff
            return value

    # Get list of valid moves (columns whose top cell is still empty)
    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]
    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    # Search deeper as the board fills up
    empty_cells = obs.board.count(0)
    total_cells = len(obs.board)
    if empty_cells > total_cells*2/3:
        n_steps = 4
    elif empty_cells > total_cells/3:
        n_steps = 5
    else:
        n_steps = 6
    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, n_steps) for col in valid_moves]))
    print("Scores:", scores, end=' - ')
    # Get a list of columns (moves) that maximize the heuristic
    best_score = max(scores.values())
    max_cols = [key for key in scores if scores[key] == best_score]
    # Select at random from the maximizing columns
    move = random.choice(max_cols)
    print("Selected move:", move)
    return move

## Test Agents one against the other

In [None]:
from kaggle_environments import make, evaluate

# Create the game environment
env = make("connectx", debug=True)

# The MiniMax agent (first player) plays one game against the Alpha-Beta agent (second player)
env.run([minimax_agent, alphabeta_agent])

# Work out the result from the final rewards and print it as a debug message
if env.state[0].reward == 1:
    status = "Agent 1 won!"
elif env.state[1].reward == 1:
    status = "Agent 2 won!"
else:
    status = "It's a tie."
env.debug_print("The match is over: " + status)

In [None]:
# Show the game stored in `env`, using the environment's "ipython" render mode
env.render(mode="ipython")

### Get win percentages against `random` agent

In [None]:
def get_win_percentages(agent1, agent2, n_rounds=100):
    """Play two agents against each other in parallel and print the results.

    The requested rounds are split evenly across worker processes. Every
    worker plays the same number of games with `agent1` moving first and
    with `agent2` moving first, so the number of episodes actually played
    is rounded to a multiple of twice the worker count and may differ
    slightly from `n_rounds`.

    Args:
        agent1: First agent, in whatever form `kaggle_environments.evaluate`
            accepts (the notebook passes agent names and agent functions).
        agent2: Second agent, same form as `agent1`.
        n_rounds: Approximate total number of games to play.

    Raises:
        ValueError: If `n_rounds` is smaller than 1.
    """
    import time
    from joblib import Parallel, delayed
    from kaggle_environments import evaluate
    import multiprocessing as mp

    if n_rounds < 1:
        raise ValueError("n_rounds must be at least 1")

    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}

    cores = mp.cpu_count()
    # Never start more workers than there are pairs of games to play, and
    # give every worker at least one game per seating order. Without this,
    # n_rounds < 2 * cores yields zero games per worker and a division by
    # zero in the summary below.
    jobs = max(1, min(cores, n_rounds // 2))
    half_rounds_per_job = max(1, n_rounds // jobs // 2)

    def evaluate_per_core(half_rounds):
        outcomes = evaluate("connectx", [agent1, agent2], config, [], half_rounds)
        # Swap the seating order and flip each reward pair back so that
        # index 0 always refers to agent1.
        outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], half_rounds)]
        return outcomes

    start_time=time.time()
    results = Parallel(n_jobs=jobs)(delayed(evaluate_per_core)(half_rounds_per_job) for i in range(jobs))
    total_time=time.time()-start_time

    outcomes = [result for results_per_job in results for result in results_per_job]
    n_played = len(outcomes)

    print("In total, {} episodes have been evaluated using {} CPU's cores.".format(n_played, jobs))
    # Average over the episodes actually played, not the number requested.
    print("Total time: {:.2f} minutes ({:.2f} seconds per match on average)".format(total_time/60, total_time/n_played))
    print("Agent 1 Won: {:.2%}".format(outcomes.count([1,-1])/n_played))
    print("Agent 2 Won: {:.2%}".format(outcomes.count([-1,1])/n_played))
    print("Ties:        {:.2%}".format(outcomes.count([0,0])/n_played))
    print("Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Invalid Plays by Agent 2:", outcomes.count([0, None]))

### Agent `random` VS Agent `negamax`

In [None]:
# Baseline: the "random" agent against the "negamax" agent (both passed by name), requesting 20 rounds
get_win_percentages(agent1="random", agent2="negamax", n_rounds=20)

### Agent `negamax` VS MiniMax 3-step Lookahead Agent

In [None]:
# The "negamax" agent (passed by name) against the MiniMax agent defined in this notebook, requesting 20 rounds
get_win_percentages(agent1="negamax", agent2=minimax_agent, n_rounds=20)

### MiniMax 3-step Lookahead Agent VS Alpha-beta pruning 4-step Lookahead Agent


In [None]:
# The MiniMax agent against the Alpha-Beta agent, both defined in this notebook, requesting 20 rounds
get_win_percentages(agent1=minimax_agent, agent2=alphabeta_agent, n_rounds=20)

Note: `alphabeta_agent` may generate some invalid plays here because it runs out of time, but the competition server is faster.

## Create file for submission

In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Write the source code of `function` to `file`.

    The source is appended when `file` already exists; otherwise the file
    is created. A confirmation line is printed after the write.

    Args:
        function: Object whose source `inspect.getsource` can retrieve.
        file: Path of the file to write to.
    """
    if os.path.exists(file):
        mode = "a"
    else:
        mode = "w"
    with open(file, mode) as out:
        source = inspect.getsource(function)
        out.write(source)
        print(function, "written to", file)

# Export the Alpha-Beta agent's source to the submission file (appended if the file already exists)
write_agent_to_file(alphabeta_agent, "alpha-beta_agent.py")