# Alpha-Beta Pruning

This code is based on the [Intro to Game AI and Reinforcement Learning Micro-Course](https://www.kaggle.com/learn/intro-to-game-ai-and-reinforcement-learning). The main idea is to run a faster minimax algorithm by skipping branches that cannot change the final decision. Alpha-beta pruning is given by the following algorithm (taken from [Wikipedia](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning)):

```
function alphabeta(node, depth, α, β, maximizingPlayer) is
    if depth = 0 or node is a terminal node then
        return the heuristic value of node
    if maximizingPlayer then
        value := −∞
        for each child of node do
            value := max(value, alphabeta(child, depth − 1, α, β, FALSE))
            α := max(α, value)
            if α ≥ β then
                break (* β cut-off *)
        return value
    else
        value := +∞
        for each child of node do
            value := min(value, alphabeta(child, depth − 1, α, β, TRUE))
            β := min(β, value)
            if β ≤ α then
                break (* α cut-off *)
        return value
        
```

![tree](https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/AB_pruning.svg/1280px-AB_pruning.svg.png)

In [None]:
def my_agent(obs, config):
    """Choose a column to play using depth-limited minimax with alpha-beta pruning.

    Args:
        obs: Observation exposing ``board`` (flat, row-major sequence of
            ``config.rows * config.columns`` cells where 0 means empty) and
            ``mark`` (this agent's piece, 1 or 2).
        config: Configuration exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        The index of the column to drop a piece into. Ties between equally
        scored columns are broken at random.

    Note:
        The imports and helpers are kept inside the function on purpose: the
        notebook serialises this function on its own with
        ``inspect.getsource``, so it has to be self-contained.
    """
    import numpy as np
    import random

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark, config):
        next_grid = grid.copy()
        # Scan from the bottom row upwards for the first empty cell.
        # Callers only pass columns whose top cell is empty, so the loop
        # always breaks on a free cell.
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every window of `config.inarow` consecutive cells, as a list,
    # in this order: horizontal, vertical, positive diagonal, negative diagonal
    def iter_windows(grid, config):
        n = config.inarow
        # horizontal
        for row in range(config.rows):
            for col in range(config.columns-(n-1)):
                yield list(grid[row, col:col+n])
        # vertical
        for row in range(config.rows-(n-1)):
            for col in range(config.columns):
                yield list(grid[row:row+n, col])
        # positive diagonal
        for row in range(config.rows-(n-1)):
            for col in range(config.columns-(n-1)):
                yield list(grid[range(row, row+n), range(col, col+n)])
        # negative diagonal
        for row in range(n-1, config.rows):
            for col in range(config.columns-(n-1)):
                yield list(grid[range(row, row-n, -1), range(col, col+n)])

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece, config):
        return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece, config):
        return sum(
            1 for window in iter_windows(grid, config)
            if check_window(window, num_discs, piece, config)
        )

    # Helper function for minimax: calculates value of heuristic for grid
    # (always from the point of view of `mark`; `mark%2+1` is the opponent)
    def get_heuristic(grid, mark, config):
        num_threes = count_windows(grid, 3, mark, config)
        num_fours = count_windows(grid, 4, mark, config)
        num_threes_opp = count_windows(grid, 3, mark%2+1, config)
        num_fours_opp = count_windows(grid, 4, mark%2+1, config)
        score = 1e2*num_threes - 1e2*num_threes_opp - 1e9*num_fours_opp + 1e9*num_fours
        return score

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # The agent has just moved, so the opponent (minimizing player) is next.
        score = minimax(next_grid, nsteps - 1, False, mark, config)
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window, config):
        return window.count(1) == config.inarow or window.count(2) == config.inarow

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, config):
        # Check for draw: no empty cell left in the top row
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        return any(is_terminal_window(window, config) for window in iter_windows(grid, config))

    # Minimax implementation with alpha-beta pruning
    def minimax(node, depth, maximizingPlayer, mark, config, alpha=-np.inf, beta=np.inf):
        # At depth 0 the heuristic is returned regardless, so the (costly)
        # terminal scan is only evaluated when depth > 0.
        if depth == 0 or is_terminal_node(node, config):
            return get_heuristic(node, mark, config)

        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]

        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                # Forward the current window so bounds found in earlier
                # siblings/ancestors can prune inside this subtree.
                value = max(value, minimax(child, depth-1, False, mark, config, alpha, beta))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break  # beta cut-off
            return value

        value = np.inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1, config)
            value = min(value, minimax(child, depth-1, True, mark, config, alpha, beta))
            beta = min(beta, value)
            if alpha >= beta:
                break  # alpha cut-off
        return value

    # The first `config.columns` entries of the flat board are the top row,
    # so a column is playable when its entry there is empty.
    valid_moves = [col for col in range(config.columns) if obs.board[col] == 0]

    grid = np.asarray(obs.board).reshape(config.rows, config.columns)

    # Search depth (in plies, counting the agent's own move)
    N_STEPS = 2

    # Get scores for each branch
    scores = {col: score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves}

    debugging = False

    if debugging:
        print(scores)

    # Get the highest score value and pick randomly among the columns reaching it
    best_score = max(scores.values())
    max_cols = [col for col, score in scores.items() if score == best_score]
    return random.choice(max_cols)

## Let's see how our agent plays

In [None]:
from kaggle_environments import make, evaluate

# Create the game environment
env = make("connectx", debug=True)

# Play one game: my_agent (listed first) against the built-in "negamax" agent
env.run([my_agent, "negamax"])

# Show the game
env.render(mode="ipython")

Now we can estimate how well our agent performs against a random agent and a negamax agent. The reported value is the mean reward of the first player listed: if it is positive, the first player wins more often than it loses; if it is negative, the first player loses more often than it wins.

In [None]:
def mean_reward(rewards):
    """Return the average of the first entry of every item in ``rewards``.

    Each item is indexed with ``[0]``, i.e. only the first listed player's
    reward contributes. Raises ``ZeroDivisionError`` when ``rewards`` is empty.
    """
    total = 0
    for episode in rewards:
        total += episode[0]
    return total / len(rewards)

# Run multiple episodes per match-up to estimate the agent's performance.
# The printed number is always the mean reward of the first agent in the pair.
for label, agents in (
    ("My Agent vs Random Agent:", [my_agent, "random"]),
    ("My Agent vs Negamax Agent:", [my_agent, "negamax"]),
    ("Negamax Agent vs My Agent:", ["negamax", my_agent]),
):
    print(label, mean_reward(evaluate("connectx", agents, num_episodes=10)))

# Creating a submission

In order to submit, we need to make sure that `N_STEPS` in the function is equal to 2. If it is greater than 2, the agent will take too long to move and the submission will not be valid.

In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Write the source code of ``function`` to the path ``file``.

    If ``file`` already exists the source is appended to it; otherwise the
    file is created. A confirmation line is printed after the write.
    """
    mode = "a" if os.path.exists(file) else "w"
    source_code = inspect.getsource(function)
    with open(file, mode) as handle:
        handle.write(source_code)
        print(function, "written to", file)

# Dump my_agent's source into submission.py (relative to the current working directory).
# NOTE: write_agent_to_file appends when the file already exists, so re-running
# this cell adds another copy of the function to the file.
write_agent_to_file(my_agent, "submission.py")

In [None]:
import sys
from kaggle_environments import utils, agent

# Keep a reference to the current stdout and restore it after read_file
# (presumably guards against stdout being replaced while loading — TODO confirm).
out = sys.stdout
# NOTE(review): this rebinds `agent`, shadowing the `agent` imported from
# kaggle_environments above. The path is absolute, whereas the file was
# written to the relative path "submission.py" — they only match when the
# working directory is /kaggle/working.
agent = utils.read_file("/kaggle/working/submission.py")
sys.stdout = out

# Let the loaded submission play one game against itself
# (`make` comes from the earlier `from kaggle_environments import make, evaluate`).
env = make("connectx", debug=False)
env.run([agent, agent])
env.render(mode="ipython")
# The run counts as successful only when both players end with status "DONE"
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")