## Introduction:

This notebook contains a fast implementation of the scoring heuristic explained in the "Intro to Game AI and Reinforcement Learning" course.

The algorithm first **converts the grid into a string** (for rows, columns, and diagonals). It then counts the number of occurrences of the target patterns, such as threes or twos, in that string.

It performs **5 times faster** than the default heuristic.

In [None]:
from learntools.core import binder
binder.bind(globals())
from learntools.game_ai.ex1 import *
import numpy as np
import random
from tqdm.notebook import tqdm
import time
from kaggle_environments import make, evaluate

## The heuristic given in the "Intro to Game AI and Reinforcement Learning" course

In [None]:
def drop_piece(grid, col, piece, config):
    """Return a copy of ``grid`` with ``piece`` dropped into column ``col``.

    The piece lands in the lowest cell of the column whose value is 0.  The
    input grid is left untouched.  If the column has no empty cell, the top
    cell (row 0) is overwritten, so callers should only pass playable columns.
    """
    board = grid.copy()
    # Start at the bottom row and climb while the cell is occupied.
    target = config.rows - 1
    while target > 0 and board[target][col] != 0:
        target -= 1
    board[target][col] = piece
    return board

def score_move(grid, col, mark, config, func):
    """Score the board that results from ``mark`` playing column ``col``.

    ``func`` is the heuristic to apply; it is called as
    ``func(next_grid, mark, config)`` on the board produced by ``drop_piece``.
    """
    return func(drop_piece(grid, col, mark, config), mark, config)

def get_heuristic(grid, mark, config):
    """Return the heuristic score of ``grid`` from the point of view of ``mark``.

    The score is a weighted sum of window counts obtained from
    ``count_windows``: own twos, threes and fours count positively, the
    opponent's twos and threes count negatively.
    """
    opponent = mark % 2 + 1

    own_threes = count_windows(grid, 3, mark, config)
    own_fours = count_windows(grid, 4, mark, config)
    opp_threes = count_windows(grid, 3, opponent, config)
    own_twos = count_windows(grid, 2, mark, config)
    opp_twos = count_windows(grid, 2, opponent, config)

    return (
        1e2 * own_twos
        - 2e2 * opp_twos
        + 1e4 * own_threes
        - 1e5 * opp_threes
        + 1e8 * own_fours
    )

def check_window(window, num_discs, piece, config):
    """Tell whether ``window`` holds exactly ``num_discs`` of ``piece`` and is otherwise empty.

    "Otherwise empty" means the window contains ``config.inarow - num_discs``
    zeros.  ``window`` must support ``.count`` (e.g. a list).
    """
    if window.count(piece) != num_discs:
        return False
    empties_needed = config.inarow - num_discs
    return window.count(0) == empties_needed
    
def count_windows(grid, num_discs, piece, config):
    """Count the windows of ``grid`` that satisfy ``check_window``.

    Helper for ``get_heuristic``.  A window is a run of ``config.inarow``
    consecutive cells taken horizontally, vertically, or along either
    diagonal direction.  ``grid`` is indexed NumPy-style (``grid[r, c]``).
    """
    span = config.inarow
    row_starts = config.rows - (span - 1)
    col_starts = config.columns - (span - 1)

    windows = []
    # horizontal
    windows += [
        list(grid[r, c:c + span])
        for r in range(config.rows)
        for c in range(col_starts)
    ]
    # vertical
    windows += [
        list(grid[r:r + span, c])
        for r in range(row_starts)
        for c in range(config.columns)
    ]
    # positive diagonal (row and column indices increase together)
    windows += [
        list(grid[range(r, r + span), range(c, c + span)])
        for r in range(row_starts)
        for c in range(col_starts)
    ]
    # negative diagonal (row index decreases while column index increases)
    windows += [
        list(grid[range(r, r - span, -1), range(c, c + span)])
        for r in range(span - 1, config.rows)
        for c in range(col_starts)
    ]

    return sum(1 for w in windows if check_window(w, num_discs, piece, config))

## The fast heuristic (string search)

In [None]:
def get_fast_heuristic(grid, mark, config):
    """Return the heuristic score of ``grid`` for ``mark`` using string search.

    Every window of ``config.inarow`` consecutive cells (horizontal, vertical
    and both diagonal directions) is written into one long string as a
    bracketed token such as ``[1 1 0 0]``.  The number of windows holding
    exactly k pieces of a player and otherwise zeros is then obtained by
    counting the matching tokens with ``str.count``.

    Because each window is its own token, overlapping windows in the same
    row or column are all counted (scanning the whole row string instead would
    miss them, since ``str.count`` only reports non-overlapping matches).  The
    board size and window length are taken from ``config`` rather than being
    fixed to a 6x7 board with four in a row.

    Args:
        grid: 2D NumPy array of integer cells; 0 means empty.
        mark: the piece of the player being scored; the opponent is
            ``mark % 2 + 1``.
        config: object exposing ``rows``, ``columns`` and ``inarow``.

    Returns:
        float: ``1e2*twos - 2e2*opp_twos + 1e4*threes - 1e5*opp_threes
        + 1e8*fours``.
    """
    # Function-scope import keeps this cell self-contained.
    from itertools import combinations

    n_rows, n_cols, inarow = config.rows, config.columns, config.inarow
    row_starts = n_rows - inarow + 1
    col_starts = n_cols - inarow + 1

    # Stringify every cell once; windows are then plain list slices.
    cells = [[str(value) for value in row] for row in grid.tolist()]

    windows = []
    # horizontal
    for r in range(n_rows):
        line = cells[r]
        for c in range(col_starts):
            windows.append(line[c:c + inarow])
    # vertical
    for c in range(n_cols):
        line = [cells[r][c] for r in range(n_rows)]
        for r in range(row_starts):
            windows.append(line[r:r + inarow])
    # both diagonals share the same top-left anchor (r, c)
    for r in range(row_starts):
        bottom = r + inarow - 1
        for c in range(col_starts):
            windows.append([cells[r + i][c + i] for i in range(inarow)])
            windows.append([cells[bottom - i][c + i] for i in range(inarow)])

    # Brackets delimit each window, so a pattern can only match a whole window.
    combined_str = "".join("[" + " ".join(w) + "]" for w in windows)

    def count_pattern(piece, num_discs):
        """Count windows with ``num_discs`` of ``piece`` and zeros elsewhere."""
        total = 0
        # One pattern per way of placing the pieces inside the window.
        for filled in combinations(range(inarow), num_discs):
            slots = ["0"] * inarow
            for idx in filled:
                slots[idx] = piece
            total += combined_str.count("[" + " ".join(slots) + "]")
        return total

    own = str(mark)
    opp = str(mark % 2 + 1)

    num_fours = count_pattern(own, 4)
    num_threes = count_pattern(own, 3)
    num_twos = count_pattern(own, 2)
    num_threes_opp = count_pattern(opp, 3)
    num_twos_opp = count_pattern(opp, 2)

    # Same weights as get_heuristic.
    score = 1e2*num_twos - 2e2*num_twos_opp + 1e4*num_threes - 1e5*num_threes_opp + 1e8*num_fours
    return score

In [None]:
def get_win_percentages(agent1, agent2, n_rounds=100):
    """Play ``n_rounds`` ConnectX games between two agents and print win rates.

    Half of the rounds (``n_rounds // 2``) are evaluated with ``agent1`` listed
    first, the remainder with ``agent2`` listed first; the results of the
    second batch are flipped so every outcome reads ``[agent1, agent2]``.
    Only wins are reported; nothing is returned.
    """
    # Default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    first_batch = n_rounds // 2

    outcomes = evaluate("connectx", [agent1, agent2], config, [], first_batch)
    swapped = evaluate("connectx", [agent2, agent1], config, [], n_rounds - first_batch)
    # Re-order the second batch so index 0 is always agent1's result.
    outcomes += [[b, a] for [a, b] in swapped]

    total = len(outcomes)
    wins_1 = outcomes.count([1, -1])
    print("Agent 1 Win Percentage:", np.round(wins_1 / total, 2), "Wins:", wins_1)
    wins_2 = outcomes.count([-1, 1])
    print("Agent 2 Win Percentage:", np.round(wins_2 / total, 2), "Wins:", wins_2)

In [None]:
def agent(obs, config):
    """Pick a move with the default (window-counting) heuristic.

    Every playable column is scored one move ahead with ``get_heuristic``;
    one of the best-scoring columns is returned at random.
    """
    board = obs.board
    # A column is playable while its top cell is still empty.
    playable = [col for col in range(config.columns) if board[col] == 0]
    # Convert the flat board to a 2D grid
    grid = np.asarray(board).reshape(config.rows, config.columns)

    scores = {
        col: score_move(grid, col, obs.mark, config, get_heuristic)
        for col in playable
    }
    best = max(scores.values(), default=None)
    best_cols = [col for col, value in scores.items() if value == best]
    # Break ties randomly among the maximizing columns
    return random.choice(best_cols)

In [None]:
def fast_agent(obs, config):
    """Pick a move with the fast (string-search) heuristic.

    Every playable column is scored one move ahead with
    ``get_fast_heuristic``; one of the best-scoring columns is returned at
    random.
    """
    board = obs.board
    # A column is playable while its top cell is still empty.
    playable = [col for col in range(config.columns) if board[col] == 0]
    # Convert the flat board to a 2D grid
    grid = np.asarray(board).reshape(config.rows, config.columns)

    scores = {
        col: score_move(grid, col, obs.mark, config, get_fast_heuristic)
        for col in playable
    }
    best = max(scores.values(), default=None)
    best_cols = [col for col, value in scores.items() if value == best]
    # Break ties randomly among the maximizing columns
    return random.choice(best_cols)

## Compare the elapsed times for each agent while playing against itself

In [None]:
# Time 100 self-play rounds per agent.  perf_counter() is a monotonic,
# high-resolution clock meant for measuring intervals; time.time() can jump
# if the system clock is adjusted while the games are running.
start = time.perf_counter()
get_win_percentages(agent1=agent, agent2=agent, n_rounds=100)
end = time.perf_counter()
print("\nTime for default Agent:",(end - start),"\n")

start = time.perf_counter()
get_win_percentages(agent1=fast_agent, agent2=fast_agent, n_rounds=100)
end = time.perf_counter()
print("\nTime for fast Agent:",(end - start))

## Check the performance of the two agents against each other (should be around 50%-50%)

In [None]:
# Default agent (agent 1) vs. fast agent (agent 2) over 100 rounds;
# get_win_percentages prints each side's win rate and win count.
get_win_percentages(agent1=agent, agent2=fast_agent, n_rounds=100)