In [None]:
# ConnectX environment was defined in v0.1.6
!pip install 'kaggle-environments>=0.1.6'

# Conventional Agent with MinMax Algorithm
My baseline for future agents that work with networks.
Features:
* Use a data structure to keep track of the possible 4-connected lines. Hopefully this reduces computation effort and allows higher search depth with MinMax.
* Variable depth search - as the game progresses you need to compute less and hence can search deeper
* Hard-coded the default connect4 board sizes, but this should be easy to overcome.
* Player1 is 1, Player2 is -1 (as opposed to 2 in the obs), to make computations easier.

Score is currently somewhere between 1150 and 1200 and can surely be improved.


In [None]:
import os
import random

from kaggle_environments import make, evaluate
from kaggle_environments import agent as utils

# Data Structure
Keep track of all possible winning lines and update them with each move. Since we cannot easily pass data between moves, this feature is mainly used for the positions created in the search tree.
The connect4 board is a list of 42 integers, a line is a list of 4 integers (e.g. 35,36,37,38 = the line on the bottom left), and we encode that line with two decimal digits per position, the lowest index in the lowest digits: 35,36,37,38 becomes 38373635.

lines_by_index: key is the position (e.g. 36), values are encoded lines that contain the position 36. This data will never change and can be computed once.

count_by_line: key = line, value = how many discs of a color are in the line. E.g. if 38373635 has a value of -2, it means that 2 discs of player2 (= -1) are in that line. If a line contains discs of both players it becomes irrelevant for connecting 4, and is hence dropped.
count_by_line is updated with every move.


In [None]:
def encode_line(a, b, c, d):
    """Pack four board indices into one integer key.

    Each index takes two decimal digits: ``a`` sits in the lowest pair and
    ``d`` in the highest, so ``encode_line(35, 36, 37, 38)`` is ``38373635``.

    The packing uses integer arithmetic only, so the result stays exact for
    any integer input instead of taking a detour through floating point.

    Args:
        a, b, c, d: The four indices of a line (callers pass them sorted).

    Returns:
        The encoded line as an ``int``.
    """
    return int(a + b * 100 + c * 10000 + d * 1000000)

def create_lines_by_index():
    """Build the static lookup from a board cell to the lines through it.

    The board is hard-coded as 6 rows by 7 columns and a cell index is
    ``row * 7 + col``.  For every cell, every direction (vertical, both
    diagonals, horizontal) and every offset that places the cell inside a
    run of four, the run is kept only if all four cells are on the board.

    Returns:
        dict mapping each of the 42 cell indices to the list of encoded
        lines (see ``encode_line``) that contain the cell, without
        duplicates.
    """
    # (column step, row step) for each of the four line orientations.
    directions = ((0, 1), (1, 1), (1, 0), (1, -1))
    table = {cell: [] for cell in range(42)}
    for row in range(6):
        for col in range(7):
            for col_step, row_step in directions:
                for offset in range(-3, 1):
                    cells = []
                    for step in range(offset, offset + 4):
                        line_row = row + step * row_step
                        line_col = col + step * col_step
                        if 0 <= line_row < 6 and 0 <= line_col < 7:
                            cells.append(line_row * 7 + line_col)
                    # Runs that stick out of the board are shorter than 4.
                    if len(cells) != 4:
                        continue
                    code = encode_line(*sorted(cells))
                    for cell in cells:
                        # The same line is reached from each of its cells.
                        if code not in table[cell]:
                            table[cell].append(code)
    return table

def create_count_by_line(lines_by_index):
    """Create the initial disc counter for every known line.

    Args:
        lines_by_index: dict mapping a cell index to a list of encoded lines.

    Returns:
        dict with one entry per distinct encoded line, inserted in ascending
        key order, each starting at a count of 0.
    """
    distinct_lines = set()
    for encoded_lines in lines_by_index.values():
        distinct_lines.update(encoded_lines)
    return dict.fromkeys(sorted(distinct_lines), 0)
    
def update_count_by_line(count_by_lines, lines_by_index, idx, color):
    """Register a disc of ``color`` placed on cell ``idx``.

    Every still-tracked line through ``idx`` is either advanced by ``color``
    (when the line is empty or already holds discs of the same sign) or
    removed (when it holds discs of the opposite sign, so neither player can
    complete it any more).

    Args:
        count_by_lines: dict encoded line -> signed disc count; modified in
            place.
        lines_by_index: dict cell index -> list of encoded lines.
        idx: cell index that received the disc.
        color: 1 or -1.

    Returns:
        The same ``count_by_lines`` object that was passed in.
    """
    for line_key in lines_by_index[idx]:
        if line_key not in count_by_lines:
            # Line was already dropped as mixed.
            continue
        current = count_by_lines[line_key]
        if color * current < 0:
            del count_by_lines[line_key]
        else:
            count_by_lines[line_key] = current + color
    return count_by_lines

def compute_count_by_line(count_by_lines, lines_by_index, board):
    """Replay an existing board position into the line counters.

    Walks the 42 cells from the highest index down and feeds every occupied
    cell to ``update_count_by_line``.  A cell value of 1 counts as color 1,
    any other non-zero value as color -1.

    Args:
        count_by_lines: dict encoded line -> signed disc count.
        lines_by_index: dict cell index -> list of encoded lines.
        board: sequence of 42 cell values, 0 meaning empty.

    Returns:
        The updated counters.
    """
    occupied_cells = (cell for cell in range(41, -1, -1) if board[cell] != 0)
    for cell in occupied_cells:
        disc = 1 if board[cell] == 1 else -1
        count_by_lines = update_count_by_line(count_by_lines, lines_by_index, cell, disc)
    return count_by_lines

# Some helper methods
The heuristic method assigns point values to lines: the more discs of your color are in a line, the more points you get. The heuristic will select moves that create possible connect-4 lines for you and limit the possibilities for the opponent.

In [None]:
def get_heuristic_score(count_by_lines):
    """Score a position from the signed disc counts of its open lines.

    Lines holding 1, 2 or 3 discs are worth 1, 10 or 50 points; the sign of
    the count carries over to the points.  Counts outside -3..3 are not in
    the table and raise ``KeyError``.

    Args:
        count_by_lines: dict encoded line -> signed disc count.

    Returns:
        The sum of the points of all lines.
    """
    points_by_count = {-3: -50, -2: -10, -1: -1, 0: 0, 1: 1, 2: 10, 3: 50}
    total = 0
    for count in count_by_lines.values():
        total += points_by_count[count]
    return total

def get_winner(count_by_lines):
    """Report which player, if any, has completed a line.

    Args:
        count_by_lines: dict encoded line -> signed disc count.

    Returns:
        1 if the largest count is 4, otherwise -1 if the smallest count is
        -4, otherwise 0 (also for an empty dict).
    """
    if not count_by_lines:
        return 0
    counts = count_by_lines.values()
    if max(counts) == 4:
        return 1
    if min(counts) == -4:
        return -1
    return 0
    
def get_free_row(board, col):
    """Find the row where a disc dropped into ``col`` would be stored.

    Rows are scanned from row 5 down to row 0 and the first cell holding 0
    wins.

    Args:
        board: sequence of 42 cell values, 0 meaning empty.
        col: column index, 0..6.

    Returns:
        The row index of the first empty cell found, or -1 when the column
        has no empty cell.
    """
    empty_rows = (row for row in range(5, -1, -1) if board[row * 7 + col] == 0)
    return next(empty_rows, -1)

def get_valid_moves(board):
    """List every playable move on ``board``.

    Each column is scanned exactly once with ``get_free_row``; this function
    runs for every node of the search tree, so the column scan is not
    repeated just to test the result.

    Args:
        board: sequence of 42 cell values, 0 meaning empty.

    Returns:
        list of ``(row, col)`` tuples, in ascending column order, one for
        each column that still has an empty cell.
    """
    moves = []
    for col in range(7):
        row = get_free_row(board, col)
        if row != -1:
            moves.append((row, col))
    return moves
    
def make_move(board, moves, color, r, c):
    """Return copies of the board and move history with one disc added.

    Neither ``board`` nor ``moves`` is modified.

    Args:
        board: list of 42 cell values.
        moves: list of the columns played so far.
        color: value written into the cell.
        r: row of the new disc.
        c: column of the new disc; also appended to the history.

    Returns:
        Tuple ``(new_board, new_moves)``.
    """
    board_after = list(board)
    board_after[r * 7 + c] = color
    return board_after, [*moves, c]

# How deep can you calculate?
If you take too long to make a move you will get a timeout. So we make an estimate of how deep we can go.
We start with a base depth and go deeper
* as the game progresses
* as columns are closed (hence less possible moves)
The method is just a pure guess and can definitely be improved.
The search depth is also limited by the number of open spots on the board.

In [None]:
def estimate_depth(board, default_depth):
    """Guess how many plies can be searched from this position.

    Starting from ``default_depth`` the depth grows by one for every five
    discs played beyond the 25th and by one for every column that is already
    full.  It never exceeds the number of empty cells.

    Args:
        board: sequence of 42 cell values, 0 meaning empty.
        default_depth: base search depth.

    Returns:
        The search depth to use.
    """
    discs_played = sum(1 for cell in board if cell != 0)
    late_game_bonus = (discs_played - 25) // 5 if discs_played > 25 else 0
    closed_columns = 7 - len(get_valid_moves(board))
    planned_depth = default_depth + late_game_bonus + closed_columns
    return min(42 - discs_played, planned_depth)

# The MinMax algorithm

* Player 1 searches for the max, Player2 for the min
* Data structures are set up in the beginning and passed in the search 
* the search is done via recursive calls (depth first search)
* Color is the disc that is played, and will change, side will not change
* the search default depth of 5 produces reasonable results, in the end game the depth will go up to 15

In [None]:
def lookahead(board, lines_by_index, count_by_lines, search_max, depth, side, moves):
    """Depth-first minimax search over the positions reachable from ``board``.

    Args:
        board: list of 42 cell values for the position to search.
        lines_by_index: dict cell index -> list of encoded lines.
        count_by_lines: dict encoded line -> signed disc count for ``board``;
            it is copied before each trial move and never modified here.
        search_max: True when the player to move is color 1 (maximizer),
            False when it is color -1 (minimizer).
        depth: remaining plies to search.
        side: not used by the search; only handed on to recursive calls.
        moves: list of columns played on the way to ``board``.

    Returns:
        Tuple ``(moves, score)``: the column sequence of the best line found
        (starting with ``moves``) and its score.  At depth 0 the score is the
        heuristic value; a win for the moving color is ``1e8`` minus the
        length of the move sequence, signed by color, so quicker wins rank
        higher.  A position with no playable column scores 0.
    """
    if depth == 0:
        return moves, get_heuristic_score(count_by_lines)
    candidates = get_valid_moves(board)
    if not candidates:
        # Full board and nobody won on the way here: score it as a draw
        # instead of leaking the +/-1e16 "nothing found yet" sentinel, which
        # would make a draw look like the worst outcome for the side to move.
        return moves, 0
    color = 1 if search_max else -1
    best_score = -1e16 if search_max else 1e16
    best_moves = []
    for r, c in candidates:
        new_board, new_moves = make_move(board, moves, color, r, c)
        new_count_by_lines = update_count_by_line(
            count_by_lines.copy(), lines_by_index, r * 7 + c, color)
        if get_winner(new_count_by_lines) == color:
            # Immediate win: no need to search below this move.
            score = (1e8 - len(new_moves)) * color
        else:
            new_moves, score = lookahead(
                new_board, lines_by_index, new_count_by_lines,
                not search_max, depth - 1, side, new_moves)
        # Strict comparison keeps the first of several equally good moves.
        if (search_max and score > best_score) or (not search_max and score < best_score):
            best_score, best_moves = score, new_moves
    return best_moves, best_score

def lookahead_agent(obs, config, depth=3):
    """Choose a column for the current observation with a minimax search.

    While fewer than two discs are on the board the agent answers with
    column 3 without searching.  Otherwise the line tables are rebuilt from
    ``obs.board`` and ``lookahead`` is run to the depth given by
    ``estimate_depth``.

    Args:
        obs: observation providing ``board`` (list of 42 cell values) and
            ``mark`` (1 means this agent plays color 1, anything else -1).
        config: not used.
        depth: base search depth handed to ``estimate_depth``.

    Returns:
        The column of the first move of the best line found.
    """
    search_depth = estimate_depth(obs.board, depth)
    discs_on_board = sum(1 for cell in obs.board if cell != 0)
    if discs_on_board < 2:
        return 3
    line_table = create_lines_by_index()
    line_counts = compute_count_by_line(
        create_count_by_line(line_table), line_table, obs.board)
    plays_first = obs.mark == 1
    own_color = 1 if plays_first else -1
    best_line, _ = lookahead(
        obs.board.copy(), line_table, line_counts,
        plays_first, search_depth, own_color, [])
    return best_line[0]

def agent_depth6(obs, config):
    """Run ``lookahead_agent`` with a base search depth of 6."""
    return lookahead_agent(obs, config, depth=6)
def agent_depth5(obs, config):
    """Run ``lookahead_agent`` with a base search depth of 5."""
    return lookahead_agent(obs, config, depth=5)
def agent_depth3(obs, config):
    """Run ``lookahead_agent`` with a base search depth of 3."""
    return lookahead_agent(obs, config, depth=3)
def agent_depth1(obs, config):
    """Run ``lookahead_agent`` with a base search depth of 1."""
    return lookahead_agent(obs, config, depth=1)


# Try out the agent

In [None]:
# Create the ConnectX environment with debug output enabled.
env =  make("connectx", debug=True)

env.reset()
# Let the depth-5 agent (first player) play a full game against the
# depth-3 agent (second player), then render the result in the notebook.
# Alternative pairing kept for reference:
#env.run([lookahead_agent, "negamax"])
env.run([ agent_depth5, agent_depth3 ])
env.render(mode="ipython", width=500, height=450)

# Start a fresh game for interactive play with the depth-5 agent in the
# first seat.
# NOTE(review): the None entry presumably marks the seat of the human
# player - confirm against the kaggle-environments documentation.
env.reset()
env.play([agent_depth5, None ], width=500, height=450)
