Trying similar experiments to tic-tac-toe, but with connect 4.

Prediction/hope: because there's more complexity in connect 4, the results we saw in tic-tac-toe will be stronger (i.e. utility AI will suffer more generalization loss).

In [16]:
import random

In [49]:
### Defining Connect4 Game
### Thanks ChatGPT

class Connect4:
    def __init__(self, rows = 6, columns = 7):
        self.rows = rows
        self.columns = columns
        self.board = [[0 for _ in range(self.columns)] for _ in range(self.rows)]
        self.turn = 1  # Player 1 starts

    def print_board(self):
        for row in reversed(self.board):
            print(row)
        print()
    
    def board_to_string(self):
        return self.board.__str__()

    def drop_disc(self, column, player):
        """ Drops a disc into the specified column for the given player """
        for row in range(self.rows): #searching "upwards"
            if self.board[row][column] == 0:
                self.board[row][column] = player
                return (row, column)
        return None  # Column is full

    def is_full(self):
        """ Checks if the board is full """
        return all(self.board[self.rows - 1][col] != 0 for col in range(self.columns))

    def is_winner(self, player):
        """ Checks if the specified player has won """
        # Check horizontal
        for row in range(self.rows):
            for col in range(self.columns - 3):
                if all(self.board[row][col + i] == player for i in range(4)):
                    return True

        # Check vertical
        for row in range(self.rows - 3):
            for col in range(self.columns):
                if all(self.board[row + i][col] == player for i in range(4)):
                    return True

        # Check diagonal (upwards and downwards)
        for row in range(self.rows - 3):
            for col in range(self.columns - 3):
                if all(self.board[row + i][col + i] == player for i in range(4)):
                    return True
                if all(self.board[row + 3 - i][col + i] == player for i in range(4)):
                    return True

        return False

# Let's create a game instance and print the empty board to verify
game = Connect4()
game.print_board()  # This should print a 6x7 grid of zeros.
print(game.board_to_string())

[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]

[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]]


In [41]:
### Utility AI

def utility(game, player):
    """ Simple utility function: +1 for a win, -1 for a loss, 0 otherwise """
    if game.is_winner(player):
        return 1
    elif game.is_winner(3 - player):  # 3 - player switches between 1 and 2
        return -1
    else:
        return 0

def generate_moves(game):
    """ Generate a list of valid moves (column numbers) """
    return [col for col in range(game.columns) if game.board[game.rows - 1][col] == 0]

def minimax(game, depth, alpha, beta, maximizing_player, utility_table = {}):
    """ Minimax algorithm with alpha-beta pruning """
    if depth == 0 or game.is_full() or game.is_winner(1) or game.is_winner(2):
        utility_table[game.board_to_string()] = utility(game, 1)
        return utility(game, 1)

    if maximizing_player: # True
        max_eval = float('-inf')
        for move in generate_moves(game):
            new_chip = game.drop_disc(move, 1)  # Try the move
            assert new_chip[1] == move
            eval = minimax(game, depth - 1, alpha, beta, False, utility_table) if (game.board_to_string() not in utility_table) else utility_table[game.board_to_string()]

            game.board[new_chip[0]][new_chip[1]] = 0  # Undo the move
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
        utility_table[game.board_to_string()] = max_eval
        return max_eval
    else: # False
        min_eval = float('inf')
        for move in generate_moves(game):
            new_chip = game.drop_disc(move, 2)  # Try the move
            assert new_chip[1] == move
            eval = minimax(game, depth - 1, alpha, beta, True, utility_table) if (game.board_to_string() not in utility_table) else utility_table[game.board_to_string()]
            game.board[new_chip[0]][new_chip[1]] = 0  
            # Undo the move
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
        
        utility_table[game.board_to_string()] = min_eval
        return min_eval

# Testing the minimax function with a basic scenario
game = Connect4()
game.drop_disc(3, 1)  # Player 1's move
game.drop_disc(4, 2)  # Player 2's move
game.print_board()

best_move = max(generate_moves(game), key=lambda col: minimax(game, 10, float('-inf'), float('inf'), True))
print("Best move for Player 1:", best_move)


[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 2, 0, 0]

Best move for Player 1: 0


(Caution: the utility AI takes a long time, especially if you run it for each move of a game. Intuitively, since there are seven possible moves each turn, the utility AI with a depth of four needs to run O(2000) operations each turn, which adds up over time)

In [66]:
### Heuristic AI

def evaluate_window(window, player):
    score = 0
    opp_player = 3 - player

    if window.count(player) == 4:
        score += 100
    elif window.count(player) == 3 and window.count(0) == 1:
        score += 5
    elif window.count(player) == 2 and window.count(0) == 2:
        score += 2

    if window.count(opp_player) == 3 and window.count(0) == 1:
        score -= 40  # Block opponent's 3

    return score

def heuristic_score(game, player):
    score = 0
    center_column = [row[game.columns//2] for row in game.board]
    center_count = center_column.count(player)
    score += center_count * 3  # Center column preference

    # Score horizontal, vertical, and diagonal lines
    for row in range(game.rows):
        for col in range(game.columns - 3):
            window = [game.board[row][col + i] for i in range(4)]
            score += evaluate_window(window, player)

    for col in range(game.columns):
        for row in range(game.rows - 3):
            window = [game.board[row + i][col] for i in range(4)]
            score += evaluate_window(window, player)

    for row in range(game.rows - 3):
        for col in range(game.columns - 3):
            window = [game.board[row + i][col + i] for i in range(4)]
            score += evaluate_window(window, player)

    for row in range(3, game.rows):
        for col in range(game.columns - 3):
            window = [game.board[row - i][col + i] for i in range(4)]
            score += evaluate_window(window, player)

    return score

def heuristic_best_move(game, player):
    valid_moves = generate_moves(game)
    best_score = float('-inf')
    best_move = None
    for move in valid_moves:
        new_chip = game.drop_disc(move, player)
        score = heuristic_score(game, player)
        assert new_chip[1] == move
        game.board[new_chip[0]][move] = 0
        if score > best_score:
            best_score = score
            best_move = move
    return best_move

# Testing the heuristic-based AI
game = Connect4()
game.drop_disc(3, 1)  # Player 1's move
game.drop_disc(4, 2)  # Player 2's move
game.print_board()
best_move_heuristic = heuristic_best_move(game, 1)
print("Best heuristic move for Player 1:", best_move_heuristic)

[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 2, 0, 0]

Best heuristic move for Player 1: 3


In [45]:
### Lookup table generation
UTILITY_TABLE_1 = {} #going first
UTILITY_TABLE_2 = {} #going second
game = Connect4()
minimax(game, 20, float('-inf'), float('inf'), True, UTILITY_TABLE_1)
game = Connect4()
minimax(game, 20, float('-inf'), float('inf'), False, UTILITY_TABLE_2)
print(UTILITY_TABLE_1[Connect4().board_to_string()])

## It seems like to fully solve the game (at least depth >=20), it will take a long time 
## (> 15 minutes) to generate the lookup table.

KeyboardInterrupt: 

In [67]:
### Game setup + Optimality Test

# Debug
# util_table_debug = {}
# minimax(Connect4(), 10, float('-inf'), float('inf'), True, util_table_debug)
# print(util_table_debug.values()) # there should be values other than 0

NUM_ROWS = 5
NUM_COLUMNS = 6

# Playing games with reduced depth
NUM_GAMES = 1
MINIMAX_DEPTH = 10

def util_best_move(game, player, minimax_depth = MINIMAX_DEPTH):
    valid_moves = generate_moves(game)
    best_score = float('-inf')
    best_move = None
    for move in valid_moves:
        new_chip = game.drop_disc(move, player)
        score = minimax(game, minimax_depth, float('-inf'), float('inf'), True)
        print(score)
        assert new_chip[1] == move
        game.board[new_chip[0]][move] = 0 #undo the move
        if score > best_score or (score == best_score and random.choice([True, False])):
            best_score = score
            best_move = move
    return best_move

def play_game(first_player_minimax: bool, minimax_depth = 4):
    game = Connect4(NUM_ROWS, NUM_COLUMNS)
    turn = 0  # 0 for minimax, 1 for heuristic
    minimax_player = 1 if first_player_minimax else 2

    while not game.is_full() and not game.is_winner(1) and not game.is_winner(2): #game loop
        if (turn == 0 and first_player_minimax) or (turn == 1 and not first_player_minimax):
            best_move = util_best_move(game, minimax_player, minimax_depth)
            # print()
            game.drop_disc(best_move, 1)
        else:
            best_move = heuristic_best_move(game, 2)
            game.drop_disc(best_move, 2)

        turn = 1 - turn  # Switch turn
        game.print_board()

    if game.is_winner(1):
        return "minimax"
    elif game.is_winner(2):
        return "heuristic"
    else:
        return "draw"

results_reduced_depth = {"minimax": 0, "heuristic": 0, "draw": 0}
for _ in range(NUM_GAMES):
    first_player_minimax = random.choice([True, False])  # Randomly choose who starts
    result = play_game(first_player_minimax, MINIMAX_DEPTH)
    results_reduced_depth[result] += 1

results_reduced_depth


0
0
0
0
0
0
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0]

[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 1, 0]

0
0
0
0
0
0
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0]
[0, 0, 0, 2, 1, 0]

[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 0]

0
0
0
0
1
1
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 1]

[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 0, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 1]

0
0
0
0
1
1
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 1]

[0, 0, 0, 0, 0, 0]
[0, 0, 0, 2, 0, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 0]
[0, 0, 0, 2, 1, 1]



{'minimax': 0, 'heuristic': 1, 'draw': 0}

Maybe generating the entire Connect-4 lookup table takes too long; someone else did it with ~2T entries: https://www.reddit.com/r/boardgames/comments/12bkis3/a_complete_lookup_table_for_connect4/

The other option is to use a pre-generated lookup table, but that wouldn't allow us to directly manipulate the process generating the lookup table (which is what gave the results in our Tic-Tac-Toe notebook). We can still try randomization with a pre-generated lookup table, but I would expect that the performance loss from X% randomization of utility versus heuristic AI would be the same.