In [3]:
import random
import math
from IPython.display import display
import pandas as pd
import numpy as np
import time
import pickle

In [4]:
def original_board(rows, columns):
    return np.zeros((rows, columns))

def val_turn(c4_board, column):
    return c4_board[len(c4_board)-1][column] == 0

def get_next_row(c4_board, column):
    return next((row for row in range(len(c4_board)) if c4_board[row][column] == 0), None)

def get_allowed_moves(c4_board):
    return [column for column in range(c4_board.shape[1]) if val_turn(c4_board, column)]

def get_next_position(c4_board, letter):
    rows, cols = c4_board.shape
    for row, row_vals in enumerate(c4_board):
        for col, col_val in enumerate(row_vals[:-3]):
            if all(elem == letter for elem in row_vals[col:col+4]):
                return row, col
        for col, col_vals in zip(range(cols), (c4_board[r][col] for r in range(row, min(row+4, rows)))):
            if all(elem == letter for elem in col_vals):
                return row, col
        for col, col_vals in enumerate(row_vals[:-3]):
            if row < rows-3 and col < cols-3:
                diag_vals = [c4_board[row+i][col+i] for i in range(4)]
                if all(elem == letter for elem in diag_vals):
                    return row, col
        for col, col_vals in enumerate(row_vals[:-3]):
            if row >= 3 and col < cols-3:
                diag_vals = [c4_board[row-i][col+i] for i in range(4)]
                if all(elem == letter for elem in diag_vals):
                    return row, col
    return -1, -1

def val_success(c4_board, letter):
    rows, columns = c4_board.shape
    for row in range(rows):
        for col in range(columns - 3):
            if all(c4_board[row][col + i] == letter for i in range(4)):
                return True

    for row in range(rows - 3):
        for col in range(columns):
            if all(c4_board[row + i][col] == letter for i in range(4)):
                return True

    for row in range(rows - 3):
        for col in range(columns - 3):
            if all(c4_board[row + i][col + i] == letter for i in range(4)):
                return True

    for row in range(3, rows):
        for col in range(columns - 3):
            if all(c4_board[row - i][col + i] == letter for i in range(4)):
                return True

    return False

def first_turn_toss():
    choices = [1,2]
    return random.choice(choices)

def val_final_turn(c4_board, SI_Agent_Letter, MinMax_Letter):
    return any(val_success(c4_board, letter) for letter in (SI_Agent_Letter, MinMax_Letter)) or not get_allowed_moves(c4_board)


In [5]:
def get_pos(positions):
    return int(''.join([str(int(position)) for position in positions.flatten()]))

def get_q_values_for_action(ql_states, current_board, current_position):
    position = get_pos(current_board)
    if position not in ql_states:
        ql_states[(position, current_position)] = 0
    return ql_states[(position, current_position)]

def get_best_action_from_q_values(ql_states, current_board, possible_positions, epsilon):
    return random.choice(possible_positions) if random.random() < epsilon else max([(get_q_values_for_action(ql_states, current_board, position), position) for position in possible_positions], key=lambda x: x[0])[1]

def load_q_model(file_path="Connect4QLModel.pickle"):
    with open(file_path, "rb") as file:
        ql_states = pickle.load(file)
    return ql_states

In [6]:
def calculate_score(c4_board, letter, SIAgentLetter, MinMaxLetter):
    score = 0
    OtherPlayerLetter = MinMaxLetter if letter == SIAgentLetter else SIAgentLetter
    rows, cols = c4_board.shape

    for i in range(rows):
        row_array = [int(x) for x in list(c4_board[i,:])]
        col_array = [int(x) for x in list(c4_board[:,i])]
        for j in range(cols-3):
            sub_row = row_array[j:j+4]
            sub_col = col_array[j:j+4]
            if sub_row.count(letter) == 4:
                score += 1000
            elif sub_row.count(letter) == 3 and sub_row.count(0) == 1:
                score += 100
            elif sub_row.count(letter) == 2 and sub_row.count(0) == 2:
                score += 10
            if sub_row.count(OtherPlayerLetter) == 3 and sub_row.count(0) == 1:
                score -= 10
            if sub_col.count(letter) == 4:
                score += 1000
            elif sub_col.count(letter) == 3 and sub_col.count(0) == 1:
                score += 100
            elif sub_col.count(letter) == 2 and sub_col.count(0) == 2:
                score += 10
            if sub_col.count(OtherPlayerLetter) == 3 and sub_col.count(0) == 1:
                score -= 10

    for i in range(rows-3):
        for j in range(cols-3):
            sub_diagonal1 = [c4_board[i+k][j+k] for k in range(4)]
            sub_diagonal2 = [c4_board[i+3-k][j+k] for k in range(4)]
            if sub_diagonal1.count(letter) == 4:
                score += 1000
            elif sub_diagonal1.count(letter) == 3 and sub_diagonal1.count(0) == 1:
                score += 100
            elif sub_diagonal1.count(letter) == 2 and sub_diagonal1.count(0) == 2:
                score += 10
            if sub_diagonal1.count(OtherPlayerLetter) == 3 and sub_diagonal1.count(0) == 1:
                score -= 10
            if sub_diagonal2.count(letter) == 4:
                score += 1000
            elif sub_diagonal2.count(letter) == 3 and sub_diagonal2.count(0) == 1:
                score += 100
            elif sub_diagonal2.count(letter) == 2 and sub_diagonal2.count(0) == 2:
                score += 10
            if sub_diagonal2.count(OtherPlayerLetter) == 3 and sub_diagonal2.count(0) == 1:
                score -= 10

    return score

def min_max_with_alpha_beta_pruning_and_depth(c4_board, current_depth, isMinMaxMove, MinMaxLetter, SIAgentLetter, alpha, beta):

    if val_final_turn(c4_board, SIAgentLetter, MinMaxLetter):

        if val_success(c4_board, MinMaxLetter):
            return (None, 10000000)

        elif val_success(c4_board, SIAgentLetter):
            return (None, -10000000)

        else:
            return (None, 0)

    if current_depth == 0:
        return (None, calculate_score(c4_board, MinMaxLetter, SIAgentLetter, MinMaxLetter))

    possible_positions = get_allowed_moves(c4_board)

    if isMinMaxMove:
        optimisedScore = -math.inf
        optimisedPosition = random.choice(possible_positions)

        for position in possible_positions:
            random_row = get_next_row(c4_board, position)
            new_c4_board = c4_board.copy()
            new_c4_board[random_row][position] = MinMaxLetter
            current_minmax_score = min_max_with_alpha_beta_pruning_and_depth(new_c4_board, current_depth - 1, False, MinMaxLetter, SIAgentLetter, alpha, beta)[1]

            if current_minmax_score > optimisedScore:
                optimisedScore = current_minmax_score
                optimisedPosition = position

            alpha = max(optimisedScore, alpha)

            if alpha >= beta:
                break

        return optimisedPosition, optimisedScore

    else:
        optimisedScore = math.inf
        optimisedPosition = random.choice(possible_positions)

        for position in possible_positions:
            random_row = get_next_row(c4_board, position)
            new_c4_board = c4_board.copy()
            new_c4_board[random_row][position] = MinMaxLetter
            current_minmax_score = min_max_with_alpha_beta_pruning_and_depth(new_c4_board, current_depth - 1, True, MinMaxLetter, SIAgentLetter, alpha, beta)[1]

            if current_minmax_score < optimisedScore:
                optimisedScore = current_minmax_score
                optimisedPosition = position

            beta = min(beta, optimisedScore)

            if alpha >= beta:
                break

        return optimisedPosition, optimisedScore



In [7]:
def play_connect4_game(MinMaxPlaysFirst, qLearningPlayer, minmaxPlayer, c4Game):
    QLearningLetter = 1
    MinMaxLetter = 2
    
    while True:
        if MinMaxPlaysFirst:
            MinMaxPossible_Positions = get_allowed_moves(c4Game)

            if len(MinMaxPossible_Positions) == 0:
                return "Draw"

            minmax_chosen_column, _ = min_max_with_alpha_beta_pruning_and_depth(c4Game, 8, True, MinMaxLetter, QLearningLetter, -math.inf, math.inf)

            minmax_chosen_row = get_next_row(c4Game, minmax_chosen_column)
            c4Game[minmax_chosen_row][minmax_chosen_column] = MinMaxLetter

            if val_success(c4Game, MinMaxLetter):
                return "MinMaxWon"

            if val_success(c4Game, QLearningLetter):
                return "QLearningWon"

            if len(get_allowed_moves(c4Game)) == 0:
                return "Draw"

            QLearningPossible_Positions = get_allowed_moves(c4Game)

            if len(QLearningPossible_Positions) == 0:
                return "Draw"

            QLearning_chosen_column = get_best_action_from_q_values(qLearningPlayer, c4Game, QLearningPossible_Positions)
            QLearning_chosen_row = get_next_row(c4Game, QLearning_chosen_column)
            c4Game[QLearning_chosen_row][QLearning_chosen_column] = QLearningLetter

            if val_success(c4Game, QLearningLetter):
                return "QLearningWon"

            if val_success(c4Game, MinMaxLetter):
                return "MinMaxWon"

            if len(get_allowed_moves(c4Game)) == 0:
                return "Draw"
        else:
            QLearningPossible_Positions = get_allowed_moves(c4Game)

            if len(QLearningPossible_Positions) == 0:
                return "Draw"

            QLearning_chosen_column = get_best_action_from_q_values(qLearningPlayer, c4Game, QLearningPossible_Positions,epsilon=1.0)
            QLearning_chosen_row = get_next_row(c4Game, QLearning_chosen_column)
            c4Game[QLearning_chosen_row][QLearning_chosen_column] = QLearningLetter

            if val_success(c4Game, QLearningLetter):
                return "QLearningWon"

            if val_success(c4Game, MinMaxLetter):
                return "MinMaxWon"

            if len(get_allowed_moves(c4Game)) == 0:
                return "Draw"

            MinMaxPossible_Positions = get_allowed_moves(c4Game)

            if len(MinMaxPossible_Positions) == 0:
                return "Draw"

            minmax_chosen_column, _ = min_max_with_alpha_beta_pruning_and_depth(c4Game, 8, True, MinMaxLetter, QLearningLetter, -math.inf, math.inf)

            minmax_chosen_row = get_next_row(c4Game, minmax_chosen_column)
            c4Game[minmax_chosen_row][minmax_chosen_column] = MinMaxLetter

            if val_success(c4Game, MinMaxLetter):
                return "MinMaxWon"

            if val_success(c4Game, QLearningLetter):
                return "QLearningWon"

            if len(get_allowed_moves(c4Game)) == 0:
                return "Draw"


In [8]:
def play_connect4_game(MinMaxPlaysFirst, qLearningPlayer, MinMaxLetter, QLearningLetter, c4Game, depth=8, epsilon=0.1):
    while True:
        current_player_first = MinMaxPlaysFirst
        for _ in range(2):  # Each loop actually allows both players to play once, order depends on MinMaxPlaysFirst
            if current_player_first:
                # MinMax's turn
                possible_positions = get_allowed_moves(c4Game)
                if not possible_positions:
                    return "Draw"
                chosen_column, _ = min_max_with_alpha_beta_pruning_and_depth(c4Game, depth, True, MinMaxLetter, QLearningLetter, -math.inf, math.inf)
                chosen_row = get_next_row(c4Game, chosen_column)
                c4Game[chosen_row][chosen_column] = MinMaxLetter
                if val_success(c4Game, MinMaxLetter):
                    return "MinMaxWon"
            else:
                # QLearning's turn
                possible_positions = get_allowed_moves(c4Game)
                if not possible_positions:
                    return "Draw"
                chosen_column = get_best_action_from_q_values(qLearningPlayer, c4Game, possible_positions, epsilon)
                chosen_row = get_next_row(c4Game, chosen_column)
                c4Game[chosen_row][chosen_column] = QLearningLetter
                if val_success(c4Game, QLearningLetter):
                    return "QLearningWon"
            
            # Switch players after each move
            current_player_first = not current_player_first

            # Check for draw after each move
            if not get_allowed_moves(c4Game):
                return "Draw"


Fisrt is Random Move between MinMax and Qlearning

In [9]:
import random

def simulate_games(games, rows, columns):
    MinMaxWin = QLearningWin = Draw = 0
    qLearningPlayer = load_q_model()
    print(f"Current Q Learning model has {len(qLearningPlayer)} states")
    
    MinMaxLetter = 2  # MinMax Player
    QLearningLetter = 1  

    for _ in range(games):
        c4Game = original_board(rows, columns)
        
        # Determine who plays first by random choice
        MinMaxPlaysFirst = random.choice([True, False])
        
        # Playing a game of Connect4
        winner = play_connect4_game(MinMaxPlaysFirst, qLearningPlayer, MinMaxLetter, QLearningLetter, c4Game, depth=8, epsilon=0.1)
        if winner == 'QLearningWon':
            QLearningWin += 1
        elif winner == 'MinMaxWon':
            MinMaxWin += 1
        else:
            Draw += 1
    
    print("Results:")
    print(f"MinMax wins: {MinMaxWin}")
    print(f"QLearning wins: {QLearningWin}")
    print(f"Draws: {Draw}")


games = 5
simulate_games(games, 6, 7)


Current Q Learning model has 87890237 states
Results:
MinMax wins: 3
QLearning wins: 1
Draws: 1


Current Q Learning model has 87890237 states
Results:
MinMax wins: 143
QLearning wins: 46
Draws: 11

In [7]:
import random

def simulate_games_minmax(games, rows, columns):
    MinMaxWin = QLearningWin = Draw = 0
    qLearningPlayer = load_q_model()
    print(f"Current Q Learning model has {len(qLearningPlayer)} states")
    
    MinMaxLetter = 2  
    QLearningLetter = 1  

    for _ in range(games):
        c4Game = original_board(rows, columns)
        
        # MinMax always plays first
        MinMaxPlaysFirst = True
        
        
        winner = play_connect4_game(MinMaxPlaysFirst, qLearningPlayer, MinMaxLetter, QLearningLetter, c4Game, depth=8, epsilon=0.1)
        if winner == 'QLearningWon':
            QLearningWin += 1
        elif winner == 'MinMaxWon':
            MinMaxWin += 1
        else:
            Draw += 1
    
    print("Results:")
    print(f"MinMax wins: {MinMaxWin}")
    print(f"QLearning wins: {QLearningWin}")
    print(f"Draws: {Draw}")


games = 200
simulate_games_minmax(games, 6, 7)


Current Q Learning model has 87890237 states
Results:
MinMax wins: 139
QLearning wins: 49
Draws: 12


In [8]:
import random

def simulate_games(games, rows, columns):
    MinMaxWin = QLearningWin = Draw = 0
    qLearningPlayer = load_q_model()
    print(f"Current Q Learning model has {len(qLearningPlayer)} states")
    
    MinMaxLetter = 2  
    QLearningLetter = 1  

    for _ in range(games):
        c4Game = original_board(rows, columns)
        
        # QLearning always plays first
        MinMaxPlaysFirst = False
        
        
        winner = play_connect4_game(MinMaxPlaysFirst, qLearningPlayer, MinMaxLetter, QLearningLetter, c4Game, depth=8, epsilon=0.1)
        if winner == 'QLearningWon':
            QLearningWin += 1
        elif winner == 'MinMaxWon':
            MinMaxWin += 1
        else:
            Draw += 1
    
    print("Results:")
    print(f"MinMax wins: {MinMaxWin}")
    print(f"QLearning wins: {QLearningWin}")
    print(f"Draws: {Draw}")


games = 200
simulate_games(games, 6, 7)


Current Q Learning model has 87890237 states
Results:
MinMax wins: 145
QLearning wins: 47
Draws: 8
