In [1]:
import numpy as np
import random
import pickle

def original_board(rows=6, columns=7):
    return np.zeros((rows, columns))

def val_turn(connect4board, column):
    return connect4board[len(connect4board)-1][column] == 0

def get_next_row(connect4board, column):
    return next((row for row in range(len(connect4board)) if connect4board[row][column] == 0), None)

def get_correct_turn(connect4board):
    return [column for column in range(connect4board.shape[1]) if val_turn(connect4board, column)]

def get_next_pos(connect4board, letter):
    rows, cols = connect4board.shape
    for row, row_vals in enumerate(connect4board):
        for col, col_val in enumerate(row_vals[:-3]):
            if all(elem == letter for elem in row_vals[col:col+4]):
                return row, col
        for col, col_vals in zip(range(cols), (connect4board[r][col] for r in range(row, min(row+4, rows)))):
            if all(elem == letter for elem in col_vals):
                return row, col
        for col, col_vals in enumerate(row_vals[:-3]):
            if row < rows-3 and col < cols-3:
                diag_vals = [connect4board[row+i][col+i] for i in range(4)]
                if all(elem == letter for elem in diag_vals):
                    return row, col
        for col, col_vals in enumerate(row_vals[:-3]):
            if row >= 3 and col < cols-3:
                diag_vals = [connect4board[row-i][col+i] for i in range(4)]
                if all(elem == letter for elem in diag_vals):
                    return row, col
    else:
        return -1, -1

def val_success(connect4board, letter):
    rows, cols = connect4board.shape
    for row in range(rows):
        for col in range(cols - 3):
            if all(connect4board[row][col + i] == letter for i in range(4)):
                return True

    for row in range(rows - 3):
        for col in range(cols):
            if all(connect4board[row + i][col] == letter for i in range(4)):
                return True

    for row in range(rows - 3):
        for col in range(cols - 3):
            if all(connect4board[row + i][col + i] == letter for i in range(4)):
                return True

    for row in range(3, rows):
        for col in range(cols - 3):
            if all(connect4board[row - i][col + i] == letter for i in range(4)):
                return True

    return False

def first_turn():
    choices = [1, 2]
    return random.choice(choices)

def val_final_turn(connect4board, si_agent_letter, minmax_letter):
    return any(val_success(connect4board, letter) for letter in (si_agent_letter, minmax_letter)) or not get_correct_turn(connect4board)

def si_agent_turn( connect4board, si_agent_letter, minmax_letter):
    if val_final_turn(connect4board, si_agent_letter, minmax_letter):
        siagent_row, siagent_col = get_next_pos(connect4board, si_agent_letter)
        if siagent_row != -1:
            return siagent_row, siagent_col
        else:
            minmax_row, minmax_col = get_next_pos(connect4board, minmax_letter)
            if minmax_row != -1:
                return minmax_row, minmax_col
            else:
                possible_positions = get_correct_turn(connect4board)
                random_row = get_next_row(connect4board, random.choice(possible_positions))
                random_col = random.choice(possible_positions)
                return random_row, random_col
    else:
        possible_positions = get_correct_turn(connect4board)
        random_row = get_next_row(connect4board, random.choice(possible_positions)) 
        random_col = random.choice(possible_positions)

        return random_row, random_col

def get_pos(positions):
    return int(''.join([str(int(position)) for position in positions.flatten()]))

def get_q_values_for_action(ql_states, current_connect4board, current_position):
    position = get_pos(current_connect4board)
    if position not in ql_states:
        ql_states[(position, current_position)] = 0
    return ql_states[(position, current_position)]

def get_best_action_from_q_values(ql_states, current_connect4board, possible_positions, epsilon):
    return random.choice(possible_positions) if random.random() < epsilon else max([(get_q_values_for_action(ql_states, current_connect4board, position), position) for position in possible_positions], key=lambda x: x[0])[1]

def update_q_model(ql_states, current_connect4board, current_position, reward, successive_connect4board, possible_positions, alpha=0.1, gamma=0.99):
    best_q_value = max([get_q_values_for_action(ql_states, successive_connect4board, next_position) for next_position in possible_positions], default=0)
    optimised_q_value = get_q_values_for_action(ql_states, current_connect4board, current_position) + alpha * ((reward + gamma * best_q_value) - get_q_values_for_action(ql_states, current_connect4board, current_position))
    position = get_pos(current_connect4board)
    ql_states[(position, current_position)] = optimised_q_value

def update_epsilon(epsilon):
    return max(epsilon * 0.999, 0.1)

'''def save_q_model(ql_states, file_path="Connect4QLModel.pickle"):
    with open(file_path, "wb") as file:
        pickle.dump(ql_states, file)

def train_q_model():
    ql_states = {}
    ql_win = si_agent_win = draw = 0
    ql_learning_letter = 1
    si_agent_letter = 2
    total_episodes = 1000000
    
    for episode in range(total_episodes):
        connect4board = original_board()
        epsilon = 1.0

        while True:
            ql_possible_positions = get_correct_turn(connect4board)

            if len(ql_possible_positions) == 0:
                break

            ql_chosen_column = get_best_action_from_q_values(ql_states, connect4board, ql_possible_positions, epsilon)
            ql_chosen_row = get_next_row(connect4board, ql_chosen_column)
            connect4board[ql_chosen_row][ql_chosen_column] = ql_learning_letter

            if val_success(connect4board, ql_learning_letter):
                ql_win += 1
                update_q_model(ql_states, connect4board, ql_chosen_column, 1, connect4board, [])
                break
            elif val_success(connect4board, si_agent_letter):
                si_agent_win += 1
                update_q_model(ql_states, connect4board, ql_chosen_column, -1, connect4board, [])
                break
            elif len(get_correct_turn(connect4board)) == 0:
                draw += 1
                update_q_model(ql_states, connect4board, ql_chosen_column, 0, connect4board, [])
                break
            else:
                update_q_model(ql_states, connect4board, ql_chosen_column, 0, connect4board, get_correct_turn(connect4board))

            si_agent_chosen_row, si_agent_chosen_column = si_agent_turn(connect4board, connect4board, si_agent_letter, ql_learning_letter)
            connect4board[si_agent_chosen_row][si_agent_chosen_column] = si_agent_letter

            if val_success(connect4board, ql_learning_letter):
                ql_win += 1
                update_q_model(ql_states, connect4board, si_agent_chosen_column, 1, connect4board, [])
                break
            elif val_success(connect4board, si_agent_letter):
                si_agent_win += 1
                update_q_model(ql_states, connect4board, si_agent_chosen_column, -1, connect4board, [])
                break
            elif len(get_correct_turn(connect4board)) == 0:
                draw += 1
                update_q_model(ql_states, connect4board, si_agent_chosen_column, 0, connect4board, [])
                break
            else:
                update_q_model(ql_states, connect4board, si_agent_chosen_column, 0, connect4board, get_correct_turn(connect4board))

            epsilon = update_epsilon(epsilon)

        if episode % 1000 == 0:
            print(f"Episode {episode}: QLearning wins - {ql_win}, SI Agent wins - {si_agent_win}, Draws - {draw}")

    return ql_states, ql_win, si_agent_win, draw, total_episodes

ql_states, ql_win, si_agent_win, draw, total_episodes = train_q_model()

print(f"QLearning wins: {ql_win}")
print(f"SI Agent wins: {si_agent_win}")
print(f"Draws: {draw}")
print(f"Total Episodes: {total_episodes}")

save_q_model(ql_states)'''

'def save_q_model(ql_states, file_path="Connect4QLearningModel.pickle"):\n    with open(file_path, "wb") as file:\n        pickle.dump(ql_states, file)\n\ndef train_q_model():\n    ql_states = {}\n    ql_win = si_agent_win = draw = 0\n    ql_learning_letter = 1\n    si_agent_letter = 2\n    total_episodes = 1000000\n    \n    for episode in range(total_episodes):\n        connect4board = original_board()\n        epsilon = 1.0\n\n        while True:\n            ql_possible_positions = get_correct_turn(connect4board)\n\n            if len(ql_possible_positions) == 0:\n                break\n\n            ql_chosen_column = get_best_action_from_q_values(ql_states, connect4board, ql_possible_positions, epsilon)\n            ql_chosen_row = get_next_row(connect4board, ql_chosen_column)\n            connect4board[ql_chosen_row][ql_chosen_column] = ql_learning_letter\n\n            if val_success(connect4board, ql_learning_letter):\n                ql_win += 1\n                update_q_mode

In [2]:
import random
import numpy as np

def load_q_learning_model(file_path="Connect4QLModel.pickle"):
    try:
        with open(file_path, "rb") as file:
            ql_states = pickle.load(file)
            return ql_states
    except FileNotFoundError:
        print("Error: Could not find the Q-learning model file.")
        return None


def play_connect4(SIAgent_plays_first, connect4_connect4board, si_agent_turn, get_correct_turn, val_success):
    QLearningLetter = 1
    SIAgentLetter = 2

    while True:
        if SIAgent_plays_first:
            
            SIAgentPossible_Positions = get_correct_turn(connect4_connect4board)

            if len(SIAgentPossible_Positions) == 0:
                return "Draw"

            SIAgent_chosen_row, SIAgent_chosen_column = si_agent_turn(connect4_connect4board, SIAgentLetter, QLearningLetter)
            connect4_connect4board[SIAgent_chosen_row][SIAgent_chosen_column] = SIAgentLetter
            
            if val_success(connect4_connect4board, SIAgentLetter): 
                return "SIAgentWon"

            if val_success(connect4_connect4board, QLearningLetter):
                return "QLearningWon"

            if len(get_correct_turn(connect4_connect4board)) == 0:
                return "Draw"
            
            QLearningPossible_Positions = get_correct_turn(connect4_connect4board)
                
            if len(QLearningPossible_Positions) == 0:
                return "Draw"
                
            QLearning_chosen_column = random.choice(QLearningPossible_Positions)
            QLearning_chosen_row = get_next_row(connect4_connect4board, QLearning_chosen_column)
            connect4_connect4board[QLearning_chosen_row][QLearning_chosen_column] = QLearningLetter
            
            if val_success(connect4_connect4board, SIAgentLetter): 
                return "SIAgentWon"

            if val_success(connect4_connect4board, QLearningLetter):
                return "QLearningWon"

            if len(get_correct_turn(connect4_connect4board)) == 0:
                return "Draw"
            
        else:
            QLearningPossible_Positions = get_correct_turn(connect4_connect4board)
                
            if len(QLearningPossible_Positions) == 0:
                return "Draw"
                
            QLearning_chosen_column = random.choice(QLearningPossible_Positions)
            QLearning_chosen_row = get_next_row(connect4_connect4board, QLearning_chosen_column)
            connect4_connect4board[QLearning_chosen_row][QLearning_chosen_column] = QLearningLetter
            
            if val_success(connect4_connect4board, SIAgentLetter): 
                return "SIAgentWon"

            if val_success(connect4_connect4board, QLearningLetter):
                return "QLearningWon"

            if len(get_correct_turn(connect4_connect4board)) == 0:
                return "Draw"


            SIAgentPossible_Positions = get_correct_turn(connect4_connect4board)

            if len(SIAgentPossible_Positions) == 0:
                return "Draw"

            SIAgent_chosen_row, SIAgent_chosen_column = si_agent_turn(connect4_connect4board, SIAgentLetter, QLearningLetter)
            connect4_connect4board[SIAgent_chosen_row][SIAgent_chosen_column] = SIAgentLetter
            
            if val_success(connect4_connect4board, SIAgentLetter): 
                return "SIAgentWon"

            if val_success(connect4_connect4board, QLearningLetter):
                return "QLearningWon"

            if len(get_correct_turn(connect4_connect4board)) == 0:
                return "Draw"




In [6]:


games = 5
SIAgentWin = QLearningWin = Draw = 0

si_agent = si_agent_turn
ql_states = load_q_learning_model()

print(f"Current Q Learning model has {len(ql_states)} states")

for _ in (range(games)):
    connect4_connect4board = original_board()
    
    SIAgent_plays_first = False
    if first_turn() == 1:
        SIAgent_plays_first = True
    else:
        SIAgent_plays_first = False
    
    winner = play_connect4(SIAgent_plays_first, connect4_connect4board, si_agent_turn, get_correct_turn, val_success)

    if winner == 'QLearningWon':
        QLearningWin += 1
    elif winner == 'SIAgentWon':
        SIAgentWin += 1
    else:
        Draw += 1


Current Q Learning model has 87890237 states


In [7]:

print("Results:")
print(f"Semi-Intelligent Agent wins: {SIAgentWin}")
print(f"Q-Learning Agent wins: {QLearningWin}")
print(f"Draws: {Draw}")

Results:
Semi-Intelligent Agent wins: 2
Q-Learning Agent wins: 3
Draws: 0


Results:
Semi-Intelligent Agent wins: 698
Q-Learning Agent wins: 1276
Draws: 26

In [14]:
games = 2000
SIAgentWin = QLearningWin = Draw = 0

si_agent = si_agent_turn
ql_states = load_q_learning_model()

print(f"Current Q Learning model has {len(ql_states)} states")

# Set Q-Learning agent to make the first move
SIAgent_plays_first = False

for _ in range(games):
    connect4_connect4board = original_board()
    
    
    winner = play_connect4(SIAgent_plays_first, connect4_connect4board, si_agent_turn, get_correct_turn, val_success)

    if winner == 'QLearningWon':
        QLearningWin += 1
    elif winner == 'SIAgentWon':
        SIAgentWin += 1
    else:
        Draw += 1

print("Results:")
print(f"Semi-Intelligent Agent wins: {SIAgentWin}")
print(f"Q-Learning Agent wins: {QLearningWin}")
print(f"Draws: {Draw}")


Current Q Learning model has 87890237 states
Results:
Semi-Intelligent Agent wins: 645
Q-Learning Agent wins: 1326
Draws: 29


In [15]:
games = 2000
SIAgentWin = QLearningWin = Draw = 0

si_agent = si_agent_turn
ql_states = load_q_learning_model()

print(f"Current Q Learning model has {len(ql_states)} states")

# Set semi-intelligent agent to make the first move
SIAgent_plays_first = True

for _ in range(games):
    connect4_connect4board = original_board()
    
    
    winner = play_connect4(SIAgent_plays_first, connect4_connect4board, si_agent_turn, get_correct_turn, val_success)

    if winner == 'QLearningWon':
        QLearningWin += 1
    elif winner == 'SIAgentWon':
        SIAgentWin += 1
    else:
        Draw += 1

print("Results:")
print(f"Semi-Intelligent Agent wins: {SIAgentWin}")
print(f"Q-Learning Agent wins: {QLearningWin}")
print(f"Draws: {Draw}")


Current Q Learning model has 87890237 states
Results:
Semi-Intelligent Agent wins: 774
Q-Learning Agent wins: 1198
Draws: 28
