In [1]:
import random
import math
from IPython.display import display
import pandas as pd
import pickle
import numpy as np

In [2]:
def original_board():
    ttt_tictac_board = {
        1: ' ', 2: ' ', 3: ' ',
        4: ' ', 5: ' ', 6: ' ',
        7: ' ', 8: ' ', 9: ' '
    }
    SI_Agent_Letter = 'X'
    MinMax_Letter = 'O'
    return ttt_tictac_board, SI_Agent_Letter, MinMax_Letter

def show_board(tictac_board,agent):
    print("\n")
    print(f"{agent}'s turn:")
    for row in range(3):
        for col in range(3):
            cell = row * 3 + col + 1
            print(tictac_board[cell], end="")
            if col < 2:
                print(" | ", end="")
        print()
        if row < 2:
            print("---------")
    print()

def first_turn():
    choices = [1, 2]  # Assuming 1 for one player and 2 for another player
    return random.choice(choices)

def val_turn(tictac_board, move):
    if move in tictac_board:
        return tictac_board[move] == ' '
    else:
        return False  # This handles the case where the move is not a valid key in the tictac_board dictionary


def val_draw(tictac_board):
    return ' ' not in tictac_board.values() and not val_success(tictac_board)

def val_success(tictac_board):
    win_combinations = [
        (1, 2, 3), (4, 5, 6), (7, 8, 9),
        (1, 4, 7), (2, 5, 8), (3, 6, 9),
        (1, 5, 9), (7, 5, 3)
    ]

    for combo in win_combinations:
        if tictac_board[combo[0]] == tictac_board[combo[1]] == tictac_board[combo[2]] != ' ':
            return True

    return False

def val_success_for_letter(tictac_board, mark):
    winning_positions = [
        (1, 2, 3), (4, 5, 6), (7, 8, 9),
        (1, 4, 7), (2, 5, 8), (3, 6, 9),
        (1, 5, 9), (7, 5, 3)
    ]

    for pos in winning_positions:
        if all(tictac_board[i] == mark for i in pos):
            return True
    return False

def get_random_turns(tictac_board):
    position = random.randint(1, 9)
    if val_turn(tictac_board, position):
        return position
    else:
        return get_random_turns(tictac_board)


In [3]:
epsilon = 1.0
q_learning_states = {}

def get_pos(current_tictac_board):
    return tuple(tuple(current_tictac_board[i+j] for j in range(3)) for i in range(1, 10, 3))


def get_q_values_for_action(current_tictac_board, current_position):
    position = get_pos(current_tictac_board)
    if position not in q_learning_states:
        q_learning_states[position] = np.zeros(9)
    return q_learning_states[position][current_position - 1]

def get_best_action_from_q_values(current_tictac_board, possible_positions):
    global epsilon
    if random.random() < epsilon:
        return random.choice(possible_positions)
    else:
        return max(possible_positions, key=lambda x: get_q_values_for_action(current_tictac_board, x))
def load_q_values(filename="TicTacToeQL_Model3.pickle"):
    global q_learning_states
    with open(filename, "rb") as file:
        q_learning_states = pickle.load(file)

In [4]:
import math
import random


import random

def min_max_with_alpha_beta_pruning(ttt_game, MinMax_Letter, QLearning_Letter, randomness=0.1):
    """Apply MinMax strategy with a chance to make a random move."""
    if random.random() < randomness:
        # Random Move
        positions = [pos for pos in range(1, 10) if ttt_game[pos] == ' ']
        return random.choice(positions) if positions else None

    optimised_score = -float('inf')
    optimised_position = None

    for possible_position in range(1, 10):
        if ttt_game[possible_position] == ' ':
            ttt_game[possible_position] = MinMax_Letter
            current_score = evaluate_min_max_score(
                ttt_game, MinMax_Letter, QLearning_Letter, False, -float('inf'), float('inf'))
            ttt_game[possible_position] = ' '

            if current_score > optimised_score:
                optimised_score = current_score
                optimised_position = possible_position

    return optimised_position if optimised_position is not None else get_random_turns(ttt_game)


def evaluate_min_max_score(ttt_game, MinMax_Letter, QLearning_Letter, is_min_max_move, alpha, beta):
    if val_success_for_letter(ttt_game, MinMax_Letter):
        return 1
    elif val_success_for_letter(ttt_game, QLearning_Letter):
        return -1
    elif val_draw(ttt_game):
        return 0

    if is_min_max_move:
        optimised_score = -math.inf

        for possible_position in ttt_game.keys():

            if ttt_game[possible_position] == ' ':
                ttt_game[possible_position] = MinMax_Letter
                current_score = evaluate_min_max_score(ttt_game, MinMax_Letter, QLearning_Letter, False, alpha, beta)
                ttt_game[possible_position] = ' '

                optimised_score = max(optimised_score, current_score)
                alpha = max(alpha, optimised_score)

                if alpha >= beta:
                    break

        return optimised_score

    else:
        optimised_score = math.inf

        for possible_position in ttt_game.keys():
            if ttt_game[possible_position] == ' ':
                ttt_game[possible_position] = QLearning_Letter
                current_score = evaluate_min_max_score(ttt_game, MinMax_Letter, QLearning_Letter, True, alpha, beta)
                ttt_game[possible_position] = ' '

                optimised_score = min(optimised_score, current_score)
                beta = min(beta, optimised_score)

                if alpha >= beta:
                    break

        return optimised_score


def get_best_action_from_q_values_defensive(current_tictac_board, possible_positions, QLearning_Letter):
    """Prioritize blocking opponent from winning over picking the best Q-value move."""
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = QLearning_Letter
        if val_success_for_letter(current_tictac_board_copy, QLearning_Letter):
            return position  # Prioritize immediate win

    # Block opponent win
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = 'O'  # Assuming opponent is 'O'
        if val_success_for_letter(current_tictac_board_copy, 'O'):
            return position  # Block opponent winning move

    # Fallback to Q-learning decision
    return max(possible_positions, key=lambda x: get_q_values_for_action(current_tictac_board, x), default=None)


def play_tic_tac_toe(MinMaxPlaysFirst, QLearning, MinMax, ttt_game):
        MinMaxLetter = 'O'
        QLearning_Letter = 'X'

        while True:
            if MinMaxPlaysFirst:
                
                MinMaxPossible_Positions = [i for i in range(1, 10) if ttt_game.validateMove(i)]

                if len(MinMaxPossible_Positions) == 0:
                    return "Draw"
            
                MinMaxPosition = MinMax.min_max_with_alpha_beta_pruning(ttt_game, MinMaxLetter, QLearning_Letter)
                
                if ttt_game.validateMove(MinMaxPosition):
                    ttt_game.ttt_tictac_board[MinMaxPosition] = MinMaxLetter
      
                if ttt_game.validateWinForLetter(MinMaxLetter) : 
                    return "MinMaxWon"

                if ttt_game.validateDraw():
                    return "Draw"

                QLearningPossible_Positions = [i for i in range(1, 10) if ttt_game.validateMove(i)]

                if len(QLearningPossible_Positions) == 0:
                    break

                QLearningPosition = QLearning.getBestPositionFromQLearning(ttt_game.ttt_tictac_board, QLearningPossible_Positions)

                if ttt_game.validateMove(QLearningPosition):
                    ttt_game.ttt_tictac_board[QLearningPosition] = QLearning_Letter

                if ttt_game.validateWinForLetter(QLearning_Letter) : 
                    return "QLearningWon"

                if ttt_game.validateDraw():
                    return "Draw"

            else:
                QLearningPossible_Positions = [i for i in range(1, 10) if ttt_game.validateMove(i)]

                if len(QLearningPossible_Positions) == 0:
                    break

                QLearningPosition = QLearning.getBestPositionFromQLearning(ttt_game.ttt_tictac_board, QLearningPossible_Positions)

                if ttt_game.validateMove(QLearningPosition):
                    ttt_game.ttt_tictac_board[QLearningPosition] = QLearning_Letter

                if ttt_game.validateWinForLetter(QLearning_Letter) : 
                    return "QLearningWon"

                if ttt_game.validateDraw():
                    return "Draw"


                MinMaxPossible_Positions = [i for i in range(1, 10) if ttt_game.validateMove(i)]

                if len(MinMaxPossible_Positions) == 0:
                    return "Draw"
            
                MinMaxPosition = MinMax.min_max_with_alpha_beta_pruning(ttt_game, MinMaxLetter, QLearning_Letter)
                
                if ttt_game.validateMove(MinMaxPosition):
                    ttt_game.ttt_tictac_board[MinMaxPosition] = MinMaxLetter
      
                if ttt_game.validateWinForLetter(MinMaxLetter) : 
                    return "MinMaxWon"

                if ttt_game.validateDraw():
                    return "Draw"


First move is Random

In [9]:
def run_tic_tac_toe_games(num_games, epsilon=0.8, learning_rate=0.1, randomness=0.2):
    MinMaxWin = QLearningWin = Draw = 0
    
    for _ in range(num_games):
        ttt_tictac_board, _, _ = original_board()
        MinMaxPlaysFirst = first_turn() == 1

        game_in_progress = True
        while game_in_progress:
           
            if MinMaxPlaysFirst:
                show_board(ttt_tictac_board, "MinMax")
                position = min_max_with_alpha_beta_pruning(ttt_tictac_board, 'O', 'X', randomness)
                if position is None:
                    break
                ttt_tictac_board[position] = 'O'
                if val_success_for_letter(ttt_tictac_board, 'O'):
                    MinMaxWin += 1
                    break
                elif val_draw(ttt_tictac_board):
                    Draw += 1
                    break
            else:
                show_board(ttt_tictac_board, "QLearning")
                position = get_best_action_from_q_values_defensive(ttt_tictac_board, [i for i in range(1, 10) if val_turn(ttt_tictac_board, i)], 'X')
                if position is None:
                    break
                ttt_tictac_board[position] = 'X'
                if val_success_for_letter(ttt_tictac_board, 'X'):
                    QLearningWin += 1
                    break
                elif val_draw(ttt_tictac_board):
                    Draw += 1
                    break

            MinMaxPlaysFirst = not MinMaxPlaysFirst

    return MinMaxWin, QLearningWin, Draw


num_games = 2000
MinMaxWin, QLearningWin, Draw = run_tic_tac_toe_games(num_games)
print(f"Results after {num_games} games:")
print(f"MinMax wins: {MinMaxWin}")
print(f"QLearning wins: {QLearningWin}")
print(f"Draws: {Draw}")




QLearning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
X |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
X |   |  
---------
  | O |  
---------
  |   |  



MinMax's turn:
X | X |  
---------
  | O |  
---------
  |   |  



QLearning's turn:
X | X | O
---------
  | O |  
---------
  |   |  



MinMax's turn:
X | X | O
---------
  | O |  
---------
X |   |  



QLearning's turn:
X | X | O
---------
O | O |  
---------
X |   |  



MinMax's turn:
X | X | O
---------
O | O | X
---------
X |   |  



QLearning's turn:
X | X | O
---------
O | O | X
---------
X | O |  



QLearning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
X |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
X |   |  
---------
  | O |  
---------
  |   |  



MinMax's turn:
X | X |  
---------
  | O |  
---------
  |   |  



QLearning's turn:
X | X | O
---------
  | O |  
---------
  |   |  



MinMax's turn:
X | X |

Minmax goes first

In [10]:
def run_tic_tac_toe_games_minmax(num_games, epsilon=0.8, learning_rate=0.1, randomness=0.2):
    MinMaxWin = QLearningWin = Draw = 0
    
    for _ in range(num_games):
        ttt_tictac_board, _, _ = original_board()
        MinMaxPlaysFirst = True  # MinMax always starts first

        while True:  
            # MinMax agent's move
            show_board(ttt_tictac_board, "MinMax")
            position = min_max_with_alpha_beta_pruning(ttt_tictac_board, 'O', 'X', randomness)
            if position is None:
                if val_draw(ttt_tictac_board):  
                    Draw += 1
                break
            ttt_tictac_board[position] = 'O'
            if val_success_for_letter(ttt_tictac_board, 'O'):
                MinMaxWin += 1
                break
            elif val_draw(ttt_tictac_board):
                Draw += 1
                break

            show_board(ttt_tictac_board, "QLearning")
            # Q-learning agent's move
            QLearningPositions = [i for i in range(1, 10) if val_turn(ttt_tictac_board, i)]
            if not QLearningPositions:
                if val_draw(ttt_tictac_board):
                    Draw += 1
                break
            position = get_best_action_from_q_values_defensive(ttt_tictac_board, QLearningPositions, 'X')
            ttt_tictac_board[position] = 'X'
            if val_success_for_letter(ttt_tictac_board, 'X'):
                QLearningWin += 1
                break
            elif val_draw(ttt_tictac_board):
                Draw += 1
                break

    return MinMaxWin, QLearningWin, Draw

def get_best_action_from_q_values_defensive(current_tictac_board, possible_positions, QLearning_Letter):
    """Prioritize blocking opponent from winning over picking the best Q-value move."""
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = QLearning_Letter
        if val_success_for_letter(current_tictac_board_copy, QLearning_Letter):
            return position  # Prioritize immediate win

    # Block opponent win
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = 'O'  # Assuming opponent is 'O'
        if val_success_for_letter(current_tictac_board_copy, 'O'):
            return position  # Block opponent winning move

    # Fallback to Q-learning decision
    return max(possible_positions, key=lambda x: get_q_values_for_action(current_tictac_board, x), default=None)

num_games = 2000
MinMaxWin, QLearningWin, Draw = run_tic_tac_toe_games_minmax(num_games)
print(f"Results after {num_games} games:")
print(f"MinMax wins: {MinMaxWin}")
print(f"QLearning wins: {QLearningWin}")
print(f"Draws: {Draw}")




MinMax's turn:
  |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
O |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
O | X |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
O | X |  
---------
O |   |  
---------
  |   |  



MinMax's turn:
O | X |  
---------
O |   |  
---------
X |   |  



QLearning's turn:
O | X |  
---------
O | O |  
---------
X |   |  



MinMax's turn:
O | X |  
---------
O | O | X
---------
X |   |  



MinMax's turn:
  |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
O |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
O | X |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
O | X |  
---------
O |   |  
---------
  |   |  



MinMax's turn:
O | X |  
---------
O |   |  
---------
X |   |  



QLearning's turn:
O | X | O
---------
O |   |  
---------
X |   |  



MinMax's turn:
O | X | O
---------
O | X |  
---------
X |   |  



QLearning's turn:
O | X | O


QLearning goes First

In [5]:
def run_tic_tac_toe_games_q_learning_starts(num_games, epsilon=0.8, learning_rate=0.1, randomness=0.2):
    MinMaxWin = QLearningWin = Draw = 0
    
    for _ in range(num_games):
        ttt_tictac_board, _, _ = original_board()
        QLearningPlaysFirst = True  # Q-learning always starts first

        while True:
            show_board(ttt_tictac_board, "QLearning")
            # Q-learning agent's move
            QLearningPositions = [i for i in range(1, 10) if val_turn(ttt_tictac_board, i)]
            if not QLearningPositions:
                if val_draw(ttt_tictac_board):
                    Draw += 1
                break
            position = get_best_action_from_q_values_defensive(ttt_tictac_board, QLearningPositions, 'X')
            ttt_tictac_board[position] = 'X'
            if val_success_for_letter(ttt_tictac_board, 'X'):
                QLearningWin += 1
                break
            elif val_draw(ttt_tictac_board):
                Draw += 1
                break

            # MinMax agent's move
            show_board(ttt_tictac_board, "MinMax")
            MinMaxPositions = [i for i in range(1, 10) if val_turn(ttt_tictac_board, i)]
            if not MinMaxPositions:
                if val_draw(ttt_tictac_board):
                    Draw += 1
                break
            position = min_max_with_alpha_beta_pruning(ttt_tictac_board, 'O', 'X', randomness)
            ttt_tictac_board[position] = 'O'
            if val_success_for_letter(ttt_tictac_board, 'O'):
                MinMaxWin += 1
                break
            elif val_draw(ttt_tictac_board):
                Draw += 1
                break

    return MinMaxWin, QLearningWin, Draw

def get_best_action_from_q_values_defensive(current_tictac_board, possible_positions, QLearning_Letter):
    """Prioritize blocking opponent from winning over picking the best Q-value move."""
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = QLearning_Letter
        if val_success_for_letter(current_tictac_board_copy, QLearning_Letter):
            return position  # Prioritize immediate win

    # Block opponent win
    for position in possible_positions:
        current_tictac_board_copy = current_tictac_board.copy()
        current_tictac_board_copy[position] = 'O'  # Assuming opponent is 'O'
        if val_success_for_letter(current_tictac_board_copy, 'O'):
            return position  # Block opponent winning move

    # Fallback to Q-learning decision
    return max(possible_positions, key=lambda x: get_q_values_for_action(current_tictac_board, x), default=None)

def min_max_with_alpha_beta_pruning(tictac_board, MinMax_Letter, QLearning_Letter, randomness=0.2):
    """Simulate MinMax move with a chance for random decision to simulate errors."""
    if random.random() < randomness:  # Introduce some randomness to the MinMax decision
        return random.choice([pos for pos in range(1, 10) if tictac_board[pos] == ' '])
    # MinMax logic here
    # Placeholder for real MinMax algorithm
    return random.choice([pos for pos in range(1, 10) if tictac_board[pos] == ' '])

num_games = 10
MinMaxWin, QLearningWin, Draw = run_tic_tac_toe_games_q_learning_starts(num_games)
print(f"Results after {num_games} games:")
print(f"MinMax wins: {MinMaxWin}")
print(f"QLearning wins: {QLearningWin}")
print(f"Draws: {Draw}")




QLearning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
X |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
X |   |  
---------
O |   |  
---------
  |   |  



MinMax's turn:
X | X |  
---------
O |   |  
---------
  |   |  



QLearning's turn:
X | X |  
---------
O |   | O
---------
  |   |  



QLearning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
X |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
X |   |  
---------
  | O |  
---------
  |   |  



MinMax's turn:
X | X |  
---------
  | O |  
---------
  |   |  



QLearning's turn:
X | X |  
---------
  | O |  
---------
  | O |  



QLearning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



MinMax's turn:
X |   |  
---------
  |   |  
---------
  |   |  



QLearning's turn:
X |   |  
---------
O |   |  
---------
  |   |  



MinMax's turn:
X | X |  
---------
O |   |  
---------
  |   |  



QLearning's turn:
X | 