In [2]:
import random
import math
from IPython.display import display
import pandas as pd
import pickle
import numpy as np

In [3]:
import random

def original_tictac_board():
    return {
        1: ' ', 2: ' ', 3: ' ',
        4: ' ', 5: ' ', 6: ' ',
        7: ' ', 8: ' ', 9: ' '
    }

def show_tictac_board(tictac_board, agent):

    print("\n")
    print(f"{agent}'s turn:")
    for row in range(3):
        for col in range(3):
            cell = row * 3 + col + 1
            print(tictac_board[cell], end="")
            if col < 2:
                print(" | ", end="")
        print()
        if row < 2:
            print("---------")
    print()

def first_turn():
    return random.choice([1, 2])

def val_turn(tictac_board, move):
    return tictac_board[move] == ' '

def val_draw(tictac_board):
    return all(value != ' ' for value in tictac_board.values())

def val_success(tictac_board):
    win_combinations = [
        (1, 2, 3), (4, 5, 6), (7, 8, 9), 
        (1, 4, 7), (2, 5, 8), (3, 6, 9),  
        (1, 5, 9), (7, 5, 3)
    ]

    for combo in win_combinations:
        if tictac_board[combo[0]] == tictac_board[combo[1]] == tictac_board[combo[2]] != ' ':
            return True
    return False

def val_success_for_letter(tictac_board, mark):
    winning_positions = [
        (1, 2, 3), (4, 5, 6), (7, 8, 9),
        (1, 4, 7), (2, 5, 8), (3, 6, 9),
        (1, 5, 9), (7, 5, 3)
    ]
    for pos in winning_positions:
        if all(tictac_board[i] == mark for i in pos):
            return True
    return False



In [4]:
import random
import numpy as np
import pickle
import os


epsilon = 1.0
q_learning_states = {}

def get_pos(current_tictac_board):
    return tuple(tuple(current_tictac_board[i+j] for j in range(3)) for i in range(1, 10, 3))

def get_q_values_for_action(current_tictac_board, current_position):
    position = get_pos(current_tictac_board)
    if position not in q_learning_states:
        q_learning_states[position] = np.zeros(9)
    return q_learning_states[position][current_position - 1]

def get_best_action_from_q_values(current_tictac_board, possible_positions):
    global epsilon
    if random.random() < epsilon:
        return random.choice(possible_positions)
    else:
        return max(possible_positions, key=lambda x: get_q_values_for_action(current_tictac_board, x))


def update_q_values(current_tictac_board, current_position, reward, successive_tictac_board, possible_positions):
    best_q_value = max([get_q_values_for_action(successive_tictac_board, pos) for pos in possible_positions], default=0)
    current_q_value = get_q_values_for_action(current_tictac_board, current_position)
    optimised_q_value = current_q_value + 0.1 * ((reward + 0.99 * best_q_value) - current_q_value)
    
    # Update the Q-values for all symmetrical states
    add_symmetrical_states(current_tictac_board, np.full(9, optimised_q_value))  

    # Normal update for current state
    position = get_pos(current_tictac_board)
    q_learning_states[position][current_position - 1] = optimised_q_value

def update_epsilon_decay(current_episode, total_episodes):
    global epsilon
    initial_epsilon=1.0
    min_epsilon = 0.1
    decay_rate = 0.001  
    #epsilon = max(min_epsilon, min(1, 1 - decay_rate * (current_episode / total_episodes)))
    epsilon = max(min_epsilon, initial_epsilon - (initial_epsilon - min_epsilon) * (current_episode / total_episodes))


def save_q_values(filename="TicTacToeQL_Model3.pickle"):
    try:
        with open(filename, "wb") as file:
            pickle.dump(q_learning_states, file)
        print(f"Model successfully saved to {os.path.abspath(filename)}")
    except Exception as e:
        print(f"Failed to save the model: {e}")

def load_q_values(filename="TicTacToeQL_Model3.pickle"):
    global q_learning_states
    with open(filename, "rb") as file:
        q_learning_states = pickle.load(file)



def train_q_learning(total_episodes=300000):
    global q_learning_states
    q_learning_win = si_agent_win = draw = 0

    for episode in range(total_episodes):
        current_tictac_board = original_tictac_board()

        while True:
            q_learning_possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]

            if not q_learning_possible_positions:
                break

            q_learning_position = get_best_action_from_q_values(current_tictac_board, q_learning_possible_positions)

            if val_turn(current_tictac_board, q_learning_position):
                current_tictac_board[q_learning_position] = 'X'

            if val_success_for_letter(current_tictac_board, 'X'):
                q_learning_win += 1
                update_q_values(current_tictac_board, q_learning_position, 1, {}, [])
                break

            if val_draw(current_tictac_board):
                draw += 1
                update_q_values(current_tictac_board, q_learning_position, 0, {}, [])
                break

            
            si_agent_possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
            if si_agent_possible_positions:
                si_agent_position = random.choice(si_agent_possible_positions)
                current_tictac_board[si_agent_position] = 'O'

                if val_success_for_letter(current_tictac_board, 'O'):
                    si_agent_win += 1
                    update_q_values(current_tictac_board, q_learning_position, -1, {}, [])
                    break

                if val_draw(current_tictac_board):
                    draw += 1
                    update_q_values(current_tictac_board, q_learning_position, 0, {}, [])
                    break

        update_epsilon_decay(episode, total_episodes)

    print(f"QLearning Wins: {q_learning_win}, SIAgent Wins: {si_agent_win}, Draws: {draw}")
    return q_learning_win, si_agent_win, draw, total_episodes


def play_tic_tac_toe(si_agent_plays_first=True):
    current_tictac_board = original_tictac_board()
    show_tictac_board(current_tictac_board)

    while True:
        if si_agent_plays_first:
            si_agent_possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
            if not si_agent_possible_positions:
                return "Draw"

            si_agent_position = random.choice(si_agent_possible_positions)
            current_tictac_board[si_agent_position] = 'O'
            show_tictac_board(current_tictac_board)

            if val_success_for_letter(current_tictac_board, 'O'):
                return "SIAgentWon"

            if val_draw(current_tictac_board):
                return "Draw"

        q_learning_possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
        if not q_learning_possible_positions:
            return "Draw"

        q_learning_position = get_best_action_from_q_values(current_tictac_board, q_learning_possible_positions)
        current_tictac_board[q_learning_position] = 'X'
        show_tictac_board(current_tictac_board)

        if val_success_for_letter(current_tictac_board, 'X'):
            return "QLearningWon"

        if val_draw(current_tictac_board):
            return "Draw"

        si_agent_plays_first = not si_agent_plays_first  


def rotate_tictac_board(tictac_board):
    
    new_tictac_board = {}
    new_tictac_board[1] = tictac_board[7]
    new_tictac_board[2] = tictac_board[4]
    new_tictac_board[3] = tictac_board[1]
    new_tictac_board[4] = tictac_board[8]
    new_tictac_board[5] = tictac_board[5]
    new_tictac_board[6] = tictac_board[2]
    new_tictac_board[7] = tictac_board[9]
    new_tictac_board[8] = tictac_board[6]
    new_tictac_board[9] = tictac_board[3]
    return new_tictac_board

def flip_tictac_board(tictac_board):
    
    new_tictac_board = {}
    new_tictac_board[1] = tictac_board[3]
    new_tictac_board[2] = tictac_board[2]
    new_tictac_board[3] = tictac_board[1]
    new_tictac_board[4] = tictac_board[6]
    new_tictac_board[5] = tictac_board[5]
    new_tictac_board[6] = tictac_board[4]
    new_tictac_board[7] = tictac_board[9]
    new_tictac_board[8] = tictac_board[8]
    new_tictac_board[9] = tictac_board[7]
    return new_tictac_board

def add_symmetrical_states(tictac_board_state, q_value):
    
    states = [tictac_board_state, rotate_tictac_board(tictac_board_state), rotate_tictac_board(rotate_tictac_board(tictac_board_state)), rotate_tictac_board(rotate_tictac_board(rotate_tictac_board(tictac_board_state))), flip_tictac_board(tictac_board_state)]
    for state in states:
        position = get_pos(state)
        if position not in q_learning_states:
            q_learning_states[position] = np.zeros(9)
        
        q_learning_states[position] = q_value



def main():
    total_episodes = 300000  
    print("Starting Q-learning training...")
    #train_q_learning(total_episodes)
    print("Training complete.")

    
    print("Saving the trained model...")
    #save_q_values()

if __name__ == "__main__":
    main()


Starting Q-learning training...
Training complete.
Saving the trained model...


Starting Q-learning training...
QLearning Wins: 192555, SIAgent Wins: 76859, Draws: 30586
Training complete.
Saving the trained model...
Model successfully saved to d:\ML_TCD\AI\TicTacToeQLearningModel3.pickle

first is random

In [5]:
import random

def simulate_game(si_agent_plays_first):

    current_tictac_board = original_tictac_board()
    while True:
        current_player = 'O' if si_agent_plays_first else 'X'
        possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
        
        if not possible_positions:
            if val_draw(current_tictac_board):
                return "Draw"
            return "GameIncomplete"

        if current_player == 'O':
            show_tictac_board(current_tictac_board, "SemiIntelligent")
            position = random.choice(possible_positions)
            current_tictac_board[position] = 'O'
            if val_success_for_letter(current_tictac_board, 'O'):
                return "SIAgentWon"
        else:
            show_tictac_board(current_tictac_board, "Q-Learning")
            position = get_best_action_from_q_values(current_tictac_board, possible_positions)
            current_tictac_board[position] = 'X'
            if val_success_for_letter(current_tictac_board, 'X'):
                return "QLearningWon"

        # Check for draw after the move
        if not any(val_turn(current_tictac_board, i) for i in range(1, 10)):
            if val_draw(current_tictac_board):
                return "Draw"

        si_agent_plays_first = not si_agent_plays_first  

    return "GameIncomplete"


In [11]:
games = 10
si_agent_win = q_learning_win = draw = 0

load_q_values()  
print(f"Current Q Learning model has {len(q_learning_states)} states")

for _ in range(games):
    si_agent_plays_first = random.choice([True, False])
    winner = simulate_game(si_agent_plays_first)

    if winner == 'QLearningWon':
        q_learning_win += 1
    elif winner == 'SIAgentWon':
        si_agent_win += 1
    elif winner == 'Draw':
        draw += 1

print(f"QLearning Wins: {q_learning_win}, SIAgent Wins: {si_agent_win}, Draws: {draw}")


Current Q Learning model has 3381 states


SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  | O |  
---------
  |   |  



SemiIntelligent's turn:
  |   |  
---------
  | O | X
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  | O | X
---------
O |   |  



SemiIntelligent's turn:
X |   |  
---------
  | O | X
---------
O |   |  



Q-Learning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



SemiIntelligent's turn:
  |   |  
---------
  |   | X
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  | O | X
---------
  |   |  



SemiIntelligent's turn:
X |   |  
---------
  | O | X
---------
  |   |  



Q-Learning's turn:
X |   |  
---------
  | O | X
---------
  |   | O



SemiIntelligent's turn:
X |   |  
---------
  | O | X
---------
  | X | O



Q-Learning's turn:
X | O |  
---------
  | O | X
---------
  | X | O



SemiIntelligent's turn:
X | O | X
---------
  | O | X
---------

first move is semi intelligent

In [9]:
import random

def find_winning_move_si(tictac_board, player):
    """Find a move that can win the game for the given player."""
    for position in range(1, 10):
        if val_turn(tictac_board, position):
            tictac_board_copy = tictac_board.copy()
            tictac_board_copy[position] = player
            if val_success_for_letter(tictac_board_copy, player):
                return position
    return None

def simulate_game_si(si_agent_plays_first):
    current_tictac_board = original_tictac_board()
    current_player = 'O' if si_agent_plays_first else 'X'  # SI agent plays first
    while True:
        possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
        
        if not possible_positions:
            if val_draw(current_tictac_board):
                return "Draw"
            return "GameIncomplete"

        if current_player == 'O':  
            # First, check if a win is possible on this move.
            show_tictac_board(current_tictac_board, "SemiIntelligent")
            win_move = find_winning_move_si(current_tictac_board, 'O')
            if win_move:
                current_tictac_board[win_move] = 'O'
            else:
                # No immediate win, check if it needs to block the opponent's winning move
                block_move = find_winning_move_si(current_tictac_board, 'X')
                if block_move:
                    current_tictac_board[block_move] = 'O'
                else:
                    # Neither win nor block is possible, choose a strategic position
                    position = random.choice(possible_positions)
                    current_tictac_board[position] = 'O'

            if val_success_for_letter(current_tictac_board, 'O'):
                return "SIAgentWon"
        else:  # Q-learning agent
            show_tictac_board(current_tictac_board, "Q-Learning")
            position = get_best_action_from_q_values(current_tictac_board, possible_positions)
            current_tictac_board[position] = 'X'
            if val_success_for_letter(current_tictac_board, 'X'):
                return "QLearningWon"

        # Check for draw after the move
        if not any(val_turn(current_tictac_board, i) for i in range(1, 10)):
            if val_draw(current_tictac_board):
                return "Draw"

        current_player = 'X' if current_player == 'O' else 'O'  # Toggle turn

    return "GameIncomplete"


In [12]:
games = 10
si_agent_win = q_learning_win = draw = 0

load_q_values()  
print(f"Current Q Learning model has {len(q_learning_states)} states")

for _ in range(games):
    si_agent_plays_first = random.choice([True, False])
    winner = simulate_game_si(si_agent_plays_first)

    if winner == 'QLearningWon':
        q_learning_win += 1
    elif winner == 'SIAgentWon':
        si_agent_win += 1
    elif winner == 'Draw':
        draw += 1

print(f"QLearning Wins: {q_learning_win}, SIAgent Wins: {si_agent_win}, Draws: {draw}")


Current Q Learning model has 3381 states


SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  |   |  
---------
  |   | O



SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
  | X | O



Q-Learning's turn:
O |   |  
---------
  |   |  
---------
  | X | O



SemiIntelligent's turn:
O |   |  
---------
  |   |  
---------
X | X | O



SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  | O |  
---------
  |   |  



SemiIntelligent's turn:
  |   | X
---------
  | O |  
---------
  |   |  



Q-Learning's turn:
  |   | X
---------
  | O |  
---------
O |   |  



SemiIntelligent's turn:
  |   | X
---------
  | O |  
---------
O | X |  



Q-Learning's turn:
  |   | X
---------
O | O |  
---------
O | X |  



SemiIntelligent's turn:
  | X | X
---------
O | O |  
---------
O | X |  



SemiIntelligent's turn:
  |   |  
---------
  |   |  
----

In [13]:
def simulate_game_qllll(q_learning_plays_first):
    current_tictac_board = original_tictac_board()
    while True:
        current_player = 'X' if q_learning_plays_first else 'O'
        possible_positions = [i for i in range(1, 10) if val_turn(current_tictac_board, i)]
        
        if not possible_positions:
            if val_draw(current_tictac_board):
                return "Draw"
            return "GameIncomplete"

        if current_player == 'X':
            show_tictac_board(current_tictac_board, "Q-Learning")
            position = get_best_action_from_q_values(current_tictac_board, possible_positions)
            current_tictac_board[position] = 'X'
            if val_success_for_letter(current_tictac_board, 'X'):
                return "QLearningWon"
        else:
            show_tictac_board(current_tictac_board, "SemiIntelligent")
            position = random.choice(possible_positions)
            current_tictac_board[position] = 'O'
            if val_success_for_letter(current_tictac_board, 'O'):
                return "SIAgentWon"

        
        if not any(val_turn(current_tictac_board, i) for i in range(1, 10)):
            if val_draw(current_tictac_board):
                return "Draw"

        q_learning_plays_first = not q_learning_plays_first  

    return "GameIncomplete"


In [14]:
games = 10
si_agent_win = q_learning_win = draw = 0

load_q_values()  
print(f"Current Q Learning model has {len(q_learning_states)} states")

for _ in range(games):
    si_agent_plays_first = random.choice([True, False])
    winner = simulate_game_qllll(si_agent_plays_first)

    if winner == 'QLearningWon':
        q_learning_win += 1
    elif winner == 'SIAgentWon':
        si_agent_win += 1
    elif winner == 'Draw':
        draw += 1

print(f"QLearning Wins: {q_learning_win}, SIAgent Wins: {si_agent_win}, Draws: {draw}")


Current Q Learning model has 3381 states


Q-Learning's turn:
  |   |  
---------
  |   |  
---------
  |   |  



SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
X |   |  



Q-Learning's turn:
  | O |  
---------
  |   |  
---------
X |   |  



SemiIntelligent's turn:
  | O |  
---------
X |   |  
---------
X |   |  



Q-Learning's turn:
O | O |  
---------
X |   |  
---------
X |   |  



SemiIntelligent's turn:
O | O |  
---------
X |   |  
---------
X |   | X



SemiIntelligent's turn:
  |   |  
---------
  |   |  
---------
  |   |  



Q-Learning's turn:
  |   |  
---------
  |   |  
---------
  | O |  



SemiIntelligent's turn:
  |   |  
---------
  | X |  
---------
  | O |  



Q-Learning's turn:
  |   | O
---------
  | X |  
---------
  | O |  



SemiIntelligent's turn:
  |   | O
---------
  | X |  
---------
X | O |  



Q-Learning's turn:
  |   | O
---------
O | X |  
---------
X | O |  



SemiIntelligent's turn:
X |   | O
---------
O | X |  
---------