In [1]:
import pickle
from copy import deepcopy
import ipywidgets as widgets

In [2]:
def load_weights(difficulty):
    """Load the pickled agent weights for the requested difficulty.

    Args:
        difficulty: One of 'easy', 'medium' or 'hard'.

    Returns:
        The object stored in the matching ``tic_tac_toe_weights_*.pkl`` file.

    Raises:
        KeyError: If ``difficulty`` is not one of the known levels.
        OSError: If the weights file cannot be opened.
    """
    path = {
        'easy': 'tic_tac_toe_weights_easy.pkl',
        'medium': 'tic_tac_toe_weights_medium.pkl',
        'hard': 'tic_tac_toe_weights_hard.pkl',
    }[difficulty]

    # NOTE: unpickling can execute arbitrary code; only load weight files
    # that come from a trusted source.
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [3]:
class TicTacToe:
    """Minimal tic-tac-toe state tracker.

    The board is a flat list of nine cells addressed by index 0-8. A cell
    holding 0 is empty; any other value is the mark of the player who moved
    there.

    Attributes are assigned in ``reset``:
        board: list of nine cell values.
        game_over: True once a line is completed or the board is full.
        winner: the winning player's mark, or None (no winner yet / draw).
    """

    # Index triples that form a completed line (three in a row).
    WINNING_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        # Single source of truth for the initial state.
        self.reset()

    def move(self, position, player):
        """Place ``player``'s mark at ``position`` if the move is legal.

        Args:
            position: Board index in the range 0-8.
            player: Mark to store in the cell.

        Returns:
            True if the mark was placed, False if the cell was occupied or
            the game had already ended (the board is left untouched).

        Raises:
            IndexError: If ``position`` is outside 0-8. Negative values are
                rejected explicitly; otherwise Python's negative indexing
                would silently wrap around to the end of the board.
        """
        if not 0 <= position < len(self.board):
            raise IndexError(f"board position out of range: {position!r}")
        if self.game_over or self.board[position] != 0:
            return False
        self.board[position] = player
        self.check_game_over(player)
        return True

    def check_game_over(self, player):
        """Update ``game_over``/``winner`` after ``player`` has moved.

        A completed line for ``player`` ends the game with that winner; a
        full board with no completed line ends it with ``winner`` unchanged.
        """
        for line in self.WINNING_LINES:
            if all(self.board[pos] == player for pos in line):
                self.game_over = True
                self.winner = player
                return
        if 0 not in self.board:
            self.game_over = True

    def reset(self):
        """Return the game to its initial, empty state."""
        self.board = [0] * 9
        self.game_over = False
        self.winner = None


In [4]:
def features(state):
    """Turn a board into the feature vector used by the linear evaluator.

    Cells equal to 1 are counted as the evaluated side's marks, cells equal
    to -1 as the other side's marks, and 0 as empty.

    Returns:
        ``[x0, x1, x2, x3, x4, x5, x6]`` where
        x0 is the constant bias term 1,
        x1 counts lines with two 1-marks and one empty cell,
        x2 counts lines with two -1-marks and one empty cell,
        x3 is 1 when the centre cell (index 4) holds a 1, else 0,
        x4 counts corner cells (0, 2, 6, 8) holding a 1,
        x5 counts lines with one 1-mark and two empty cells,
        x6 counts lines fully occupied by 1-marks.
    """
    lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    centre_held = int(state[4] == 1)
    corners_held = sum(1 for idx in (0, 2, 6, 8) if state[idx] == 1)

    own_pairs = rival_pairs = own_singles = own_triples = 0
    for a, b, c in lines:
        cells = [state[a], state[b], state[c]]
        own = cells.count(1)
        rival = cells.count(-1)
        empty = cells.count(0)
        # The branches are mutually exclusive; each line feeds at most one counter.
        if own == 2 and empty == 1:
            own_pairs += 1
        elif rival == 2 and empty == 1:
            rival_pairs += 1
        elif own == 1 and empty == 2:
            own_singles += 1
        elif own == 3:
            own_triples += 1

    return [1, own_pairs, rival_pairs, centre_held, corners_held, own_singles, own_triples]

In [5]:
def value(state, weights):
    """Score a board as the dot product of its features with ``weights``.

    Pairs are formed with ``zip``, so any surplus features or weights are
    ignored.
    """
    total = 0
    for feature, weight in zip(features(state), weights):
        total += feature * weight
    return total

In [6]:
def _prompt_for_move(game):
    """Ask on the console until the user names an empty cell in 0-8."""
    while True:
        raw = input("Your move (0-8): ")
        try:
            choice = int(raw)
        except ValueError:
            print("Please enter a whole number between 0 and 8.")
            continue
        if not 0 <= choice <= 8:
            print("Please enter a whole number between 0 and 8.")
            continue
        if game.board[choice] != 0:
            print("That cell is already taken; choose another one.")
            continue
        return choice


def play_with_agent(game, weights):
    """Run a console game: the human plays -1, the agent plays 1.

    The human moves first each round. The agent then tries every empty cell
    on a copy of the game and plays the one whose resulting board scores
    highest under ``value(board, weights)`` (the first such cell on ties).

    Args:
        game: Game object exposing ``board``, ``game_over`` and ``move``.
        weights: Weight vector passed through to ``value``.

    Invalid input (non-numeric, out of range, or an occupied cell) is
    re-prompted. Previously it either crashed the loop or was silently
    dropped, which let the agent move without the human having moved.
    """
    while not game.game_over:
        player_move = _prompt_for_move(game)
        game.move(player_move, -1)
        if game.game_over:
            break

        best_move = None
        best_value = -float('inf')
        for move in range(9):
            if game.board[move] == 0:
                # Evaluate the move on a copy so the real game is untouched.
                game_copy = deepcopy(game)
                game_copy.move(move, 1)
                move_value = value(game_copy.board, weights)
                if move_value > best_value:
                    best_value = move_value
                    best_move = move
        print(f"Agent moves at {best_move}")
        game.move(best_move, 1)

In [7]:
# Module-level game state: play_game_with_agent_gui and display_outcome below
# read and update this global `game` rather than taking it as a parameter.
game = TicTacToe()
# NOTE(review): this banner announces an agent-vs-agent match, but the callbacks
# defined below play a human ('O', -1) against a single agent ('X', 1) —
# confirm the intended wording.
print("Starting a new game between two trained agents. Agent 1 is 'X', and Agent 2 is 'O'.")

def print_board(state):
    """Print the board as a 3x3 text grid.

    Cells holding 1 are shown as 'X', -1 as 'O' and 0 as a blank; rows are
    separated by a '-----' rule.
    """
    glyphs = {0: " ", 1: "X", -1: "O"}
    print("\nBoard:")
    for row in range(3):
        if row > 0:
            # Separator goes between rows only, never after the last one.
            print("-----")
        first = row * 3
        cells = [glyphs[state[first + col]] for col in range(3)]
        print("|".join(cells))
def create_board_gui(play_game_with_agent_gui):
    """Build and display the widget UI: difficulty picker, 3x3 grid, status label.

    Args:
        play_game_with_agent_gui: Click handler called as
            ``handler(button, buttons, difficulty, outcome_label)`` whenever
            one of the nine grid buttons is pressed. ``difficulty`` is the
            dropdown's value at the time of the click.
    """
    buttons = []
    for _ in range(9):
        cell = widgets.Button(
            description='',
            button_style='',
            layout=widgets.Layout(width='60px', height='60px'),
        )
        buttons.append(cell)

    difficulty_dropdown = widgets.Dropdown(
        options=['easy', 'medium', 'hard'],
        value='medium',
        description='Difficulty:',
    )

    outcome_label = widgets.Label(value="Select difficulty to start the game.")

    def handle_click(clicked):
        # Read the dropdown at click time so a changed difficulty takes effect.
        play_game_with_agent_gui(clicked, buttons, difficulty_dropdown.value, outcome_label)

    for cell in buttons:
        cell.on_click(handle_click)

    grid_layout = widgets.Layout(grid_template_columns="repeat(3, 60px)")
    game_board = widgets.GridBox(buttons, layout=grid_layout)
    display(difficulty_dropdown, game_board, outcome_label)

def agent_move(game, player, weights):
    """Pick the empty cell whose resulting board scores best for ``player``.

    Player 1 maximises ``value(board, weights)`` and player -1 minimises it.
    Each candidate is tried on a deep copy, so ``game`` itself is not
    modified. Ties keep the lowest-indexed cell.

    Returns:
        The chosen board index, or None when no candidate improved on the
        starting bound (for example when there is no empty cell).
    """
    maximising = player == 1
    minimising = player == -1

    best_move = None
    best_value = -float('inf') if maximising else float('inf')

    open_cells = [cell for cell in range(9) if game.board[cell] == 0]
    for cell in open_cells:
        trial = deepcopy(game)
        trial.move(cell, player)
        score = value(trial.board, weights)

        if maximising:
            improved = score > best_value
        elif minimising:
            improved = score < best_value
        else:
            improved = False

        if improved:
            best_value = score
            best_move = cell
    return best_move

def play_game_with_agent_gui(button, buttons, difficulty, outcome_label):
    """Handle a click on a grid button: play the human move, then the agent's reply.

    The human plays -1 ('O') and the agent plays 1 ('X') on the module-level
    ``game``. Clicks are ignored once the game is over or when the cell is
    already taken. The global ``weights`` is reloaded for ``difficulty``
    before every agent reply.

    Args:
        button: The widget that was clicked.
        buttons: All nine grid buttons; a button's list position is its board index.
        difficulty: Difficulty name forwarded to ``load_weights``.
        outcome_label: Label that receives the result text when the game ends.
    """
    global weights

    def mark(cell, symbol):
        # Show the mark and lock the button so it cannot be clicked again.
        buttons[cell].description = symbol
        buttons[cell].disabled = True

    clicked = buttons.index(button)

    playable = not game.game_over and game.board[clicked] == 0
    if not playable:
        return

    game.move(clicked, -1)
    mark(clicked, 'O')

    if not game.game_over:
        weights = load_weights(difficulty)
        reply = agent_move(game, 1, weights)
        game.move(reply, 1)
        mark(reply, 'X')

    if game.game_over:
        display_outcome(outcome_label)

def display_outcome(outcome_label):
    """Write the result of the module-level ``game`` into ``outcome_label``.

    A ``winner`` of None is reported as a draw, 1 as an agent win and any
    other value as a win for the human player.
    """
    winner = game.winner
    if winner is None:
        message = "The game is a draw."
    elif winner == 1:
        message = "The agent wins!"
    else:
        message = "Congratulations, you win!"
    outcome_label.value = message

Starting a new game between two trained agents. Agent 1 is 'X', and Agent 2 is 'O'.


In [8]:
# Rebind the global `game` to a fresh board before showing the UI.
game = TicTacToe()
# Preload the 'medium' weights; play_game_with_agent_gui overwrites the global
# `weights` with the selected difficulty's file before each agent reply.
weights = load_weights('medium')
# Build the widgets, register the click handler on every button and display them.
create_board_gui(play_game_with_agent_gui)

Dropdown(description='Difficulty:', index=1, options=('easy', 'medium', 'hard'), value='medium')

GridBox(children=(Button(layout=Layout(height='60px', width='60px'), style=ButtonStyle()), Button(layout=Layou…

Label(value='Select difficulty to start the game.')