In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model 
from tensorflow.keras.losses import MeanSquaredError

class QAgent:
    """Epsilon-greedy Q-learning agent backed by a small dense network.

    The network takes a flattened 9-cell board (shape ``(1, 9)``) and
    outputs one Q-value per cell. Exploration is controlled by ``epsilon``,
    which is decayed after every call to :meth:`train` until it reaches
    ``epsilon_min``.
    """

    def __init__(self):
        self.gamma = 0.95  # Discount factor for future rewards
        self.epsilon = 1.0  # Initial exploration rate
        self.epsilon_min = 0.01  # Minimum exploration rate
        self.epsilon_decay = 0.995  # Decay rate for exploration probability
        self.learning_rate = 0.001
        self.model = self.create_model()

    def create_model(self):
        """Build and compile the Q-network (9 inputs -> 64 -> 64 -> 9 Q-values)."""
        model = Sequential([
            Dense(64, input_dim=9, activation='relu'),  # Input is the flattened board
            Dense(64, activation='relu'),
            Dense(9, activation='linear')  # Output the Q-values for each action
        ])
        # Use the class directly instead of the alias string
        model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        Args:
            state: Board as an array of shape ``(1, 9)``.

        Returns:
            An action index in ``[0, 9)``. With probability ``epsilon`` it is
            uniformly random; otherwise it is the argmax of the predicted
            Q-values. The returned cell is not checked for being empty.
        """
        if np.random.rand() <= self.epsilon:
            return np.random.randint(0, 9)  # Explore action space
        # verbose=0: predict() would otherwise print a progress bar per call.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # Exploit learned values

    def train(self, state, action, reward, next_state, done):
        """Run one Q-learning update on a single transition.

        The target for ``action`` is ``reward`` when the episode ended, and
        ``reward + gamma * max_a Q(next_state, a)`` otherwise; the Q-values
        of the other actions are left at the network's current prediction.

        Args:
            state: Board before the move, shape ``(1, 9)``.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Board after the move, shape ``(1, 9)``.
            done: Whether the transition ended the episode.
        """
        target = reward
        if not done:
            next_q = self.model.predict(next_state, verbose=0)[0]
            target = reward + self.gamma * np.amax(next_q)
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decay exploration, but never step below the configured floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

class TicTacToeEnv:
    """Tic-tac-toe environment in which the agent plays against a random opponent.

    The board is a flat array of 9 ints: ``1`` for the agent, ``-1`` for the
    opponent and ``0`` for an empty cell. Observations returned by
    :meth:`reset` and :meth:`step` have shape ``(1, 9)`` and are independent
    copies, so a previously returned state is not changed by later moves.
    """

    # The eight lines (rows, columns, diagonals) that win the game.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        self.board = np.zeros(9, dtype=int)
        self.done = False

    def reset(self):
        """Clear the board and return the initial observation."""
        self.board = np.zeros(9, dtype=int)
        self.done = False
        return self._observation()

    def step(self, action):
        """Play the agent's move at ``action``, then a random opponent move.

        Args:
            action: Index of the cell (0-8) the agent wants to mark.

        Returns:
            ``(observation, reward, done)`` where reward is:
            ``-10`` for an invalid move or a loss (episode ends),
            ``10`` for a win, ``0`` for a draw, and otherwise a shaping
            reward for the agent's move: ``3`` if it blocked an opponent
            line, ``2`` if it left the agent one move from winning,
            ``1`` for any other move.
        """
        if self.board[action] != 0:
            return self._finish(-10)  # Penalize invalid moves
        self.board[action] = 1  # AI's move

        if self.check_winner(1):
            return self._finish(10)  # AI wins
        if np.all(self.board != 0):
            return self._finish(0)  # Draw

        # Shaping reward for the move itself; the opponent still gets a turn.
        if self._blocked_threat(action):
            reward = 3  # AI blocks the player from winning
        elif self.check_potential_win_setup(action):
            reward = 2  # AI sets up for a win
        else:
            reward = 1  # Game continues

        self.opponent_move()

        if self.check_winner(-1):
            return self._finish(-10)  # AI loses
        if np.all(self.board != 0):
            return self._finish(0)  # Draw

        return self._observation(), reward, False

    def _observation(self):
        """Return the board as a ``(1, 9)`` array that does not alias ``self.board``."""
        return self.board[np.newaxis, :].copy()

    def _finish(self, reward):
        """Mark the episode as over and build the terminal step result."""
        self.done = True
        return self._observation(), reward, True

    def _blocked_threat(self, action):
        """Return True if the agent's mark at ``action`` sits in a line holding two opponent marks."""
        return any(
            action in combo and sum(self.board[pos] == -1 for pos in combo) == 2
            for combo in self.WIN_LINES
        )

    def check_immediate_threat(self):
        """Return True if the opponent currently has two marks and an empty cell in some line."""
        return self.is_one_move_away(-1)

    def check_potential_win_setup(self, action):
        """Return True if an agent mark at ``action`` leaves the agent one move from winning.

        The board is left exactly as it was found: the cell's previous value
        is restored, so a move that was already played is not erased.
        """
        previous = self.board[action]
        self.board[action] = 1  # Temporarily make the move
        try:
            return self.is_one_move_away(1)
        finally:
            self.board[action] = previous

    def is_one_move_away(self, player):
        """Return True if ``player`` has a line with two of its marks and an empty cell."""
        for combo in self.WIN_LINES:
            if sum(self.board[pos] == player for pos in combo) == 2:
                if any(self.board[pos] == 0 for pos in combo):
                    return True
        return False

    def opponent_move(self):
        """Place an opponent mark on a uniformly random empty cell, if any."""
        empty_cells = np.where(self.board == 0)[0]
        if len(empty_cells) > 0:
            self.board[np.random.choice(empty_cells)] = -1

    def check_winner(self, player):
        """Return True if ``player`` occupies all three cells of any winning line."""
        return any(np.all(self.board[list(line)] == player) for line in self.WIN_LINES)


In [11]:
# Train the agent or load an existing model
should_train = False


def train_agent(episodes=1000):
    """Train a fresh QAgent against TicTacToeEnv and save its network.

    Args:
        episodes: Number of games to play.

    Returns:
        The trained QAgent, so the caller can keep using it after training.
    """
    env = TicTacToeEnv()
    agent = QAgent()

    for e in range(episodes):
        state = env.reset()
        done = False

        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.train(state, action, reward, next_state, done)
            state = next_state

        if (e + 1) % 100 == 0:
            print(f"Episode {e+1}/{episodes} - Epsilon: {agent.epsilon:.2f}")

    # After training, save the model
    agent.model.save('tic_tac_toe_model.h5')
    print("Model saved successfully.")
    return agent


if should_train:
    # Keep the trained agent in the notebook namespace for later cells.
    agent = train_agent(1000)
else:
    try:
        agent = QAgent()  # Ensure agent is defined
        # Load the model without needing to specify custom objects for standard cases
        agent.model = load_model('tic_tac_toe_model.h5')
        # A fresh QAgent starts with epsilon = 1.0, which makes act() always
        # choose randomly; lower it so the loaded network is actually used.
        agent.epsilon = agent.epsilon_min
        print("Model loaded successfully.")
    except Exception as e:
        print("Failed to load model. Error:", str(e))




Model loaded successfully.


In [12]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd

class TicTacToeGameGUI:
    """ipywidgets front end for playing tic-tac-toe against an agent.

    The GUI stores the human's marks as ``1`` ('X') and the agent's marks as
    ``-1`` ('O') in ``self.board``. Finished games are appended to
    ``self.game_records`` and written to CSV when the game is closed.
    """

    # How many times the agent may propose an occupied cell before the GUI
    # picks a random empty cell on its behalf.
    MAX_AGENT_RETRIES = 10

    def __init__(self, agent):
        """Build the widgets, wire up the callbacks and display the board.

        Args:
            agent: Object exposing ``act(state)`` that returns a cell index
                for a board of shape ``(1, 9)``.
        """
        self.agent = agent
        self.game_records = []
        self.board = np.zeros(9, dtype=int)
        # Set before any callback is registered so handlers can always read it.
        self.game_over = False
        self.buttons = [widgets.Button(description='', button_style='', layout=widgets.Layout(height='60px', width='60px', border='1px solid black')) for _ in range(9)]
        for button in self.buttons:
            button.on_click(self.on_button_click)
        self.output = widgets.Output()
        self.reset_button = widgets.Button(description="Reset", button_style='info')
        self.reset_button.on_click(self.reset_board)
        self.grid = widgets.GridBox(self.buttons, layout=widgets.Layout(grid_template_columns="repeat(3, 100px)"))
        self.close_game_button = widgets.Button(description="Close Game", button_style='danger')
        self.close_game_button.on_click(self.close_game)
        display(self.grid, self.reset_button, self.close_game_button, self.output)

    def on_button_click(self, b):
        """Handle a click on a board cell: place the human's mark, then let the agent reply."""
        if self.game_over:  # Ignore clicks if the game is over
            return

        index = self.buttons.index(b)
        if self.board[index] == 0:  # If the cell is empty
            self.board[index] = 1  # Player move
            self.buttons[index].description = 'X'
            self.buttons[index].button_style = 'success'
            self.check_game_status()
            if not self.game_over:
                self.agent_move()

    def agent_move(self):
        """Ask the agent for a move and place it on the board.

        The agent is queried up to ``MAX_AGENT_RETRIES`` extra times if it
        proposes an occupied cell; after that a random empty cell is used so
        a deterministic agent cannot stall the GUI.
        """
        if self.game_over:
            return

        empty_cells = np.flatnonzero(self.board == 0)
        if empty_cells.size == 0:
            return

        with self.output:
            clear_output(wait=True)
            # TicTacToeEnv marks the agent's own cells as +1 and the
            # opponent's as -1, while this GUI stores the human as +1 and
            # the agent as -1. Flip the sign so the agent sees the board
            # from its own point of view.
            state = -self.board[np.newaxis, :]
            action = self.agent.act(state)

            retries = 0
            while self.board[action] != 0 and retries < self.MAX_AGENT_RETRIES:
                print(f"Invalid move attempted at {action}. Retrying.")
                action = self.agent.act(state)
                retries += 1

            if self.board[action] != 0:
                # The agent kept proposing occupied cells; choose for it.
                action = int(np.random.choice(empty_cells))

            # Make the move
            self.board[action] = -1
            self.buttons[action].description = 'O'
            self.buttons[action].button_style = 'danger'
            self.check_game_status()

    def record_game(self, game_id, winner, moves):
        """Append one finished game to ``self.game_records``."""
        self.game_records.append({
            'game_id': game_id,
            'winner': winner,
            'moves': moves
        })

    def save_game_records(self, filename='game_data.csv'):
        """Write all recorded games to ``filename`` as CSV."""
        df = pd.DataFrame(self.game_records)
        df.to_csv(filename, index=False)
        print(f"Game records saved to {filename}")

    def check_game_status(self):
        """Update ``self.game_over``; on a win or full board, show and record the result."""
        winner = self.check_winner()
        if winner or np.all(self.board != 0):
            self.game_over = True
            winner_label = 'draw' if not winner else 'player' if winner == 1 else 'agent'
            result = "It's a draw!" if not winner else ("You win!" if winner == 1 else "You lose!")
            with self.output:
                clear_output(wait=True)
                print(result)
            # Record this game's outcome
            self.record_game(game_id=np.random.randint(1000, 9999), winner=winner_label, moves=np.count_nonzero(self.board))
        else:
            self.game_over = False

    def check_winner(self):
        """Return ``1`` if the human has a full line, ``-1`` if the agent has, else ``None``."""
        combos = [(0,1,2), (3,4,5), (6,7,8), (0,3,6), (1,4,7), (2,5,8), (0,4,8), (2,4,6)]
        for player in [1, -1]:
            for combo in combos:
                if np.all(self.board[list(combo)] == player):
                    return player
        return None

    def reset_board(self, b=None):
        """Clear the board, the buttons and the message area for a new game."""
        self.board = np.zeros(9, dtype=int)  # Reset the internal game board
        for button in self.buttons:
            button.description = ''  # Clear the "X" / "O" text
            button.button_style = ''  # Clear style
            button.disabled = False   # Re-enable the button if it was disabled
        self.game_over = False  # Ensure the game state is reset
        with self.output:
            clear_output(wait=True)  # Clear any messages displayed

    def close_game(self, b):
        """Save the recorded games to CSV and disable every control."""
        self.save_game_records()
        # Disable all buttons to stop the game
        for button in self.buttons:
            button.disabled = True
        self.reset_button.disabled = True
        self.close_game_button.disabled = True
        with self.output:
            clear_output(wait=True)
            print("Game has been closed and data saved.")


# Create and display the game GUI.
# Reuse the agent prepared by the train/load cell so its model is not thrown
# away; only fall back to a fresh (untrained) agent when none exists yet.
if 'agent' not in globals():
    agent = QAgent()
game_gui = TicTacToeGameGUI(agent)


GridBox(children=(Button(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_…

Button(button_style='info', description='Reset', style=ButtonStyle())

Button(button_style='danger', description='Close Game', style=ButtonStyle())

Output()

Game records saved to game_data.csv
