In [1]:
import numpy as np
import random
import pickle
import os

In [2]:
import numpy as np
import random
import pickle
import os

class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        # 1 for X (player), -1 for O (agent)
        self.player_symbol = 1
        self.agent_symbol = -1
        self.reset()
        
    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_state = self.get_state()
        self.game_over = False
        return self.current_state
    
    def get_state(self):
        return str(self.board.reshape(9))
    
    def get_valid_moves(self):
        valid_moves = []
        for i in range(3):
            for j in range(3):
                if self.board[i, j] == 0:
                    valid_moves.append((i, j))
        return valid_moves
    
    def make_move(self, position, symbol):
        if self.board[position] != 0 or self.game_over:
            return False
        
        self.board[position] = symbol
        self.current_state = self.get_state()
        
        if self.check_win(symbol):
            self.game_over = True
        elif len(self.get_valid_moves()) == 0:
            self.game_over = True
            
        return True
    
    def check_win(self, symbol):
        for i in range(3):
            if np.all(self.board[i, :] == symbol):
                return True
        
        for i in range(3):
            if np.all(self.board[:, i] == symbol):
                return True
        
        if np.all(np.diag(self.board) == symbol):
            return True
        if np.all(np.diag(np.fliplr(self.board)) == symbol):
            return True
        
        return False
    
    def get_reward(self, symbol):
        if self.check_win(symbol):
            return 1
        elif self.check_win(-symbol):
            return -1
        elif len(self.get_valid_moves()) == 0:
            return 0.5  # Draw
        else:
            return 0  # Game not over yet
    
    def print_board(self):
        symbols = {0: ' ', 1: 'X', -1: 'O'}
        for i in range(3):
            row = []
            for j in range(3):
                row.append(symbols[self.board[i, j]])
            print(' | '.join(row))
            if i < 2:
                print('---------')
        print()

In [3]:
class QLearningAgent:
    def __init__(self, epsilon=0.1, alpha=0.5, gamma=0.9):
        # epsilon: exploration rate
        # alpha: learning rate
        # gamma: discount factor
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.q_table = {}
    
    def get_q_value(self, state, action):
        if state not in self.q_table:
            self.q_table[state] = {}
        if action not in self.q_table[state]:
            self.q_table[state][action] = 0.0
        return self.q_table[state][action]
    
    def choose_action(self, state, valid_moves, is_training=True):
        if not valid_moves:
            return None
        
        # Exploration vs. exploitation
        if is_training and random.random() < self.epsilon:
            return random.choice(valid_moves)
        else:
            q_values = [self.get_q_value(state, action) for action in valid_moves]
            max_q = max(q_values)
            
            best_actions = [valid_moves[i] for i, q in enumerate(q_values) if q == max_q]
            return random.choice(best_actions)
    
    def update_q_value(self, state, action, reward, next_state, next_valid_moves):
        if not next_valid_moves:
            max_next_q = 0
        else:
            max_next_q = max([self.get_q_value(next_state, next_action) for next_action in next_valid_moves])
        
        current_q = self.get_q_value(state, action)
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        
        if state not in self.q_table:
            self.q_table[state] = {}
        self.q_table[state][action] = new_q
    
    def save_model(self, filename='tictactoe_q_model.pkl'):
        #Save the Q-table to a file
        with open(filename, 'wb') as f:
            pickle.dump(self.q_table, f)
        print(f"Model saved to {filename}")
    
    def load_model(self, filename='tictactoe_q_model.pkl'):
        #Load the Q-table from a file
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                self.q_table = pickle.load(f)
            print(f"Model loaded from {filename}")
            return True
        return False

In [4]:
def train_agent(episodes=10000):
    env = TicTacToe()
    agent = QLearningAgent()
    
    print(f"Training for {episodes} episodes...")
    
    win_count = 0
    loss_count = 0
    draw_count = 0
    
    for episode in range(episodes):
        state = env.reset()
        done = False
        
        player_turn = random.choice([True, False])
        
        while not done:
            if player_turn:
                valid_moves = env.get_valid_moves()
                if valid_moves:
                    random_move = random.choice(valid_moves)
                    env.make_move(random_move, env.player_symbol)
                player_turn = False
            else:
                valid_moves = env.get_valid_moves()
                if not valid_moves:
                    done = True
                    continue
                
                action = agent.choose_action(state, valid_moves)
                
                old_state = state
                
                env.make_move(action, env.agent_symbol)
                
                new_state = env.get_state()
                
                reward = env.get_reward(env.agent_symbol)
                
                next_valid_moves = env.get_valid_moves()
                
                agent.update_q_value(old_state, action, reward, new_state, next_valid_moves)
                
                state = new_state
                player_turn = True
                
                if env.game_over:
                    done = True
        
        if env.check_win(env.agent_symbol):
            win_count += 1
        elif env.check_win(env.player_symbol):
            loss_count += 1
        else:
            draw_count += 1
        
        if (episode + 1) % 1000 == 0:
            print(f"Episode {episode + 1}/{episodes}")
            print(f"Wins: {win_count}, Losses: {loss_count}, Draws: {draw_count}")
            win_rate = win_count / 1000
            loss_rate = loss_count / 1000
            draw_rate = draw_count / 1000
            print(f"Win rate: {win_rate:.2f}, Loss rate: {loss_rate:.2f}, Draw rate: {draw_rate:.2f}")
            
            # Reset counts
            win_count = 0
            loss_count = 0
            draw_count = 0
    
    agent.save_model()
    return agent

In [5]:
agent = train_agent(episodes=50000)

Training for 50000 episodes...
Episode 1000/50000
Wins: 561, Losses: 364, Draws: 75
Win rate: 0.56, Loss rate: 0.36, Draw rate: 0.07
Episode 2000/50000
Wins: 654, Losses: 263, Draws: 83
Win rate: 0.65, Loss rate: 0.26, Draw rate: 0.08
Episode 3000/50000
Wins: 666, Losses: 246, Draws: 88
Win rate: 0.67, Loss rate: 0.25, Draw rate: 0.09
Episode 4000/50000
Wins: 684, Losses: 252, Draws: 64
Win rate: 0.68, Loss rate: 0.25, Draw rate: 0.06
Episode 5000/50000
Wins: 708, Losses: 222, Draws: 70
Win rate: 0.71, Loss rate: 0.22, Draw rate: 0.07
Episode 6000/50000
Wins: 735, Losses: 213, Draws: 52
Win rate: 0.73, Loss rate: 0.21, Draw rate: 0.05
Episode 7000/50000
Wins: 719, Losses: 209, Draws: 72
Win rate: 0.72, Loss rate: 0.21, Draw rate: 0.07
Episode 8000/50000
Wins: 701, Losses: 241, Draws: 58
Win rate: 0.70, Loss rate: 0.24, Draw rate: 0.06
Episode 9000/50000
Wins: 720, Losses: 221, Draws: 59
Win rate: 0.72, Loss rate: 0.22, Draw rate: 0.06
Episode 10000/50000
Wins: 751, Losses: 179, Draws: 

In [6]:
def play_game(agent, use_trained_model=True):
    env = TicTacToe()
    if use_trained_model:
        agent.load_model()
    
    print("New TicTacToe Game!")
    print("You are 'X', the agent is 'O'")
    print("Enter your move as row,col (e.g., '0,0' for top-left)")
    
    player_turn = True  
    
    done = False
    state = env.get_state()
    
    while not done:
        if player_turn:
            env.print_board()
            valid_moves = env.get_valid_moves()
            if not valid_moves:
                print("It's a draw!")
                done = True
                continue
            
            valid_input = False
            while not valid_input:
                try:
                    move_input = input("Your move (row,col): ")
                    row, col = map(int, move_input.split(','))
                    move = (row, col)
                    if move in valid_moves:
                        valid_input = True
                    else:
                        print("Invalid move! Try again.")
                except ValueError:
                    print("Invalid input! Please enter as 'row,col' (e.g., '0,0').")
            
            env.make_move(move, env.player_symbol)
            if env.check_win(env.player_symbol):
                env.print_board()
                print("You win!")
                done = True
            player_turn = False
        else:
            valid_moves = env.get_valid_moves()
            if not valid_moves:
                env.print_board()
                print("It's a draw!")
                done = True
                continue
            
            action = agent.choose_action(state, valid_moves, is_training=False)
            env.make_move(action, env.agent_symbol)
            print(f"Agent chose: {action}")
            
            if env.check_win(env.agent_symbol):
                env.print_board()
                print("Agent wins!")
                done = True
            
            state = env.get_state()
            player_turn = True

In [8]:
while True:
    play_game(agent)
    play_again = input("Play again? (y/n): ")
    if play_again.lower() != 'y':
        break

Model loaded from tictactoe_q_model.pkl
New TicTacToe Game!
You are 'X', the agent is 'O'
Enter your move as row,col (e.g., '0,0' for top-left)
  |   |  
---------
  |   |  
---------
  |   |  



Your move (row,col):  1,2


Agent chose: (0, 0)
O |   |  
---------
  |   | X
---------
  |   |  



Your move (row,col):  1,1


Agent chose: (2, 0)
O |   |  
---------
  | X | X
---------
O |   |  



Your move (row,col):  1,0


O |   |  
---------
X | X | X
---------
O |   |  

You win!


Play again? (y/n):  y


Model loaded from tictactoe_q_model.pkl
New TicTacToe Game!
You are 'X', the agent is 'O'
Enter your move as row,col (e.g., '0,0' for top-left)
  |   |  
---------
  |   |  
---------
  |   |  



Your move (row,col):  1,1


Agent chose: (0, 0)
O |   |  
---------
  | X |  
---------
  |   |  



Your move (row,col):  2,2


Agent chose: (1, 0)
O |   |  
---------
O | X |  
---------
  |   | X



Your move (row,col):  0,2


Agent chose: (2, 0)
O |   | X
---------
O | X |  
---------
O |   | X

Agent wins!


Play again? (y/n):  n
