<a href="https://colab.research.google.com/github/samp3209/personalprojects/blob/main/tictactoe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The main purpose of this project is to use reinforcement learning to create a model to solve tic tac toe in as few as games as possible


In [12]:
class TicTacToe:
    def __init__(self):
        # Initialize the board with None (empty cells)
        self.board = [None] * 9
        # Current player (0 for Player 1, 1 for Player 2)
        self.current_player = 0

    def print_board(self):
        # Print the current state of the board
        for i in range(3):
            row = self.board[i * 3 : (i + 1) * 3]
            print("|".join([" " if cell is None else str(cell) for cell in row]))
            if i < 2:
                print("-----")

    def make_move(self, position):
        # Make a move on the board
        if self.board[position] is None:
            self.board[position] = self.current_player
            self.current_player = 1 - self.current_player  # Switch player
            return True  # Move successful
        else:
            print("Invalid move. Cell already occupied.")
            return False  # Move unsuccessful

    def check_winner(self):
        # Check for a win condition after the 5th move
        if len([cell for cell in self.board if cell is not None]) >= 5:
            # Check rows, columns, and diagonals
            for i in range(3):
                # Check rows
                if (
                    self.board[i * 3] == self.board[i * 3 + 1] == self.board[i * 3 + 2] is not None
                ):
                    return True
                # Check columns
                if (
                    self.board[i] == self.board[i + 3] == self.board[i + 6] is not None
                ):
                    return True
            # Check diagonals
            if (
                self.board[0] == self.board[4] == self.board[8] is not None
                or self.board[2] == self.board[4] == self.board[6] is not None
            ):
                return True
        return False

# Example usage:
game = TicTacToe()
game.print_board()

# Make some moves
game.make_move(4)
game.make_move(0)
game.make_move(1)
game.make_move(8)
game.make_move(2)
game.make_move(3)
game.make_move(7)
# Check for a winner
if game.check_winner():
    print(f"Player {1 - game.current_player + 1} wins!")
else:
    print("No winner yet.")

game.print_board()

 | | 
-----
 | | 
-----
 | | 
Player 1 wins!
1|0|0
-----
1|0| 
-----
 |0|1


In [18]:
import pandas as pd

class TicTacToe:
    def __init__(self):
        # Initialize the board with None (empty cells)
        self.board = [None] * 9
        # Current player (0 for Player 1, 1 for Player 2)
        self.current_player = 0
        # List to store moves and outcomes for each game
        self.games_data = []

    def print_board(self):
        # Print the current state of the board
        for i in range(3):
            row = self.board[i * 3 : (i + 1) * 3]
            print("|".join([" " if cell is None else str(cell) for cell in row]))
            if i < 2:
                print("-----")

    def make_move(self, position):
        # Make a move on the board
        if self.board[position] is None:
            self.board[position] = self.current_player
            self.current_player = 1 - self.current_player  # Switch player
            return True  # Move successful
        else:
            print("Invalid move. Cell already occupied.")
            return False  # Move unsuccessful

    def check_winner(self):
        # Check for a win condition after the 5th move
        if len([cell for cell in self.board if cell is not None]) >= 5:
            # Check rows, columns, and diagonals
            for i in range(3):
                # Check rows
                if (
                    self.board[i * 3] == self.board[i * 3 + 1] == self.board[i * 3 + 2] is not None
                ):
                    return True
                # Check columns
                if (
                    self.board[i] == self.board[i + 3] == self.board[i + 6] is not None
                ):
                    return True
            # Check diagonals
            if (
                self.board[0] == self.board[4] == self.board[8] is not None
                or self.board[2] == self.board[4] == self.board[6] is not None
            ):
                return True
        return False

    def record_game_data(self, outcome):
        # Record move data and outcome for the current game
        moves = self.get_moves()
        game_data = {'Moves': moves, 'Outcome': outcome}
        self.games_data.append(game_data)

    def get_moves(self):
        return [i for i, cell in enumerate(self.board) if cell is not None]

# Example usage:
game = TicTacToe()
game.print_board()

# Make some moves
game.make_move(4)
game.make_move(0)
game.make_move(1)
game.make_move(8)
game.make_move(2)

# Check for a winner
if game.check_winner():
    print(f"Player {1 - game.current_player + 1} wins!")
    outcome = 'Win'
else:
    print("It's a draw.")
    outcome = 'Draw'

game.print_board()

# Record game data

# Example of playing another game
game2 = TicTacToe()
game2.make_move(3)
game2.make_move(0)
game2.make_move(4)
game2.make_move(1)
game2.make_move(5)
game2.make_move(2)
game2.make_move(6)

# Check for a winner in the second game
if game2.check_winner():
    print(f"Player {1 - game2.current_player + 1} wins!")
    outcome2 = 'Win'
else:
    print("It's a draw.")
    outcome2 = 'Draw'

# Record game data for the second game
game2.record_game_data(outcome2)

# Print the data for all games
df = pd.DataFrame(game2.games_data)
print(df)

 | | 
-----
 | | 
-----
 | | 
It's a draw.
1|0|0
-----
 |0| 
-----
 | |1
Player 1 wins!
                   Moves Outcome
0  [0, 1, 2, 3, 4, 5, 6]     Win


In [19]:
import pandas as pd

class TicTacToe:
    def __init__(self):
        # Initialize the board with None (empty cells)
        self.board = [None] * 9
        # Current player (0 for Player 1, 1 for Player 2)
        self.current_player = 0
        # DataFrame to store moves and outcomes for each game
        self.games_data = pd.DataFrame(columns=['Moves', 'Outcome'])

    def print_board(self):
        # Print the current state of the board
        for i in range(3):
            row = self.board[i * 3 : (i + 1) * 3]
            print("|".join([" " if cell is None else str(cell) for cell in row]))
            if i < 2:
                print("-----")

    def make_move(self, position):
        # Make a move on the board
        if self.board[position] is None:
            self.board[position] = self.current_player
            self.current_player = 1 - self.current_player  # Switch player
            return True  # Move successful
        else:
            print("Invalid move. Cell already occupied.")
            return False  # Move unsuccessful

    def check_winner(self):
        # Check for a win condition after the 5th move
        if len([cell for cell in self.board if cell is not None]) >= 5:
            # Check rows, columns, and diagonals
            for i in range(3):
                # Check rows
                if (
                    self.board[i * 3] == self.board[i * 3 + 1] == self.board[i * 3 + 2] is not None
                ):
                    return True
                # Check columns
                if (
                    self.board[i] == self.board[i + 3] == self.board[i + 6] is not None
                ):
                    return True
            # Check diagonals
            if (
                self.board[0] == self.board[4] == self.board[8] is not None
                or self.board[2] == self.board[4] == self.board[6] is not None
            ):
                return True
        return False

    def record_game_data(self, outcome):
        # Record move data and outcome for the current game
        moves = self.get_moves()
        self.games_data = self.games_data.append({'Moves': moves, 'Outcome': outcome}, ignore_index=True)

    def get_moves(self):
        return [i for i, cell in enumerate(self.board) if cell is not None]

# Example usage:
game = TicTacToe()

# Make some moves
game.make_move(4)
game.make_move(0)
game.make_move(1)
game.make_move(8)
game.make_move(2)

# Check for a winner
if game.check_winner():
    print(f"Player {1 - game.current_player + 1} wins!")
    outcome = 'Win'
else:
    print("It's a draw.")
    outcome = 'Draw'

game.print_board()

# Record game data
game.record_game_data(outcome)

# Print the data for all games
print(game.games_data)

It's a draw.
1|0|0
-----
 |0| 
-----
 | |1
             Moves Outcome
0  [0, 1, 2, 4, 8]    Draw


  self.games_data = self.games_data.append({'Moves': moves, 'Outcome': outcome}, ignore_index=True)


In [21]:
import numpy as np
import tensorflow as tf
import random

class QNetwork(tf.keras.Model):
    def __init__(self, num_actions):
        super(QNetwork, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense2 = tf.keras.layers.Dense(num_actions, activation='linear')

    def call(self, state):
        x = self.dense1(state)
        return self.dense2(x)

class TicTacToeAgent:
    def __init__(self, num_actions, epsilon=0.1, gamma=0.9, learning_rate=0.001):
        self.num_actions = num_actions
        self.epsilon = epsilon
        self.gamma = gamma
        self.q_network = QNetwork(num_actions)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)

    def choose_action(self, state):
        if random.uniform(0, 1) < self.epsilon:
            return random.randint(0, self.num_actions - 1)
        else:
            q_values = self.q_network.predict(state)
            return np.argmax(q_values)

    def train(self, state, action, reward, next_state, done):
        with tf.GradientTape() as tape:
            target = reward + self.gamma * np.max(self.q_network.predict(next_state)) * (1 - done)
            q_values = self.q_network(state)
            action_one_hot = tf.one_hot(action, self.num_actions)
            selected_q_value = tf.reduce_sum(tf.multiply(q_values, action_one_hot))
            loss = tf.losses.mean_squared_error(target, selected_q_value)

        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))

# Example of using the agent to play Tic Tac Toe
env = TicTacToe()
agent = TicTacToeAgent(num_actions=len(env.action_space))
num_episodes = 1000

for episode in range(num_episodes):
    state = env.reset()
    state = np.reshape(state, [1, len(state)])
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        next_state = np.reshape(next_state, [1, len(next_state)])
        agent.train(state, action, reward, next_state, done)

        total_reward += reward
        state = next_state

        if done:
            print(f"Episode: {episode + 1}, Total Reward: {total_reward}")
            break

AttributeError: 'TicTacToe' object has no attribute 'action_space'