In [9]:
import random
import numpy as np
import tensorflow as tf
import pickle

In [38]:
class TicTac_withComputer():
    """Console Tic-Tac-Toe: a human playing 'X' against a Keras Q-network playing 'O'.

    The board is a list of nine strings. An empty square holds its own
    1-based label ('1'..'9'); a taken square holds 'X' or 'O'.  The human
    always moves first.

    The network maps the numeric board encoding from ``get_state`` to nine
    Q-values (one per square) and is trained online with the Q-learning
    update.  All rewards are expressed from the computer's point of view,
    because the network's argmax is what selects the computer's moves.

    Args:
        learning_rate: Step size (alpha) of the Q-learning update.
        discount_factor: Discount (gamma) applied to the best successor value.
        exploration_rate: Probability that the computer plays a random
            empty square instead of the highest-valued one.
        no_of_epochs: Number of ``fit`` epochs run per Q update.
    """

    # Index triples that form a winning line: rows, columns, diagonals.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self, learning_rate=0.2, discount_factor=0.9, exploration_rate=0.2, no_of_epochs=4):
        self.board = [str(i) for i in range(1, 10)]
        self.current_player = 'X'
        self.step = 0
        self.game_over = False
        # Encoded board and square index of the computer's most recent move.
        # update_q_values needs them to credit the move that was actually
        # played rather than guessing one from the resulting position.
        self.last_state = None
        self.last_action = None
        self.q_table = self.load_q_table()  # Load the Q-table from file
        self.model = self.build_model()
        self.learning_rate = learning_rate
        self.no_of_epochs = no_of_epochs
        # Kept as an attribute for backward compatibility.  It is NOT wired
        # into the model: build_model compiles with the default 'adam'
        # optimizer, and `learning_rate` here is the Q-learning step size.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    # Function to display the board
    def display_board(self):
        """Print the 3x3 board as an ASCII grid."""
        separator = '-------------'
        print(separator)
        for row_start in range(0, 9, 3):
            left, middle, right = self.board[row_start:row_start + 3]
            print(f'| {left} | {middle} | {right} |')
            print(separator)

    # Function to check if any player has won
    def check_win(self, player):
        """Return True if `player` ('X' or 'O') occupies a complete line."""
        return any(
            all(self.board[idx] == player for idx in line)
            for line in self.WIN_LINES
        )

    def build_model(self):
        """Build and compile the 9 -> 128 -> 128 -> 9 dense Q-network (MSE loss)."""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='relu', input_shape=(9,)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(9, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def get_state(self):
        """Encode the board as a length-9 array: 'X' -> 1, 'O' -> -1, empty -> 0."""
        encoding = {'X': 1, 'O': -1}
        return np.array([encoding.get(cell, 0) for cell in self.board])

    def _valid_moves(self):
        """Return the indices (0-8) of the squares that are still empty."""
        return [i for i, val in enumerate(self.board) if val != 'X' and val != 'O']

    def user_turn(self):
        """Prompt until the human enters a legal square, then place 'X' there.

        A legal entry is an integer 1-9 naming a square that holds neither
        'X' nor 'O'.  Only ValueError (non-numeric input) is handled, so
        KeyboardInterrupt / EOFError propagate and the prompt can be aborted.
        """
        while True:
            try:
                position = int(input('Choose a position (1-9): ')) - 1
            except ValueError:
                print("Invalid move!! Try again")
                continue
            # Explicit range check: a negative index would otherwise wrap
            # around and silently address a square from the end of the list.
            if not 0 <= position < 9:
                print("Invalid move!! Try again")
                continue
            if self.board[position] in ('X', 'O'):
                print('That position is already filled. Try again.')
                continue
            self.board[position] = 'X'
            self.step += 1
            return

    def computer_turn(self):
        """Play 'O' epsilon-greedily and remember the (state, action) pair."""
        state = self.get_state()
        q_values = self.model.predict(np.array([state]), verbose=0)[0]
        valid_moves = self._valid_moves()

        if random.random() < self.exploration_rate:
            position = random.choice(valid_moves)
        else:
            # max() keeps the first of several equally valued squares.
            position = max(valid_moves, key=lambda i: q_values[i])

        self.last_state = state
        self.last_action = position
        self.board[position] = 'O'
        self.step += 1

    def update_q_values(self, reward):
        """Apply one Q-learning update to the computer's most recent move.

        The target for (last_state, last_action) is
        ``reward + discount_factor * max_next_q``, where ``max_next_q`` is the
        best Q-value over the squares still empty on the current board, or 0
        when the current board is terminal (won or full).  The stored
        Q-value is moved towards the target by ``learning_rate`` and the
        network is fitted on the adjusted vector.

        Args:
            reward: Reward for the move, from the computer's perspective.

        Does nothing if the computer has not moved yet.
        """
        if self.last_state is None or self.last_action is None:
            return

        state_batch = np.array([self.last_state])
        # Copy into a float array so the in-place adjustment below never
        # touches whatever buffer predict() returned.
        q_values = np.array(self.model.predict(state_batch, verbose=0)[0], dtype=float)

        valid_moves = self._valid_moves()
        terminal = not valid_moves or self.check_win('X') or self.check_win('O')
        if terminal:
            max_next_q = 0.0
        else:
            next_q_values = self.model.predict(np.array([self.get_state()]), verbose=0)[0]
            max_next_q = max(next_q_values[i] for i in valid_moves)

        action = self.last_action
        q_values[action] += self.learning_rate * (
            reward + self.discount_factor * max_next_q - q_values[action]
        )

        self.model.fit(state_batch, np.array([q_values]), epochs=self.no_of_epochs, verbose=0)

    def save_q_table(self):
        """Pickle the Q-table to ``q_table.pkl`` in the working directory."""
        with open("q_table.pkl", "wb") as f:
            pickle.dump(self.q_table, f)

    def load_q_table(self):
        """Load the Q-table from ``q_table.pkl``; return ``{}`` if unusable.

        SECURITY NOTE: unpickling executes arbitrary code embedded in the
        file.  Only load a ``q_table.pkl`` that this program wrote itself.
        """
        try:
            with open("q_table.pkl", "rb") as f:
                table = pickle.load(f)
        except FileNotFoundError:
            return {}
        except (EOFError, pickle.UnpicklingError):
            # Truncated or corrupt file (e.g. an interrupted save): start over
            # instead of making the game impossible to construct.
            return {}
        return table if isinstance(table, dict) else {}

    def _finish(self, message, reward):
        """Show the final board and `message`, learn from `reward`, end the game."""
        self.display_board()
        print(message)
        self.update_q_values(reward)
        self.game_over = True

    # Main game loop
    def play_game(self):
        """Run one interactive game until somebody wins or the board is full.

        Rewards given to the network (computer's perspective): 1 for a
        computer win, 0.5 for a tie, 0 for a human win, and 0 for every
        non-terminal move.
        """
        while not self.game_over:
            self.display_board()
            self.user_turn()

            if self.check_win('X'):
                self._finish('Congratulations! You win!', 0)
            elif self.step == 9:
                self._finish("It's a tie!", 0.5)
            else:
                # The human has replied to the computer's previous move, so
                # its successor position is now known: learn from it before
                # choosing the next move.  No-op on the first round.
                self.update_q_values(0)
                self.computer_turn()
                if self.check_win('O'):
                    self._finish('The computer wins!', 1)
                elif self.step == 9:
                    self._finish("It's a tie!", 0.5)
                else:
                    # Record the position reached in the persisted table.
                    self.q_table.setdefault(tuple(self.board), [0] * 9)

    def reset(self):
        """Restore a fresh board so another game can be played with the same model."""
        self.board = [str(i) for i in range(1, 10)]
        self.current_player = 'X'
        self.step = 0
        self.game_over = False
        self.last_state = None
        self.last_action = None

In [39]:
if __name__ == "__main__":
    print('-----Tic-Tac-Toe (with Computer) by Rajat Sharma-----')

    # Ask until a positive whole number is entered.
    while True:
        try:
            no_of_epochs = int(input("Enter number of epochs = "))
        except ValueError:
            print("Please enter a whole number.")
            continue
        if no_of_epochs > 0:
            break
        print("Please enter a number greater than zero.")

    # Pass by keyword: the first positional parameter of the constructor is
    # learning_rate, not no_of_epochs.
    # Build the game once so the trained network is reused across rematches
    # instead of being replaced by a fresh, untrained one every game.
    play = TicTac_withComputer(no_of_epochs=no_of_epochs)

    play_again = True
    while play_again:
        play.play_game()
        play.save_q_table()  # Save the Q-table after each game
        input_ = input("\nDo you want to play again(y/n)? ")
        if input_.strip().lower() == 'n':
            play_again = False
        else:
            play.reset()  # Reset the game state

-----Tic-Tac-Toe (with Computer) by Rajat Sharma-----
Enter number of epochs = 8
-------------
| 1 | 2 | 3 |
-------------
| 4 | 5 | 6 |
-------------
| 7 | 8 | 9 |
-------------
Choose a position (1-9): 5
-------------
| 1 | O | 3 |
-------------
| 4 | X | 6 |
-------------
| 7 | 8 | 9 |
-------------
Choose a position (1-9): 1
-------------
| X | O | O |
-------------
| 4 | X | 6 |
-------------
| 7 | 8 | 9 |
-------------
Choose a position (1-9): 9
-------------
| X | O | O |
-------------
| 4 | X | 6 |
-------------
| 7 | 8 | X |
-------------
Congratulations! You win!

Do you want to play again(y/n)? n
