In [3]:
import numpy as np
import pandas as pd

In [4]:
import math
from sklearn.preprocessing import LabelEncoder

In [5]:
# Marks written into the board: the learning agent plays 'X', the person plays 'O'.
ai, human = 'X', 'O'

In [6]:
# 3x3 grid of cell marks; "b" denotes a blank cell. Each row is its own list.
board = [["b" for _ in range(3)] for _ in range(3)]

In [7]:
# Q-value table: one row per base-3 board encoding (3 marks, 9 cells),
# one column per cell index the AI can pick.
q_table = np.zeros((3 ** 9, 9), dtype=np.float64)

In [8]:
# Encoder that maps each cell mark to the integer digit used by get_state_index.
# NOTE(review): LabelEncoder is documented to number classes in sorted order, which
# should give 'O' -> 0, 'X' -> 1, 'b' -> 2 -- confirm with label_encoder.classes_.
label_encoder = LabelEncoder()
label_encoder.fit(['b', 'O', 'X'])

LabelEncoder()

In [10]:
def print_board():
    """Print the global ``board`` as three text rows.

    Every cell is rendered as its mark followed by ``" |"``; blank cells
    (``"b"``) are shown as a single space. Each row starts with ``"|"``.
    """
    for cells in board:
        rendered = "".join(
            f"{' ' if mark == 'b' else mark} |" for mark in cells
        )
        print("|" + rendered)

In [11]:
def check_game_status():
    """Report the outcome of the game held in the global ``board``.

    Returns:
        The mark ('X' or 'O') that fills a complete row, column or diagonal;
        'Tie' when no line is complete and no blank ("b") cell remains;
        None while the game is still in progress.
    """
    # Winning lines as coordinate triples, listed in the order they are examined:
    # row k then column k for k = 0..2, then the two diagonals.
    lines = []
    for k in range(3):
        lines.append(((k, 0), (k, 1), (k, 2)))
        lines.append(((0, k), (1, k), (2, k)))
    lines.append(((0, 0), (1, 1), (2, 2)))
    lines.append(((0, 2), (1, 1), (2, 0)))

    for (r0, c0), (r1, c1), (r2, c2) in lines:
        first = board[r0][c0]
        if first != 'b' and first == board[r1][c1] == board[r2][c2]:
            return first

    # No winner: the game continues if any blank cell is left, otherwise it is a tie.
    for cells in board:
        for mark in cells:
            if mark == 'b':
                return None
    return 'Tie'

In [12]:
def update_q_table(state, action, reward, new_state):
    """Apply one Q-learning update to the global ``q_table`` in place.

    Args:
        state: Row index of the state the action was taken from.
        action: Column index of the action taken.
        reward: Reward observed for the transition.
        new_state: Row index of the resulting state.

    The entry ``q_table[state][action]`` is moved towards
    ``reward + discount_factor * max(q_table[new_state])`` by a fraction
    ``learning_rate``; both rates are read from module-level globals.
    """
    best_next_value = np.max(q_table[new_state])
    previous_value = q_table[state][action]
    td_target = reward + discount_factor * best_next_value
    q_table[state][action] = (
        (1 - learning_rate) * previous_value + learning_rate * td_target
    )

In [13]:
def get_state_index(board):
    """Encode a board as a single integer by treating its cells as base-3 digits.

    Args:
        board: Nested sequence of cell marks, indexed as ``board[row][column]``.

    Returns:
        The sum over all cells of ``3 ** (3 * row + column)`` times the integer
        code that the global ``label_encoder`` assigns to the cell's mark.
    """
    return sum(
        (3 ** (3 * r + c)) * label_encoder.transform([mark])[0]
        for r, cells in enumerate(board)
        for c, mark in enumerate(cells)
    )

In [14]:
# --- Q-learning update (read by update_q_table) ---
learning_rate = 0.1      # weight given to the new estimate versus the stored value
discount_factor = 0.9    # multiplier applied to the best Q-value of the next state

# --- Exploration schedule (used by the training loop) ---
max_exploration_rate = 1.0
min_exploration_rate = 0.01
exploration_decay_rate = 0.01   # exponent scale of the per-episode decay
# The agent moves randomly whenever a uniform draw does not exceed this rate.
exploration_rate = max_exploration_rate

In [28]:
# Number of games the training loop plays; every game prompts for the human's moves via input().
num_episodes = 5

In [29]:
# Interactive training: the AI ('X', moves first) plays `num_episodes` games against a
# person typing moves, and updates q_table from the outcome of each of its moves.
for episode in range(num_episodes):
    # Reset the game board
    board = [["b", "b", "b"], ["b", "b", "b"], ["b", "b", "b"]]

    # Reset the current player
    current_player = ai

    # Reset the game status
    game_status = None

    # Most recent AI (state, action) pair that has not yet received a Q update.
    state = None
    action = None

    while not game_status:
        if current_player == human:
            print("\nYour move")
            try:
                r = int(input("Select row position: "))
                c = int(input("Select column position: "))
            except ValueError:
                print("\nEnter whole numbers between 1 and 3")
                continue

            # Reject out-of-range values explicitly: an entry of 0 would otherwise
            # become index -1 and silently address the opposite edge of the board.
            if not (1 <= r <= 3 and 1 <= c <= 3):
                print("\nRow and column must be between 1 and 3")
                continue

            if board[r-1][c-1] != 'b':
                print("\nSelect an empty slot")
                continue

            board[r-1][c-1] = human
            print_board()
            current_player = ai
        else:
            print("\nAI Move")
            new_state = get_state_index(board)
            if state is not None:
                # The previous AI move did not end the game: credit it with zero
                # reward plus the discounted value of the position it led to, so
                # that every AI move in the game is learned from, not only the last.
                update_q_table(state, action, 0, new_state)
            state = new_state

            # Flat indices (row * 3 + col) of the empty cells. The loop only runs
            # while the game is undecided, so at least one cell is empty here.
            available_moves = np.flatnonzero(np.array(board).ravel() == 'b')

            # Exploration vs. Exploitation trade-off
            exploration_threshold = np.random.uniform(0, 1)
            if exploration_threshold > exploration_rate:
                # Exploit: best-valued move among the legal ones only.
                action = available_moves[np.argmax(q_table[state][available_moves])]
            else:
                # Explore: uniformly random legal move.
                action = np.random.choice(available_moves)
            action = int(action)

            row, col = divmod(action, 3)
            board[row][col] = ai
            print_board()
            current_player = human

        game_status = check_game_status()

    if game_status == ai:
        print('AI wins')
        reward = 1
    elif game_status == human:
        print('You wins')
        reward = -1
    else:
        print('Tie')
        reward = 0

    # Final update for the AI's last move, using the terminal position as next state.
    new_state = get_state_index(board)
    update_q_table(state, action, reward, new_state)

    # Decay exploration rate
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)


AI Move
|X |  |  |
|  |  |  |
|  |  |  |

Your move
Select row position: 2
Select column position: 2
|X |  |  |
|  |O |  |
|  |  |  |

AI Move
|X |  |X |
|  |O |  |
|  |  |  |

Your move
Select row position: 1
Select column position: 2
|X |O |X |
|  |O |  |
|  |  |  |

AI Move
|X |X |X |
|  |O |  |
|  |  |  |
AI wins

AI Move
|X |  |  |
|  |  |  |
|  |  |  |

Your move
Select row position: 2
Select column position: 2
|X |  |  |
|  |O |  |
|  |  |  |

AI Move
|X |  |X |
|  |O |  |
|  |  |  |

Your move
Select row position: 1
Select column position: 2
|X |O |X |
|  |O |  |
|  |  |  |

AI Move
|X |X |X |
|  |O |  |
|  |  |  |
AI wins

AI Move
|  |X |  |
|  |  |  |
|  |  |  |

Your move
Select row position: 2
Select column position: 2
|  |X |  |
|  |O |  |
|  |  |  |

AI Move
|X |X |  |
|  |O |  |
|  |  |  |

Your move
Select row position: 2
Select column position: 1
|X |X |  |
|O |O |  |
|  |  |  |

AI Move
|X |X |X |
|O |O |  |
|  |  |  |
AI wins

AI Move
|X |  |  |
|  |  |  |
|  |  |  

# USE THE CODE BELOW ONLY AFTER TRAINING THE MODEL FOR A NUMBER OF GAMES

In [None]:
# Play a single game against the agent using the Q-values learned so far.
# The human ('O') moves first; the AI always picks its best-valued legal move.
# NOTE(review): the training loop in this notebook always lets the AI move first,
# so the positions reached here (human first) may map to rows of q_table that
# training never updated -- confirm whether the AI should open the game instead.

board = [["b", "b", "b"], ["b", "b", "b"], ["b", "b", "b"]]
current_player = human

while True:
    if current_player == human:
        print("\nYour move")
        try:
            r = int(input("Select row position: "))
            c = int(input("Select column position: "))
        except ValueError:
            print("\nEnter whole numbers between 1 and 3")
            continue

        # Reject out-of-range values explicitly: an entry of 0 would otherwise
        # become index -1 and silently address the opposite edge of the board.
        if not (1 <= r <= 3 and 1 <= c <= 3):
            print("\nRow and column must be between 1 and 3")
            continue

        if board[r-1][c-1] != 'b':
            print("\nSelect an empty slot")
            continue

        board[r-1][c-1] = human
        print_board()
        current_player = ai
    else:
        print("\nAI Move")
        state = get_state_index(board)

        # Restrict the greedy choice to empty cells (flat index = row * 3 + col).
        # An unrestricted argmax can land on an occupied cell -- for an all-zero
        # row it always returns 0 -- and would overwrite the mark already there.
        available_moves = np.flatnonzero(np.array(board).ravel() == 'b')
        action = int(available_moves[np.argmax(q_table[state][available_moves])])

        row, col = divmod(action, 3)
        board[row][col] = ai
        print_board()
        current_player = human

    game_status = check_game_status()
    if game_status:
        if game_status == ai:
            print("AI wins")
        elif game_status == human:
            print("You win")
        else:
            print("Tie")
        break