Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.

# LAB 4

Use reinforcement learning to devise a tic-tac-toe player.

Note: X player always moves first.

In [3684]:
# Imports
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy
from tqdm.auto import tqdm

In [3685]:
# Board state: `x` and `o` hold the cells claimed by each player
State = namedtuple("State", "x o")

In [3686]:
# One value table per trained side, reached as value_dictionary["x"] and
# value_dictionary["o"]; a key never written reads as 0.0 (defaultdict(float))
value_dictionary = {side: defaultdict(float) for side in ("x", "o")}

In [3687]:
# Global tally of game outcomes, updated through update_counter()
counters = dict(Wins=0, Loses=0, Draws=0)

def reset_counters():
    """Rebind the global `counters` to a fresh tally with every outcome at 0."""
    global counters
    counters = dict.fromkeys(("Wins", "Loses", "Draws"), 0)

def update_counter(type):
    """Add one to the entry of the global `counters` selected by `type`.

    `type` is used directly as the dictionary key; the callers in this file
    pass "Wins", "Loses" or "Draws".
    """
    # NOTE: the parameter name shadows the builtin `type`; it is kept because
    # it is part of the function's signature.
    counters[type] = counters[type] + 1

def print_counters():
    """Print the tallies stored in the global `counters` with their percentages.

    Each percentage is the share of that outcome over all recorded games.
    When no game has been recorded yet (for example right after
    reset_counters()) every percentage is printed as 0.00% instead of
    raising ZeroDivisionError.
    """
    total = sum(counters.values())

    def share(count):
        # An empty tally has no meaningful ratio; report 0 rather than divide by 0.
        return count / total * 100 if total else 0.0

    wins, loses, draws = counters["Wins"], counters["Loses"], counters["Draws"]
    print(
        f"Wins: {wins} ({share(wins):.2f}%), "
        f"Loses: {loses} ({share(loses):.2f}%), "
        f"Draws: {draws} ({share(draws):.2f}%)"
    )

def print_counters_between():
    """Print the tallies of the AI-versus-AI games with their percentages.

    game_between_AI() records an X victory under "Loses" and an O victory
    under "Wins", so the two entries are reported here as X wins and O wins
    respectively. When no game has been recorded yet every percentage is
    printed as 0.00% instead of raising ZeroDivisionError.
    """
    total = sum(counters.values())

    def share(count):
        # An empty tally has no meaningful ratio; report 0 rather than divide by 0.
        return count / total * 100 if total else 0.0

    x_wins, o_wins, draws = counters["Loses"], counters["Wins"], counters["Draws"]
    print(
        f"AI usign X Wins: {x_wins} ({share(x_wins):.2f}%), "
        f"AI usign O Wins: {o_wins} ({share(o_wins):.2f}%), "
        f"Draws: {draws} ({share(draws):.2f}%)"
    )

In [3688]:
# Board definition: a 3x3 magic square listed row by row. Every row, column
# and diagonal sums to 15, which is the property the win check tests for.
MAGIC = [
    2, 7, 6,
    9, 5, 1,
    4, 3, 8,
]

In [3689]:
def print_board(pos):
    """Print the board described by `pos` as three rows, then a blank line.

    Cells are visited in MAGIC order. A cell is drawn as "X" when its number
    is in `pos.x`, as "O" when it is in `pos.o`, and as "." otherwise.
    """
    for index in range(9):
        cell = MAGIC[index]
        if cell in pos.x:
            symbol = "X "
        elif cell in pos.o:
            symbol = "O "
        else:
            symbol = ". "
        print(symbol, end="")
        # Close the row after every third cell
        if index % 3 == 2:
            print()
    print()

In [3690]:
def win(elements):
    """Return True when some 3-element combination of `elements` sums to 15."""
    for triple in combinations(elements, 3):
        if sum(triple) == 15:
            return True
    return False

In [3691]:
def state_value(pos: State, player):
    """Return the reward of position `pos` from the point of view of `player`.

    Rewards when player is "x":  X has won +1 / O has won -1 / neither 0.
    Rewards for any other player: X has won -6 / O has won +3 / neither +2.
    X's cells are checked for a win before O's.
    """
    if player == "x":
        x_won, o_won, neither = 1, -1, 0
    else:
        x_won, o_won, neither = -6, 3, 2

    if win(pos.x):
        return x_won
    if win(pos.o):
        return o_won
    return neither

In [3692]:
def make_move(state, available, player):
    """Choose a move for `player` ("x" or "o") using its learned value table.

    Every candidate in `available` is added to the player's own cells and the
    resulting (x cells, o cells) pair is looked up in
    value_dictionary[player]. The candidate whose resulting position has the
    strictly highest stored value is returned. If none of the resulting
    positions is stored, a random move from `available` is returned instead.
    """
    best_value = float("-inf")
    best_move = None

    for candidate in available:
        # getattr picks state.x or state.o according to the player name
        own_cells = frozenset(getattr(state, player).union({candidate}))
        if player == "x":
            key = (own_cells, frozenset(state.o))
        else:
            key = (frozenset(state.x), own_cells)

        table = value_dictionary[player]
        # Membership test first: reading a missing key would insert it
        if key in table and table[key] > best_value:
            best_value = table[key]
            best_move = candidate

    # No candidate leads to a known position: fall back to a random move
    if best_move is None:
        return choice(list(available))
    return best_move

In [3693]:
def training_game():
    """Play one game in which both sides move at random and return its history.

    X moves first and the sides alternate. After every move a deep copy of
    the current State is appended to the history. The game stops as soon as
    the side that just moved has a winning set of cells, or when no cell is
    left.
    """
    board = State(set(), set())
    free_cells = set(range(1, 10))
    history = []
    side = "x"

    while free_cells:
        cell = choice(list(free_cells))
        marks = getattr(board, side)
        marks.add(cell)
        history.append(deepcopy(board))
        free_cells.remove(cell)

        # Only the side that just moved can have completed a line
        if win(marks):
            break
        side = "o" if side == "x" else "x"

    return history

In [3694]:
def train(player, episodes=1_000_000, learning_rate=0.003):
    """Fill value_dictionary[player] from randomly played games.

    For each episode a random game is generated with training_game(), the
    final position is scored with state_value() for `player`, and the stored
    value of every position visited in that game is moved towards that final
    reward by a fraction `learning_rate` of the difference.

    Args:
        player: "x" or "o"; selects both the value table and the reward scheme.
        episodes: number of random games to play (default 1,000,000, the
            value previously hard-coded).
        learning_rate: step size of each update (default 0.003, the value
            previously hard-coded as `epsilon`).
    """
    values = value_dictionary[player]

    for _ in tqdm(range(episodes)):
        trajectory = training_game()
        final_reward = state_value(trajectory[-1], player)
        for state in trajectory:
            # Sets are not hashable; freeze them to build the dictionary key
            key = (frozenset(state.x), frozenset(state.o))
            values[key] += learning_rate * (final_reward - values[key])

In [3695]:
def game(player):
    """Play one game of the trained AI against an opponent moving at random.

    The AI plays X when `player` is "x" and O otherwise; X always moves
    first. The AI picks its moves with make_move(), the opponent with a
    random choice. The outcome is recorded through update_counter() from the
    AI's point of view ("Wins", "Loses" or "Draws").

    Returns the list of State snapshots taken after every move.
    """
    ai_plays_x = player == "x"
    board = State(set(), set())
    free_cells = set(range(1, 10))
    history = []
    side = "x"

    while free_cells:
        # True when the side about to move is the one controlled by the AI
        ai_turn = (side == "x") == ai_plays_x
        if ai_turn:
            cell = make_move(board, free_cells, player)
        else:
            cell = choice(list(free_cells))

        marks = getattr(board, side)
        marks.add(cell)
        history.append(deepcopy(board))
        free_cells.remove(cell)

        # Only the side that just moved can have completed a line
        if win(marks):
            update_counter("Wins" if ai_turn else "Loses")
            break
        if not free_cells:
            update_counter("Draws")
            break

        side = "o" if side == "x" else "x"

    return history

In [3696]:
def game_between_AI():
    """Play one game in which both sides choose their moves with make_move().

    X moves first. The result is recorded through update_counter(): an X
    victory is counted under "Loses", an O victory under "Wins", and a full
    board without a winner under "Draws".

    Returns the list of State snapshots taken after every move.
    """
    # Counter credited when the given side wins
    credited_counter = {"x": "Loses", "o": "Wins"}

    board = State(set(), set())
    free_cells = set(range(1, 10))
    history = []
    side = "x"

    while free_cells:
        cell = make_move(board, free_cells, side)
        marks = getattr(board, side)
        marks.add(cell)
        history.append(deepcopy(board))
        free_cells.remove(cell)

        # Only the side that just moved can have completed a line
        if win(marks):
            update_counter(credited_counter[side])
            break
        if not free_cells:
            update_counter("Draws")
            break

        side = "o" if side == "x" else "x"

    return history

### AI using X against random moves

In [3697]:
# AI using X against random moves:
# train the X value table, play 10,000 evaluation games against a random
# opponent, then print the resulting win/loss/draw tally
player = "x"
train(player)
for steps in tqdm(range(10_000)):
    trajectory = game(player)
print_counters()

  0%|          | 0/1000000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

Wins: 9908 (99.08%), Loses: 0 (0.00%), Draws: 92 (0.92%)


In [3698]:
# Zero the tally so the next experiment starts counting from scratch
reset_counters()

### AI using O against random moves

In [3699]:
# AI using O against random moves:
# train the O value table, play 10,000 evaluation games against a random
# opponent (who moves first as X), then print the resulting tally
player = "o"
train(player)
for steps in tqdm(range(10_000)):
    trajectory = game(player)
print_counters()

  0%|          | 0/1000000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

Wins: 8779 (87.79%), Loses: 42 (0.42%), Draws: 1179 (11.79%)


In [3700]:
# Zero the tally so the next experiment starts counting from scratch
reset_counters()

### Make the AIs play against each other

In [3701]:
# The two trained AIs play each other for 10,000 games, then the tally is
# printed with the X-wins / O-wins labelling
for steps in tqdm(range(10_000)):
    trajectory = game_between_AI()
print_counters_between()

  0%|          | 0/10000 [00:00<?, ?it/s]

AI usign X Wins: 0 (0.00%), AI usign O Wins: 0 (0.00%), Draws: 10000 (100.00%)


### And if you want to challenge the AI?

In [3702]:
def get_move(player, available):
    """Ask the human `player` for a move until a valid one is entered.

    The answer must be an integer contained in `available`; it is returned
    as an int. Input that is not a number is rejected with the same message
    as an unavailable cell and the prompt is repeated, instead of letting
    int() raise ValueError and abort the game.

    NOTE(review): the number is compared directly against `available`, so it
    appears to be the cell's value in the board numbering rather than a
    left-to-right position; confirm this is the intended input scheme.
    """
    while True:
        answer = input(f"{player.capitalize()} player, make your move (1-9): ")
        try:
            move = int(answer)
        except ValueError:
            print("Invalid move. Please choose from available positions.")
            continue

        if move in available:
            return move
        print("Invalid move. Please choose from available positions.")

In [3703]:
def interactive_game(player):
    """Play one game between a human and the trained AI on the console.

    The human controls the side named by `player` ("x" or "o") and is asked
    for moves through get_move(); the other side is played with make_move().
    X moves first. The board is printed after every move, and the result
    (a win for either side, or a draw) is announced when the game ends.

    Returns the list of State snapshots taken after every move.
    """
    win_message = {"x": "X player won!", "o": "O player won"}

    board = State(set(), set())
    free_cells = set(range(1, 10))
    history = []
    side = "x"

    while free_cells:
        if player == side:
            cell = get_move(player, free_cells)
        else:
            cell = make_move(board, free_cells, side)

        marks = getattr(board, side)
        marks.add(cell)
        history.append(deepcopy(board))
        free_cells.remove(cell)

        print_board(board)

        # Only the side that just moved can have completed a line
        if win(marks):
            print(win_message[side])
            break
        if not free_cells:
            print("It's a draw!")
            break

        side = "o" if side == "x" else "x"

    return history

In [3704]:
def interactive_interface():
    """Run the console menu for playing repeated games against the AI.

    Asks whether to play; while the answer is "y", asks which side the human
    takes, runs interactive_game() for that side and asks whether to play
    again. An answer other than "x" or "o" for the side is rejected and the
    side question is repeated. Prints "Goodbye!" when the loop ends.
    """
    print("Do you want to play against AI? y/n")
    answer = input().lower()

    while answer == "y":
        print("Do you want to play using X or O? x/o")
        side = input().lower()

        if side in ("x", "o"):
            print("******************************")
            print("GAME BEGIN")
            print("******************************")
            interactive_game(side)

            print("******************************")
            print("Do you want to play again? y/n")
            answer = input().lower()
        else:
            # Leave `answer` untouched so the side question is asked again
            print("Invalid choice. Please choose x or o.")

    print("Goodbye!")

In [3705]:
# Start the console session against the trained AI
interactive_interface()

Do you want to play against AI? y/n
Do you want to play using X or O? x/o
******************************
GAME BEGIN
******************************
. . . 
. X . 
. . . 

. . . 
. X . 
O . . 

X . . 
. X . 
O . . 

X . . 
. X . 
O . O 

X . . 
. X . 
O X O 

X O . 
. X . 
O X O 

X O . 
. X X 
O X O 

X O . 
O X X 
O X O 

X O X 
O X X 
O X O 

It's a draw!
******************************
Do you want to play again? y/n
Goodbye!
