In [1]:
import numpy as np
import matplotlib.pyplot as plt

### Define a function that evaluates the board to check whether the game is over

In [17]:
def check_is_over(x):
    """
    Decide whether a tic-tac-toe game has finished.

    The board `x` is indexed as a 2-D array (rows 0-2, columns 0-2). The
    tests below rely on the cell encoding 0 = empty, 1 = player 1,
    2 = player 2: a line of three cells has product 1 only when every cell
    is 1, and sum 6 only when every cell is 2.

    Parameters
    ----------
    x : 2-D numpy array holding the current board.

    Returns
    -------
    int
        1  if player 1 owns a complete row, column or diagonal,
        2  if player 2 owns a complete row, column or diagonal,
        0  if no cell is zero and nobody won (draw),
        -1 if the game is not over yet.

    The outcome (win or draw) is also printed to stdout.
    """

    def winning_lines(grid):
        # Lazily yield the 8 candidate lines in the order rows, columns,
        # main diagonal, anti-diagonal.
        for r in range(3):
            yield grid[r, :]
        for c in range(3):
            yield grid[:, c]
        yield np.diag(grid)
        # Rotating the board by 90 degrees moves the anti-diagonal onto the
        # main diagonal, so np.diag can extract it.
        yield np.diag(np.rot90(grid))

    # Player 1 wins when some line multiplies to exactly 1
    if any(np.prod(line) == 1 for line in winning_lines(x)):
        print("PLAYER 1 WON!")
        return 1

    # Player 2 wins when some line adds up to exactly 6
    if any(np.sum(line) == 6 for line in winning_lines(x)):
        print("PLAYER 2 WON!")
        return 2

    # No winner and no empty (zero) cell left: the game is drawn
    if np.all(x):
        print("DRAW!!")
        return 0

    # Otherwise the game goes on
    return -1

### Setting up the Tic-Tac-Toe Simulation

In [34]:
debug = True

# Seed for the random move generator so that a fresh Restart & Run All
# replays exactly the same games. Set to None to get different games per run.
SEED = 42
rng = np.random.default_rng(SEED)

# Keep track of the total of player 1 wins, player 2 wins, and draws
player1_wins = 0
player2_wins = 0
draws = 0

# Set the number of episodes to play
NUM_EPISODES = 10

# Symbol shown on the printable board for each player's numeric value in x
MARKERS = {1: "X", 2: "O"}

for episode in range(NUM_EPISODES):
    print(f'Episode #{episode}')

    # Set is_done Boolean flag
    is_done = False

    # Flat (row-major) indices 0-8 of the cells that are still empty
    controls = set(range(9))

    # Set initial board state: 0 = empty, 1 = player 1, 2 = player 2
    x = np.zeros((3, 3))
    k = 0  # number of moves played so far in this episode

    # Printable view of the board, kept in sync with x
    board = np.full((3, 3), '-')

    if debug: print(f'Episode #{episode}, k={k},\n', board)

    while not is_done:
        # Agent 1 moves on even k (so it moves first), agent 2 on odd k
        player = 1 if k % 2 == 0 else 2

        # Pick one of the remaining empty cells uniformly at random.
        # sorted() fixes the candidate order so the seeded draw does not
        # depend on set iteration order.
        u = int(rng.choice(sorted(controls)))

        # Remove the used control from the set
        controls.remove(u)

        # Convert the flat index into (row, col) and update both the
        # numeric board state and its printable counterpart
        row, col = divmod(u, 3)
        x[row, col] = player
        board[row, col] = MARKERS[player]

        k += 1
        if debug: print(f'Episode #{episode}, k={k},\n', board)

        # Check if episode is over and tally the outcome.
        # check_is_over returns 1 / 2 for a win, 0 for a draw and -1 while
        # the game is still in progress.
        result = check_is_over(x)
        if result == 1:
            player1_wins += 1
        elif result == 2:
            player2_wins += 1
        elif result == 0:
            draws += 1
        is_done = result != -1

    if debug: print('\n')

Episode #0
Episode #0, k=0,
 [['-' '-' '-']
 ['-' '-' '-']
 ['-' '-' '-']]
Episode #0, k=1,
 [['-' '-' '-']
 ['-' '-' 'X']
 ['-' '-' '-']]
Episode #0, k=2,
 [['-' '-' 'O']
 ['-' '-' 'X']
 ['-' '-' '-']]
Episode #0, k=3,
 [['-' 'X' 'O']
 ['-' '-' 'X']
 ['-' '-' '-']]
Episode #0, k=4,
 [['-' 'X' 'O']
 ['-' '-' 'X']
 ['O' '-' '-']]
Episode #0, k=5,
 [['-' 'X' 'O']
 ['-' 'X' 'X']
 ['O' '-' '-']]
Episode #0, k=6,
 [['-' 'X' 'O']
 ['-' 'X' 'X']
 ['O' '-' 'O']]
Episode #0, k=7,
 [['-' 'X' 'O']
 ['X' 'X' 'X']
 ['O' '-' 'O']]
PLAYER 1 WON!


Episode #1
Episode #1, k=0,
 [['-' '-' '-']
 ['-' '-' '-']
 ['-' '-' '-']]
Episode #1, k=1,
 [['-' '-' 'X']
 ['-' '-' '-']
 ['-' '-' '-']]
Episode #1, k=2,
 [['-' '-' 'X']
 ['-' 'O' '-']
 ['-' '-' '-']]
Episode #1, k=3,
 [['-' 'X' 'X']
 ['-' 'O' '-']
 ['-' '-' '-']]
Episode #1, k=4,
 [['O' 'X' 'X']
 ['-' 'O' '-']
 ['-' '-' '-']]
Episode #1, k=5,
 [['O' 'X' 'X']
 ['-' 'O' '-']
 ['X' '-' '-']]
Episode #1, k=6,
 [['O' 'X' 'X']
 ['-' 'O' '-']
 ['X' '-' 'O']]
PL

In [32]:
# Summarise the tallies accumulated by the simulation cell, one line each
print(
    f'Player 1 had {player1_wins} wins',
    f'Player 2 had {player2_wins} wins',
    f'There were {draws} draws',
    sep='\n',
)

Player 1 had 588 wins
Player 2 had 282 wins
There were 130 draws
