> Tic-Tac-Toe using Reinforcement Learning

In [23]:
import json

In [24]:
def isTerminal(state):
    """Report whether the board is finished and with which outcome.

    `state` is a 9-cell sequence (list or string) of 'X', 'O' or '_'.

    Returns a `(finished, score)` tuple:
      * `(True, 1)`   - 'X' fills a complete row, column or diagonal
      * `(True, -1)`  - 'O' fills a complete row, column or diagonal
      * `(True, 0)`   - no empty cell is left and nobody completed a line
      * `(False, 0)`  - the game is still open
    """
    winningLines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )
    outcome = {'X': 1, 'O': -1}

    for first, second, third in winningLines:
        mark = state[first]
        if mark in outcome and mark == state[second] == state[third]:
            return True, outcome[mark]

    # No completed line: the game is over only when the board is full.
    return ('_' not in state), 0

In [25]:
def determineTurn(state):
    """Return the mark of the player to move: 'X' or 'O'.

    The side to move is derived from the number of empty ('_') cells:
    an odd count means 'X' moves, an even count means 'O' moves.
    """
    emptyCountIsOdd = state.count('_') % 2 == 1
    return 'X' if emptyCountIsOdd else 'O'

In [26]:
def toString(statelst):
    """Join the cells of a board into a single string.

    `statelst` is an iterable of one-character strings (e.g. the 9-cell
    board list); the result is used as a dictionary key for the state.
    """
    # str.join builds the result in one pass instead of repeated `+=`.
    return ''.join(statelst)

In [27]:
childParent = {}         # child state string -> list of its parent state strings
terminals = []           # terminal state strings, in order of first discovery
tree = {}                # game tree: state string -> list of child state strings
stateScores = {}         # state string -> score (isTerminal's score at first, refined by updateValues)
def generateTree(state):
    """Expand the game tree below `state` depth-first.

    `state` is the board as a list of 9 one-character cells.  Nothing is
    returned; the module-level tables are filled in instead:

      * tree[state]        - list of child state strings
      * childParent[child] - list of parent state strings
      * stateScores[child] - the score isTerminal() assigns to the child
      * terminals          - every terminal state, in discovery order

    A state that was already expanded (reached earlier through a different
    move order) is not expanded again: its subtree is already recorded, so
    repeating the walk would only redo identical work.
    """
    strState = toString(state)
    if strState in tree:
        return
    tree[strState] = []

    if isTerminal(state)[0]:
        if strState not in terminals:
            terminals.append(strState)
        return

    turn = determineTurn(state)
    for i in range(9):
        if state[i] != '_':
            continue                                  # occupied cell, no move here

        cState = state.copy()                         # board after playing cell i
        cState[i] = turn
        cStateStr = toString(cState)
        tree[strState].append(cStateStr)

        # Record the edge here, in the parent's loop, so a child that is
        # skipped by the guard above still learns about this parent.
        parents = childParent.setdefault(cStateStr, [])
        if strState not in parents:
            parents.append(strState)

        stateScores[cStateStr] = isTerminal(cState)[1]
        generateTree(cState)                          # recursive call (DFS)

generateTree(['_']*9)                                        # build the whole tree from the empty board
childParent['_________'] = [None]                            # the empty board has no parent
stateScores['_________'] = 0                                 # initial score of the empty board

In [28]:
alpha = 0.41             # step size: fraction of the score gap closed per update

def _helperUpdateValues(state, s1 = 0):
    """Propagate the score of `state` upwards to all of its ancestors.

    Every parent's score is moved towards this state's score by the
    fraction `alpha`, and the same procedure is then applied to that parent,
    until the empty board is reached.

    The `s1` argument is accepted but not used as passed: it is replaced by
    the score currently stored for `state`.
    """
    s1 = stateScores[state]
    if state == '_________':
        return                                    # the empty board has nothing above it

    for parent in childParent[state]:
        current = stateScores[parent]
        stateScores[parent] = current + alpha*(s1 - current)
        _helperUpdateValues(parent, s1)           # continue towards the root

def updateValues():
    """Back up the score of every terminal state, repeated for 200 sweeps."""
    for _ in range(200):
        for terminal in terminals:
            _helperUpdateValues(terminal)
updateValues()

In [29]:
# Storing the tree in a json file
# Persist the game tree and the state scores to reinforcement.json
jsonContent = {"tree": tree, "scores": stateScores}
jsonData = json.dumps(jsonContent, indent=2)
with open('reinforcement.json', mode='w') as json_file:
    json_file.write(jsonData)

In [30]:
def findMove(state):
    """Return the board index (0-8) of the computer's move.

    Looks up the children of `state` in the game tree and selects the one
    with the lowest stored score; when several children share that score,
    the one listed last is taken.  The returned index is the cell that is
    empty in `state` and filled in the selected child (None if no cell
    differs).
    """
    board = toString(state)
    # min() keeps the first minimal item it meets, so walking the children
    # backwards makes the last-listed child win a tie.
    chosen = min(reversed(tree[board]), key=lambda child: stateScores[child])
    for position, (before, after) in enumerate(zip(board, chosen)):
        if before == '_' and after != '_':
            return position

In [31]:
def _PrintGame(state):
    """Print the 9-cell board as three rows, followed by a spacer line."""
    for rowStart in (0, 3, 6):
        left, middle, right = state[rowStart], state[rowStart + 1], state[rowStart + 2]
        print(f"| {left} | {middle} | {right} |")
    print(" ")

In [32]:
def _declareResult(state):
    """Print the outcome of the game; print nothing while it is still open."""
    finished, score = isTerminal(state)
    if score == -1:
        message = "You Lost"                         # 'O' (the machine) completed a line
    elif score == 1:
        message = "Congratulations! You won"         # 'X' (the player) completed a line
    elif finished and score == 0:
        message = "Its a draw"                       # full board, no winner
    else:
        return
    print(message)

In [36]:
def _readUserMove(state):
    """Prompt until the user names an empty cell; return its index (0-8)."""
    prompt = "Enter the move position(1,2,3,4,5,6,7,8,9): "
    while True:
        try:
            index = int(input(prompt)) - 1
        except ValueError:
            prompt = "Please enter a valid integer: "
            continue
        # The range check matters: without it, 0 would become index -1 and
        # silently overwrite the last cell, and 10 would raise IndexError.
        if 0 <= index < 9 and state[index] == '_':
            return index
        prompt = "Please enter a valid(empty) position: "


def Play():
    """Play an interactive game against the trained value table.

    The user plays 'X' and moves first; the computer plays 'O' and takes its
    moves from findMove().  The board is printed at the start, after every
    computer move and at the end of the game, followed by the result.
    """
    state = ['_']*9                                  # initial state
    _PrintGame(state)                                # prints initial board game
    userTurn = True                                  # true if it's players turn
    while not isTerminal(state)[0]:
        if userTurn:
            state[_readUserMove(state)] = 'X'
        else:
            state[findMove(state)] = 'O'
            _PrintGame(state)
        userTurn = not userTurn

    if not userTurn:
        # The user made the final move, so that board has not been shown yet.
        _PrintGame(state)
    _declareResult(state)
Play()

| _ | _ | _ |
| _ | _ | _ |
| _ | _ | _ |
 
Enter the move position(1,2,3,4,5,6,7,8,9): 5
| _ | _ | _ |
| _ | X | _ |
| O | _ | _ |
 
Enter the move position(1,2,3,4,5,6,7,8,9): 3
| _ | _ | X |
| _ | X | _ |
| O | _ | O |
 
Enter the move position(1,2,3,4,5,6,7,8,9): 1
| X | _ | X |
| _ | X | _ |
| O | O | O |
 
You Lost
