# Minimax

In [None]:
from copy import deepcopy

X, O, E = 'X', 'O', ' '

def is_terminal(state):
  '''
  Tell whether the game is over on the given board.

  The game is over as soon as either side has completed a line (the reward
  computed by a default Minimax instance is non-zero) or when none of the
  three rows contains an empty cell any more.
  '''
  board_full = not any(E in state[k] for k in (0, 1, 2))
  winner_exists = Minimax().reward(state) != 0
  return winner_exists or board_full

def get_actions(state):
  '''
  List the legal moves on the board.

  Returns the (row, col) coordinates of every empty cell, scanned row by row
  and, within a row, from left to right.
  '''
  return [
    (row, col)
    for row in range(3)
    for col in range(3)
    if state[row][col] == E
  ]

def print_state(state):
  '''Write the board to stdout, one row per line, using each row's str() form.'''
  for rendered_row in map(str, state):
    print(rendered_row)

def take_action(state, loc, symbol):
  '''
  Return a copy of the board with `symbol` placed at `loc`.

  `loc` is indexed as (row, col). The input board is never modified. If the
  targeted cell is already occupied, the copy is returned unchanged.
  '''
  row, col = loc[0], loc[1]
  successor = deepcopy(state)
  if state[row][col] != E:
    # Occupied cell: hand back the untouched copy.
    return successor
  successor[row][col] = symbol
  return successor

In [None]:
class Minimax():
  '''
  Tic-tac-toe agent using depth-limited minimax search with alpha-beta pruning.

  The agent maximises the outcome for `player_type` and assumes the opponent
  (`enemy_type`) always answers with the reply that minimises it. Boards are
  indexed as state[row][col] with row and col in 0..2 and hold the
  module-level symbols X, O and E.
  '''

  # The eight winning lines as (row, col) triples: rows, columns, diagonals.
  # The order is significant: reward() reports the first completed line.
  WINNING_LINES = (
    ((0, 0), (0, 1), (0, 2)),
    ((1, 0), (1, 1), (1, 2)),
    ((2, 0), (2, 1), (2, 2)),

    ((0, 0), (1, 0), (2, 0)),
    ((0, 1), (1, 1), (2, 1)),
    ((0, 2), (1, 2), (2, 2)),

    ((0, 0), (1, 1), (2, 2)),
    ((2, 0), (1, 1), (0, 2)),
  )

  def __init__(self, player_type=X, max_depth=100):
    '''
    player_type: symbol the agent plays (X or O); the other symbol becomes
                 `enemy_type`.
    max_depth:   search depth at which min_value/max_value stop expanding and
                 fall back to the heuristic `uristika`.
    '''
    self.player_type = player_type
    self.enemy_type = self.enemy(player_type)
    # Kept for backward compatibility; the search itself uses
    # player_type / enemy_type so that an O-playing agent works too.
    self.first_player = X
    self.second_player = O
    self.max_depth = max_depth

  def __str__(self):
    return f'player type: {self.player_type}\n' + \
      f'enemy type: {self.enemy_type}\n'

  def enemy(self, player_type):
    '''Return the opposing symbol: O for X, and X for anything else.'''
    if player_type == X:
      return O
    return X

  def reward(self, state):
    '''
    Score the board from the agent's point of view.

    Returns 1 if the agent owns a complete line, -1 if the enemy does, and 0
    otherwise. Lines are checked in WINNING_LINES order and the first complete
    one decides the result.
    '''
    for (i0, j0), (i1, j1), (i2, j2) in self.WINNING_LINES:
      owner = state[i0][j0]
      if owner == state[i1][j1] == state[i2][j2]:
        if owner == self.enemy_type:
          return -1
        if owner == self.player_type:
          return 1
    return 0

  def uristika(self, state):
    '''
    Heuristic estimate used when the search hits `max_depth`.

    Currently just the plain reward of the board.
    '''
    return self.reward(state)  # todo: change this!

  def decision(self, state):
    '''
    Choose the agent's move on `state`.

    Returns a (value, action) tuple: `value` is the minimax value of the best
    move and `action` its (row, col). Among equally good moves the first one
    in get_actions() order is kept. If there is no legal move, returns
    (reward(state), None).
    '''
    actions = get_actions(state)
    if not actions:
      return (self.reward(state), None)

    alpha = float("-inf")
    beta = float("inf")
    value = float("-inf")
    optimal_action = None
    for action in actions:
      next_state = take_action(state, action, self.player_type)
      returned_value = self.min_value(next_state, 1, alpha, beta)
      if returned_value > value:
        value = returned_value
        optimal_action = action
      # Raise the lower bound so later root moves can be pruned. A pruned
      # subtree returns at most alpha, so it can never replace the current
      # best under the strict '>' comparison above.
      alpha = max(alpha, value)
    return (value, optimal_action)

  def min_value(self, state, depth, alpha, beta):
    '''
    Value of `state` when the enemy is to move (minimising node).

    alpha / beta are the best values already guaranteed to the maximiser and
    the minimiser on the path to this node.
    '''
    if is_terminal(state): return self.reward(state)
    if depth == self.max_depth: return self.uristika(state)
    value = float("inf")
    # Successors are built one at a time so pruned moves are never copied.
    for action in get_actions(state):
      next_state = take_action(state, action, self.enemy_type)
      value = min(value, self.max_value(next_state, depth + 1, alpha, beta))
      # alpha beta pruning: the maximiser already has something at least as good.
      if value <= alpha:
        return value
      beta = min(beta, value)
    return value

  def max_value(self, state, depth, alpha, beta):
    '''
    Value of `state` when the agent is to move (maximising node).

    alpha / beta are the best values already guaranteed to the maximiser and
    the minimiser on the path to this node.
    '''
    if is_terminal(state): return self.reward(state)
    if depth == self.max_depth: return self.uristika(state)
    value = float("-inf")
    # Successors are built one at a time so pruned moves are never copied.
    for action in get_actions(state):
      next_state = take_action(state, action, self.player_type)
      value = max(value, self.min_value(next_state, depth + 1, alpha, beta))
      # alpha beta pruning: the minimiser already has something at least as good.
      if value >= beta:
        return value
      alpha = max(alpha, value)
    return value

In [None]:
# Starting position: a 3x3 board (three independent row lists) with every cell empty.
state = [[E for _ in range(3)] for _ in range(3)]

def ask_for_input(state):
  '''
  Ask the human player for a move until a legal one is entered.

  Prompts for a row and a column index on stdin. The answer is accepted only
  if both values are integers in 0..2 and the addressed cell is empty;
  otherwise 'Illegal coordinate!' is printed and the prompts are repeated.

  Returns the chosen cell as a (row, col) tuple.
  '''
  while True:
    try:
      i = int(input("Enter row index [0-2]: "))
      j = int(input("Enter col index [0-2]: "))
    except ValueError:
      # Non-numeric input is treated like any other illegal move instead of
      # aborting the game with a traceback.
      print('Illegal coordinate!')
      continue
    # Compare cell contents by value; identity ('is') on strings only works
    # by accident of interning.
    if i in range(3) and j in range(3) and state[i][j] == E:
      return (i, j)
    print('Illegal coordinate!')

# Interactive session: the computer plays X and opens the game, the human
# replies with O; turns alternate until one side wins or the board is full.
print('Getting started!')
print_state(state)
while True:
  # --- computer move ---
  print("\nComputer's turn: ")
  _, computer_move = Minimax().decision(state)
  state = take_action(state, computer_move, X)
  print_state(state)

  # A default Minimax scores the board for X: +1 means X completed a line.
  if Minimax().reward(state) == 1:
    print('\nComputer won!')
    break
  if E not in [*state[0], *state[1], *state[2]]:
    print('\nDraw!')
    break

  # --- human move ---
  print('\nYour turn: ')
  human_move = ask_for_input(state)
  state = take_action(state, human_move, O)
  print_state(state)

  # -1 means the opponent of X (the human) completed a line.
  if Minimax().reward(state) == -1:
    print('\nYou won!')
    break
  if E not in [*state[0], *state[1], *state[2]]:
    print('\nDraw!')
    break

Getting started!
[' ', ' ', ' ']
[' ', ' ', ' ']
[' ', ' ', ' ']

Computer's turn: 
['X', ' ', ' ']
[' ', ' ', ' ']
[' ', ' ', ' ']

Your turn: 
Enter row index [0-2]: 1
Enter col index [0-2]: 1
['X', ' ', ' ']
[' ', 'O', ' ']
[' ', ' ', ' ']

Computer's turn: 
['X', 'X', ' ']
[' ', 'O', ' ']
[' ', ' ', ' ']

Your turn: 
Enter row index [0-2]: 0
Enter col index [0-2]: 2
['X', 'X', 'O']
[' ', 'O', ' ']
[' ', ' ', ' ']

Computer's turn: 
['X', 'X', 'O']
[' ', 'O', ' ']
['X', ' ', ' ']

Your turn: 
Enter row index [0-2]: 1
Enter col index [0-2]: 0
['X', 'X', 'O']
['O', 'O', ' ']
['X', ' ', ' ']

Computer's turn: 
['X', 'X', 'O']
['O', 'O', 'X']
['X', ' ', ' ']

Your turn: 
Enter row index [0-2]: 2
Enter col index [0-2]: 2
['X', 'X', 'O']
['O', 'O', 'X']
['X', ' ', 'O']

Computer's turn: 
['X', 'X', 'O']
['O', 'O', 'X']
['X', 'X', 'O']

Draw!
