In [1]:
import numpy as np
import random
import pickle
import json

In [51]:
class MinMaxAgent():
  """Exhaustive minimax solver for tic-tac-toe that records a move per board.

  A board is a 9-character string in row-major order whose cells are
  'X', 'O' or ' ' (empty). While searching, the agent stores in
  ``q_table`` the best move found for every non-terminal board it visits,
  from the point of view of whichever player was to move on that board.
  """

  # Index triples of the eight winning lines: rows, columns, diagonals.
  _WIN_LINES = (
    (0, 1, 2), (3, 4, 5), (6, 7, 8),
    (0, 3, 6), (1, 4, 7), (2, 5, 8),
    (0, 4, 8), (2, 4, 6),
  )

  def __init__(self):
    # Maps board string -> best move index (0-8) found by minmax().
    self.q_table = {}
    # Board the search works on; starts empty.
    self.board = '         '

  def get_best_move(self, state):
    """Return the stored move for board ``state``, or -1 if none is stored."""
    return self.q_table.get(state, -1)

  def update_best_move(self, state, action):
    """Record ``action`` as the move to play on board ``state``."""
    self.q_table[state] = action

  def get_moves(self):
    """Return the indices of the empty cells of ``self.board``, ascending."""
    return [m for m in range(9) if self.board[m] == ' ']

  @staticmethod
  def check_winner(gameState):
    """Evaluate a board string.

    Returns a ``(winner, done)`` tuple: ``winner`` is 'X', 'O' or None,
    and ``done`` is True when somebody has won or no empty cell is left.

    Declared static so it can be called both as
    ``MinMaxAgent.check_winner(board)`` and on an instance.
    """
    winner = None
    for a, b, c in MinMaxAgent._WIN_LINES:
      st = gameState[a] + gameState[b] + gameState[c]
      if st == 'XXX':
        winner = 'X'
      elif st == 'OOO':
        winner = 'O'
    done = winner is not None or ' ' not in gameState
    return winner, done

  def get_op(self, CP):
    """Return the opponent's symbol for player ``CP``."""
    return 'X' if CP == 'O' else 'O'

  def trainMinMax(self, CP, computer):
    """Fill ``q_table`` by searching from the current ``self.board``.

    ``CP`` is the player to move first and ``computer`` the side whose
    outcome is maximised. The search starts at depth 0.
    """
    self.minmax(0, CP, computer)

  def minmax(self, depth, CP, computer):
    """Minimax search of ``self.board`` with ``CP`` to move.

    Returns ``(score, move)``. At a finished board ``move`` is None and
    ``score`` is ``10 - depth`` if ``computer`` won, ``depth - 10`` if the
    other side won, and 0 for a draw, so quicker wins and slower losses
    score higher. Otherwise ``move`` is the best cell for ``CP`` (the
    maximiser when ``CP == computer``, the minimiser otherwise) and is
    also stored in ``q_table`` under the current board. ``self.board`` is
    restored before returning.
    """
    winner, done = MinMaxAgent.check_winner(self.board)
    if done:
      if winner is None:
        return 0, None
      if winner == computer:
        return 10 - depth, None
      return -10 + depth, None

    maximizing = CP == computer
    best_move = None
    # Sentinels outside the reachable score range [-10, 10].
    best_score = -200 if maximizing else 200
    opponent = self.get_op(CP)
    for m in self.get_moves():
      # Play the move, evaluate the resulting position, then undo it.
      self.board = self.board[:m] + CP + self.board[m+1:]
      score, _ = self.minmax(depth + 1, opponent, computer)
      self.board = self.board[:m] + ' ' + self.board[m+1:]
      # Strict comparisons: on ties the lowest-index move is kept.
      if maximizing:
        if score > best_score:
          best_move = m
          best_score = score
      elif score < best_score:
        best_move = m
        best_score = score
    self.update_best_move(self.board, best_move)
    return best_score, best_move

  def save_agent(self, filename='MinMaxModel'):
    """Write ``q_table`` as JSON to ``filename + '.json'`` and as a pickle to ``filename``."""
    with open(filename+'.json', 'w') as json_file:
      json.dump(self.q_table, json_file)
    with open(filename, 'wb') as f:
      pickle.dump(self.q_table, f)

In [55]:
# Build the minimax lookup table and persist it.
agent = MinMaxAgent()
# Debugging aid: evaluate a single hand-picked position instead of the full tree.
# agent.board = 'XXO O XO '
# score,move = agent.minmax(2,'X','X')
# print(score,move)
# agent.trainMinMax('X','X')
# Search from the empty board with X moving first and X as the maximised side.
agent.trainMinMax('X','X')
# Writes 'MinMaxModelPlayer2.json' (JSON) and 'MinMaxModelPlayer2' (pickle).
agent.save_agent('MinMaxModelPlayer2')
# print(agent.q_table)

In [None]:
class GamePlayer():
  """Plays games between a loaded agent and a random opponent and tallies results.

  Relies on ``Game`` and ``QLearningAgent``, which are defined outside this
  block; their exact behaviour is assumed from how they are called here.
  """

  def __init__(self):
    self.agent = None
    # Win counts keyed by the winner value reported by Game.make_move.
    self.CNT = {}
    # Number of drawn games.
    self.D = 0
    self.init()

  def init(self, filename="RLModel"):
    """Create the agent and load its ``q_table`` from the pickle file ``filename``."""
    self.agent = QLearningAgent()
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
      self.agent.q_table = pickle.load(f)

  def play(self,i,computer,log=True):
    """Play one game and update the win/draw counters.

    ``i`` selects the argument passed to ``Game``: 'X' when ``i`` is even,
    'O' otherwise (presumably the starting player; confirm against Game).
    ``computer`` is the symbol played by the agent; the other side moves
    at random. When ``log`` is true the board and the result are printed.
    """
    game = Game('X' if i%2==0 else 'O')
    done = False
    while not done:
      moves = game.get_moves()
      pState = game.get_state()
      CP = game.CP
      if CP != computer:
        move = random.choice(moves)
      else:
        move = self.agent.get_best_action(pState, moves, CP)
      # Only the winner and the finished flag are needed here.
      _, winner, _, done = game.make_move(move,computer)
      if log:
        game.print()
      if winner is not None:
        if log:
          print("Player "+winner+" Won")
        self.CNT[winner] = self.CNT.get(winner,0)+1
        break
      elif done:
        if log:
          print("Game drawn")
        self.D = self.D + 1
        break
      if log:
        print()

  def printQ(self,state,CP,moves):
    """Print the agent's Q-value for each move in ``moves`` at ``state`` for player ``CP``."""
    for i in moves:
      print(self.agent.get_q_value(state,i,CP))

  def printStat(self):
    """Print the number of wins for X and O and the number of draws."""
    # Default to 0 so a side that never won is reported as 0 rather than None.
    print("Player X won " + str(self.CNT.get('X', 0)))
    print("Player O won " + str(self.CNT.get('O', 0)))
    print("Draw " + str(self.D))