In [62]:
import numpy as np
import pickle

In [63]:
# Dimensions of the game board: number of rows and number of columns.
BOARD_ROWS = BOARD_COLS = 3

In [64]:
class Player:
    """
    Simple tabular, epsilon-greedy AI player for a 2-D board game.

    Board cells holding ``-1`` are treated as empty; the player writes its
    own ``symbol`` (0 or 1) into the cell it picks.

    Methods:
    - getHash(board) - returns string representation of the board passed
    - availablePositions(board) - returns empty cells of the board passed
    - get_action(current_board) - picks a move, plays it on the board and
      returns it as an (i, j) tuple
    - addState(state) - records a visited state hash
    - is_done(reward) - gives the player a reward and updates state values
    - reset() - forgets the states recorded during the current game
    - savePolicy() - saves the learned state values to a file
    - loadPolicy(file) - loads state values from a file
    """

    def __init__(self, isZero, name="Победитель", exp_rate=0.3):
        """
        Create a player.

        isZero   -- if truthy the player plays symbol 0, otherwise symbol 1
        name     -- player name; also used as the policy file name suffix
        exp_rate -- probability of choosing a random move in get_action
        """
        self.name = name
        self.symbol = 0 if isZero else 1
        self.states = []  # hashes of all positions reached by own moves
        self.lr = 0.2  # learning rate used in is_done
        self.exp_rate = exp_rate
        self.decay_gamma = 0.9  # discount applied to the propagated reward
        self.states_value = {}  # state hash -> learned value

    def getHash(self, board: np.ndarray) -> str:
        """Return the string form of the flattened board, used as a dict key."""
        # reshape(-1) flattens a board of any size; for the standard board it
        # produces exactly the same string as an explicit rows * cols reshape.
        return str(board.reshape(-1))

    def availablePositions(self, board: np.ndarray) -> list:
        """Return the (row, col) tuples of all empty (-1) cells, row by row."""
        rows, cols = board.shape
        # Tuples (not lists) so that each position can index the array directly.
        return [
            (i, j)
            for i in range(rows)
            for j in range(cols)
            if board[i, j] == -1
        ]

    def get_action(self, current_board):
        """
        Choose a move, apply it to ``current_board`` and record the new state.

        With probability ``exp_rate`` a random empty cell is taken; otherwise
        the empty cell leading to the highest known state value is taken
        (unknown states count as -1, ties go to the last candidate).

        Note: ``current_board`` is modified in place.

        Returns the chosen (i, j) tuple, or None when the board has no empty
        cell (in which case the board and the recorded states are untouched).
        """
        positions = self.availablePositions(current_board)
        if not positions:
            # Nothing to play. Indexing the board with None would overwrite
            # every cell, and np.random.choice(0) would raise.
            return None

        if np.random.uniform(0, 1) <= self.exp_rate:
            # Explore: uniformly random empty cell.
            action = positions[np.random.choice(len(positions))]
        else:
            # Exploit: evaluate the board resulting from each candidate move.
            action = None
            value_max = float("-inf")  # any real value beats the sentinel
            for p in positions:
                next_board = current_board.copy()
                next_board[p] = self.symbol
                value = self.states_value.get(self.getHash(next_board), -1)
                if value >= value_max:
                    value_max = value
                    action = p

        current_board[action] = self.symbol
        self.addState(self.getHash(current_board))
        return action

    def addState(self, state):
        """Append a state hash to the history of the current game."""
        self.states.append(state)

    def is_done(self, reward):
        """
        Propagate the final ``reward`` backwards through the recorded states.

        Walking from the last state to the first, each value is moved towards
        ``decay_gamma * reward`` by a step of ``lr``; the updated value then
        becomes the reward for the preceding state. States never seen before
        start at -1. The state history is cleared afterwards.
        """
        for st in reversed(self.states):
            current = self.states_value.get(st)
            if current is None:
                current = -1
            current += self.lr * (self.decay_gamma * reward - current)
            self.states_value[st] = current
            reward = current
        self.reset()

    def reset(self):
        """Clear the state history of the current game."""
        self.states = []

    def savePolicy(self):
        """Pickle ``states_value`` into the file ``policy_<name>``."""
        # Context manager guarantees the file is closed even if dump fails.
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def loadPolicy(self, file):
        """Replace ``states_value`` with the object pickled in ``file``."""
        # NOTE: pickle.load can execute arbitrary code; only load policy
        # files that come from a trusted source.
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)

In [65]:
# Instantiate a player that uses symbol 0, with the default name and exploration rate.
p = Player(isZero=True)