In [1]:
import numpy as np
import pickle

N = 10
IN_A_ROW = 3
EMPTY_SPACE = None

class State:
    def __init__(self, p1, p2):
        self.board = np.zeros((N, N))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.boardHash = None
        # init p1 plays first
        self.playerSymbol = 1
        self.winner = None

    # get unique hash of current board state
    def getHash(self):
        self.boardHash = str(self.board.reshape(N * N))
        return self.boardHash
    
    def check_state(self, print_game=False):
        # check vertical
        for x in range(0, N):
            for y in range(0, N-4):
                if self.board[y][x] != EMPTY_SPACE:
                    curplayer = self.board[y][x]
                    score = 1
                    for j in range(4):
                        if self.board[y+j][x] == curplayer:
                            score+=1
                        else:
                            break

                        if score == IN_A_ROW:
                            if print_game:
                                print(curplayer, ' wins!')
                            return curplayer

        # check horizontal
        for x in range(0, N-4):
            for y in range(0, N-4):
                if self.board[y][x] != 0:
                    curplayer = self.board[y][x]
                    score = 1
                    for j in range(4):
                        if self.board[y][x+j] == curplayer:
                            score+=1
                        else:
                            break

                        if score == IN_A_ROW:
                            if print_game:
                                print(curplayer, ' wins!')
                            return curplayer

        # check diagonal \
        for x in range(0, N-4):
            for y in range(0, N-4):
                if self.board[y][x] != 0:
                    curplayer = self.board[y][x]
                    score = 1
                    for j in range(4):
                        if self.board[y+j][x+j] == curplayer:
                            score+=1
                        else:
                            break

                        if score == IN_A_ROW:
                            if print_game:
                                print(curplayer, ' wins!')
                            return curplayer

        # check diagonal /
        for x in range(0, N-4):
            for y in reversed(range(4, N)):
                if self.board[y][x] != 0:
                    curplayer = self.board[y][x]
                    score = 1
                    for j in range(4):
                        if self.board[y-j][x+j] == curplayer:
                            score+=1
                        else:
                            break

                        if score == IN_A_ROW:
                            if print_game:
                                print(curplayer, ' wins!')
                            return curplayer
            
        # tie
        # no available positions
        if len(self.availablePositions()) == 0:
            self.isEnd = True
            return 0
        # not end
        self.isEnd = False
        return None

    def availablePositions(self):
        positions = []
        for i in range(N):
            for j in range(N):
                if self.board[i, j] == 0:
                    positions.append((i, j))  # need to be tuple
        return positions

    def updateState(self, position):
        self.board[position] = self.playerSymbol
        # switch to another player
        self.playerSymbol = -1 if self.playerSymbol == 1 else 1

    # only when game ends
    def giveReward(self):
        result = self.check_state()
        # backpropagate reward
        if result == 1:
            self.p1.feedReward(1)
            self.p2.feedReward(0)
        elif result == -1:
            self.p1.feedReward(0)
            self.p2.feedReward(1)
        else:
            self.p1.feedReward(0.1)
            self.p2.feedReward(0.5)

    # board reset
    def reset(self):
        self.board = np.zeros((N, N))
        self.boardHash = None
        self.isEnd = False
        self.playerSymbol = 1

    def play(self, rounds=100):
        for i in range(rounds):
            print("Rounds {}".format(i))
            while not self.isEnd:
                # Player 1
                positions = self.availablePositions()
                p1_action = self.p1.chooseAction(positions, self.board, self.playerSymbol)
                # take action and upate board state
                self.updateState(p1_action)
                board_hash = self.getHash()
                self.p1.addState(board_hash)
                # check board status if it is end

                win = self.check_state()
                if win is not None:
                    # self.showBoard()
                    # ended with p1 either win or draw
                    self.giveReward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    break

                else:
                    # Player 2
                    positions = self.availablePositions()
                    p2_action = self.p2.chooseAction(positions, self.board, self.playerSymbol)
                    self.updateState(p2_action)
                    board_hash = self.getHash()
                    self.p2.addState(board_hash)

                    win = self.check_state()
                    if win is not None:
                        # self.showBoard()
                        # ended with p2 either win or draw
                        self.giveReward()
                        self.p1.reset()
                        self.p2.reset()
                        self.reset()
                        break

    # play with human
    def play2(self):
        while not self.isEnd:
            # Player 1
            positions = self.availablePositions()
            p1_action = self.p1.chooseAction(positions, self.board, self.playerSymbol)
            # take action and upate board state
            self.updateState(p1_action)
            self.showBoard()
            # check board status if it is end
            win = self.winner()
            if win is not None:
                if win == 1:
                    print(self.p1.name, "wins!")
                else:
                    print("tie!")
                self.reset()
                break

            else:
                # Player 2
                positions = self.availablePositions()
                p2_action = self.p2.chooseAction(positions)

                self.updateState(p2_action)
                self.showBoard()
                win = self.winner()
                if win is not None:
                    if win == -1:
                        print(self.p2.name, "wins!")
                    else:
                        print("tie!")
                    self.reset()
                    break

    def showBoard(self):
        # p1: x  p2: o
        for k in range(N):
            if k == 0:
                print('   ', end='')
            print(' ', k, '', end='' if k != N-1 else '\n')
            
        for k in range(N):
            if k == 0:
                print('   ', end='')
            print('----', end='' if k != N-1 else '-\n')
        
        for i in range(0, N):
            print(i, ' ', end='')
            out = '| '
            for j in range(0, N):
                if self.board[i, j] == 1:
                    token = 'x'
                if self.board[i, j] == -1:
                    token = 'o'
                if self.board[i, j] == 0:
                    token = ' '
                out += token + ' | '
            print(out)
            for k in range(N):
                if k == 0:
                    print('   ', end='')
                print('----', end='' if k != N-1 else '-\n')


class Player:
    def __init__(self, name, exp_rate=0.3):
        self.name = name
        self.states = []  # record all positions taken
        self.lr = 0.2
        self.exp_rate = exp_rate
        self.decay_gamma = 0.9
        self.states_value = {}  # state -> value

    def getHash(self, board):
        boardHash = str(board.reshape(N * N))
        return boardHash

    def chooseAction(self, positions, current_board, symbol):
        if np.random.uniform(0, 1) <= self.exp_rate:
            # take random action
            idx = np.random.choice(len(positions))
            action = positions[idx]
        else:
            value_max = -999
            for p in positions:
                next_board = current_board.copy()
                next_board[p] = symbol
                next_boardHash = self.getHash(next_board)
                value = 0 if self.states_value.get(next_boardHash) is None else self.states_value.get(next_boardHash)
                # print("value", value)
                if value >= value_max:
                    value_max = value
                    action = p
        # print("{} takes action {}".format(self.name, action))
        return action

    # append a hash state
    def addState(self, state):
        self.states.append(state)

    # at the end of game, backpropagate and update states value
    def feedReward(self, reward):
        for st in reversed(self.states):
            if self.states_value.get(st) is None:
                self.states_value[st] = 0
            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self):
        self.states = []

    def savePolicy(self, rounds):
        fw = open(str(N) +'_dim_'+ str(rounds) + '_rounds', 'wb')
        pickle.dump(self.states_value, fw)
        fw.close()

    def loadPolicy(self, file):
        fr = open(file, 'rb')
        self.states_value = pickle.load(fr)
        fr.close()


class HumanPlayer:
    def __init__(self, name):
        self.name = name

    def chooseAction(self, positions):
        while True:
            row = int(input("Input your action row:"))
            col = int(input("Input your action col:"))
            action = (row, col)
            if action in positions:
                return action

    # append a hash state
    def addState(self, state):
        pass

    # at the end of game, backpropagate and update states value
    def feedReward(self, reward):
        pass

    def reset(self):
        pass


if __name__ == "__main__":
    # training
    p1 = Player("p1")
    p2 = Player("p2")

    st = State(p1, p2)
    print("training...")
    rounds = 500
    st.play(rounds)
    p1.savePolicy(rounds)

    # play with human
    p1 = Player("computer", exp_rate=0)
    p1.loadPolicy("10_dim_500_rounds")

    p2 = HumanPlayer("human")

    st = State(p1, p2)
    st.play2()

training...
Rounds 0
Rounds 1
Rounds 2
Rounds 3
Rounds 4
Rounds 5
Rounds 6
Rounds 7
Rounds 8
Rounds 9
Rounds 10
Rounds 11
Rounds 12
Rounds 13
Rounds 14
Rounds 15
Rounds 16
Rounds 17
Rounds 18
Rounds 19
Rounds 20
Rounds 21
Rounds 22
Rounds 23
Rounds 24
Rounds 25
Rounds 26
Rounds 27
Rounds 28
Rounds 29
Rounds 30
Rounds 31
Rounds 32
Rounds 33
Rounds 34
Rounds 35
Rounds 36
Rounds 37
Rounds 38
Rounds 39
Rounds 40
Rounds 41
Rounds 42
Rounds 43
Rounds 44
Rounds 45
Rounds 46
Rounds 47
Rounds 48
Rounds 49
Rounds 50
Rounds 51
Rounds 52
Rounds 53
Rounds 54
Rounds 55
Rounds 56
Rounds 57
Rounds 58
Rounds 59
Rounds 60
Rounds 61
Rounds 62
Rounds 63
Rounds 64
Rounds 65
Rounds 66
Rounds 67
Rounds 68
Rounds 69
Rounds 70
Rounds 71
Rounds 72
Rounds 73
Rounds 74
Rounds 75
Rounds 76
Rounds 77
Rounds 78
Rounds 79
Rounds 80
Rounds 81
Rounds 82
Rounds 83
Rounds 84
Rounds 85
Rounds 86
Rounds 87
Rounds 88
Rounds 89
Rounds 90
Rounds 91
Rounds 92
Rounds 93
Rounds 94
Rounds 95
Rounds 96
Rounds 97
Rounds 98
Rounds 9

TypeError: 'NoneType' object is not callable