In [1]:
import numpy as np

np.set_printoptions(linewidth=100000)

In [140]:
class ConnectFour():
    def __init__(self, n_games=1):
        self.board_dim = (6,7)
        self.n_games = n_games
        self.current_position = np.zeros(n_games, dtype=np.uint64)
        self.mask = np.zeros(n_games, dtype=np.uint64)
        self.moves = np.zeros(n_games, dtype=np.uint64)
        self.active_mask = np.ones(n_games, dtype=np.uint64)
        self.turn_mask = np.zeros(n_games, dtype=np.uint64)
        
        self.__pieces = np.reshape(2 << (np.array(np.linspace(0, 48, 49), dtype=np.uint64) - 1), (7, 7)).T
        self.__pieces[0][0] = 1
        self.__pieces = np.flip(self.__pieces, 0)
    
    def __top_mask(self, col):
        return np.left_shift(1, self.board_dim[0] - 1) << (col * (self.board_dim[0] + 1))

    def __bottom_mask(self, col):
        return np.left_shift(1, col * (self.board_dim[0] + 1))

    def __column_mask(self, col):
        return (np.left_shift(1, self.board_dim[0]) - 1) << (col * (self.board_dim[0] + 1))
    
    def __alignment(self, pos):
        m = pos & (pos >> (self.board_dim[0] + 1))
        horizontal = np.where(m & (m >> (2 * (self.board_dim[0] + 1))), 1, 0)

        m = pos & (pos >> self.board_dim[0])
        diag_a = np.where(m & (m >> (2 * self.board_dim[0])), 1, 0)

        m = pos & (pos >> (self.board_dim[0] + 2))
        diag_b = np.where(m & (m >> (2 * (self.board_dim[0] + 2))), 1, 0)

        m = pos & pos >> 1
        vertical = np.where(m & (m >> 2), 1, 0)
        
        alignment = horizontal | diag_a | diag_b | vertical
        
        return alignment
    
    def __update_active_mask(self):
        self.active_mask = np.where(self.is_alignment(), 0, 1)
    
    def is_alignment(self):
        return self.player_alignment() | self.opponent_alignment()
        
    def player_alignment(self):
        return self.__alignment(self.current_position)
    
    def opponent_alignment(self):
        pos = self.mask ^ self.current_position
        return self.__alignment(pos)
    
    def can_play(self, col):
        return (self.mask & self.__top_mask(col)) == 0
    
    def get_legal_moves(self):
        return np.squeeze(self.can_play(np.array([[0], [1], [2], [3], [4], [5], [6]], dtype=np.uint64))).T
    
    def get_key(self):
        return self.current_position + self.mask
    
    def play(self, col, verbose=False):
        is_legal = self.active_mask & self.can_play(col)
        
        self.current_position = np.where(is_legal, self.current_position ^ self.mask, self.current_position)
        self.mask = np.where(is_legal, self.mask | (self.mask + self.__bottom_mask(col)), self.mask)
        self.turn_mask = np.where(is_legal, 1 - self.turn_mask, self.turn_mask)
        
        self.__update_active_mask()
        
        if verbose:
            print("legality", is_legal)
            print("active", self.active_mask)
            print("turn", self.turn_mask)
            print("cpos", self.current_position)
            print("mask", self.mask)
    
    def to_array(self, game = 0):
        current_player = np.where(self.__pieces & self.current_position[game] != 0, self.turn_mask[game] + 1, 0)
        other_player = np.where(self.__pieces & (self.current_position ^ self.mask)[game] != 0, 2 - self.turn_mask[game], 0)
        return np.array(current_player + other_player, dtype=np.uint64)

In [256]:
def pure_random_ai(board):
    
    legal_moves = board.get_legal_moves()
    scores = np.random.uniform(size=(board.n_games, 7))
    scores = np.where(legal_moves, scores, np.nan)
    choices = np.array(np.nanargmax(scores, axis=-1), dtype=np.uint64)
    
    return choices

class Simulator():
    def __init__(self, AI, n_games=1):
        self.n_games = n_games
        self.board = ConnectFour(n_games)
        self.turn = 0
        self.AI = AI
        
    def start(self, verbose=False):
        for i in range(42):
            choices = self.AI[self.turn](self.board)

            if verbose:
                print("choices", choices)

            self.board.play(choices)
            self.turn = 1 - self.turn
        
        print(self.board.to_array())

In [278]:
sim = Simulator([pure_random_ai, pure_random_ai], 3_000_000)

In [279]:
%time sim.start(False)

[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 1 2]
 [0 1 0 0 0 1 1]
 [2 2 0 0 2 1 1]
 [1 1 0 2 2 2 2]]
Wall time: 41.7 s


In [283]:
winners = 1 - sim.board.turn_mask

In [284]:
print(np.count_nonzero(winners))

1332125


In [285]:
print(3_000_000 - np.count_nonzero(winners))

1667875
