In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [13]:
def gym(players, seed=None):
    """Build a self-play environment for a chip-and-card bidding game.

    Rules as implemented here: 24 cards are drawn at random from the values
    3..35. One card at a time is offered; the player to move either pays one
    chip onto the card (passing it to the next player) or takes the card
    together with every chip lying on it. Whoever takes a card also decides
    first on the next one. A player's score is their chips minus the lowest
    card of every run of consecutive cards they hold.

    Parameters
    ----------
    players : sequence of callables
        One policy per seat. A policy is called with the observation array
        built by ``get_serial`` (current player in row 0) and returns a
        truthy value to pay a chip and pass, or a falsy value to take the
        card. A player holding no chips is not consulted and takes the card.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` drives the deck
        shuffle and the starting-player draw, so the sequence of games from
        this gym is reproducible. When ``None`` (default) the global
        ``np.random`` state is used.

    Returns
    -------
    run : callable
        ``run()`` plays one complete game and returns ``(state, finish)``
        where ``state`` is a list of ``[chips, sorted_cards]`` per seat.
    replay : list
        Shared log; every decision appends
        ``(answer, observation_before, observation_after)``.
    finish : list
        Shared log; every game appends, per player from lowest to highest
        score, ``(final_observation, score + placement_bonus)``.

    Both lists keep growing across successive ``run()`` calls.
    """
    n = len(players)
    # The np.random module and RandomState expose the same shuffle/randint
    # API, so an omitted seed falls through to the global generator.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Starting chips: 11 for up to 5 players, 9 for 6, 7 for 7 or more.
    base_coin = 21 - np.clip(n, 5, 7)*2
    replay = []
    finish = []
    # Placement bonus indexed by rank (0 = lowest score): 20 * (rank + 1),
    # with 20 removed from the lowest rank and 20 added to the highest.
    place_value = list((np.arange(n) + 1)*20)
    place_value[0] -= 20
    place_value[-1] += 20

    def init():
        """Deal a fresh game: 24 shuffled cards, full chips, random starter."""
        deck = np.arange(33) + 3
        rng.shuffle(deck)
        deck = list(deck[:24])
        state = [[base_coin, []] for _ in range(n)]
        turn = rng.randint(n)
        pot = None
        return state, turn, pot, deck

    def get_serial(state, turn, pot):
        """Encode the table as an ``(n + 1, 89)`` 0/1 integer array.

        Rows 0..n-1 are the players, rotated so that ``turn`` is row 0.
        Columns 0..55 one-hot the chip count; columns 56..88 flag the held
        cards 3..35 (card ``c`` sits at column ``c + 53``). The last row
        encodes the pot the same way and stays all-zero when ``pot`` is None.
        """
        s_rot = state[turn:] + state[:turn]
        out = np.zeros((n+1, 89), int)
        for i in range(n):
            out[i, s_rot[i][0]] = 1
            for c in s_rot[i][1]:
                out[i, c+53] = 1
        if pot is not None:
            out[-1, pot[0]] = 1
            out[-1, pot[1]+53] = 1
        return out

    def play(state, turn, pot):
        """Resolve one decision; return truthy when the card was passed on."""
        sc = state[turn]
        s0 = get_serial(state, turn, pot)
        # Short-circuit: a player without chips is forced to take the card.
        ans = sc[0] > 0 and players[turn](s0)
        if ans:
            sc[0] -= 1
            pot[0] += 1
        else:
            sc[0] += pot[0]
            sc[1].append(pot[1])
            sc[1].sort()
            # Only the local name is cleared so that the "after" observation
            # shows an empty pot; the caller's list is left alone.
            pot = None
        replay.append((ans, s0, get_serial(state, turn, pot)))
        return ans

    def get_score(state, turn):
        """Chips minus the first (lowest) card of each consecutive run."""
        sc = state[turn]
        out = sc[0]
        if len(sc[1]) > 0:
            out -= sc[1][0]
            # Cards are kept sorted, so a gap marks the start of a new run.
            for p, q in zip(sc[1][:-1], sc[1][1:]):
                if p+1 != q:
                    out -= q
        return out

    def run():
        """Play one full game and append its results to ``finish``."""
        state, turn, pot, deck = init()
        while len(deck) > 0:
            pot = [0, deck.pop()]
            # Pass the card around until someone takes it; the taker keeps
            # the turn for the next card.
            while play(state, turn, pot):
                turn = (turn + 1) % n

        scores = [get_score(state, i) for i in range(n)]
        # argsort is ascending, so place 0 is the lowest score.
        for place, idx in enumerate(np.argsort(scores)):
            finish.append((get_serial(state, idx, None), scores[idx] + place_value[place]))

        return state, finish

    return run, replay, finish

In [14]:
# Seat five identical players whose policy always answers True, i.e. they
# pay a chip and pass for as long as they have chips left.
run, rep, finish = gym(
    [(lambda _obs: True) for _seat in range(5)]
)

In [15]:
# Play one complete game. The displayed value is the tuple (state, finish):
# state lists [chips, sorted cards] for every player, and finish holds one
# (serialized final state, score + placement bonus) pair per player.
run()

([[0, [3, 5, 7, 11, 18]],
  [10, [4, 10, 12, 20, 23, 25, 28, 34]],
  [0, [9, 14, 21, 24, 29]],
  [45, [15]],
  [0, [6, 19, 27, 30, 33]]],
 [(array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
           0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
           0],
          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
           0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
           0],
          [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 