In [1]:
import numpy as np
from ipypb import irange
from sklearn.ensemble import RandomForestClassifier

In [2]:
def get0():
    """Build the starting 6x89 boolean board.

    Rows 0-4 each carry a single True in column 11 and row 5 carries a
    single True in column 0; every other cell is False.
    Presumably this is "five players with 11 coins each, empty pot"
    (columns 0-55 are read as a one-hot counter by `parse`) -- TODO confirm.
    """
    board = np.zeros((6, 89), dtype=bool)
    board[5, 0] = True
    board[0:5, 11] = True
    return board

def disp(s):
    """Print the board as text.

    Emits a fixed ruler line, then one line per board row: the first 56
    columns as 0/1 digits, a single space, then the remaining columns.
    """
    print('0    .    1    .    2    .    3    .    4    .    5    . 3 .    1    .    2    .    3    .')
    for digits in s.astype(int).astype(str):
        left = ''.join(digits[:56])
        right = ''.join(digits[56:])
        # print's default separator supplies the single space between halves
        print(left, right)

def pack(s):
    """Flatten `s` and pack its flags into bytes, eight flags per uint8.

    A 6x89 board (534 flags) becomes a 1-D array of 67 bytes; the last
    byte is zero-padded.
    """
    packed = np.packbits(s, axis=None)
    return packed

def unpack(p):
    """Unpack bit-packed boards back into boolean feature vectors.

    Parameters
    ----------
    p : uint8 array whose last axis holds the packed bytes of one board
        (67 bytes for the 534 flags of a 6x89 board). May be a 2-D batch
        of shape (N, 67) or a single 1-D packed board as returned by `pack`.

    Returns
    -------
    Boolean array with the last axis expanded to 534 flags: shape (N, 534)
    for a batch, (534,) for a single packed board.
    """
    # axis=-1 matches the previous axis=1 behaviour for 2-D batches and also
    # accepts the 1-D output of pack(), which axis=1 rejected with an AxisError.
    return np.unpackbits(p, count=534, axis=-1).astype(bool)
    
def parse(s):
    """Decode a boolean board into per-row index lists.

    For every row, the columns holding True are collected in ascending
    order. The first entry is kept as-is (the row's counter column); every
    later entry has 53 subtracted, so columns 56..88 read as 3..35.

    Returns ``(pot, top, coin, rows)`` where ``pot`` and ``top`` are the
    first and second entries of the last row, ``coin`` is the first entry
    of row 0, and ``rows`` is the full list of decoded rows.
    Raises IndexError if the last row has fewer than two set flags.
    """
    rows = []
    for flags in s:
        hits = [col for col in range(89) if flags[col]]
        rows.append(hits[:1] + [col - 53 for col in hits[1:]])
    table = rows[-1]
    pot, top = table[0], table[1]
    coin = rows[0][0]
    return pot, top, coin, rows

def draw(s):
    """Place one random, not-yet-owned card on the last row, in place.

    A card (columns 56..88) counts as owned when any of rows 0-4 has its
    flag set. If more than 9 cards are unowned, one of them is picked at
    random, its flag is set on the last row and True is returned.
    Otherwise the board is left untouched and False is returned.
    """
    # Compute ownership once; the original recomputed this column sum for
    # each of the 33 candidates inside the comprehension.
    owned = s[:5, 56:].any(axis=0)
    no = [i for i in range(33) if not owned[i]]
    if len(no) <= 9:
        return False
    # Shuffle-then-take-first is kept (rather than a direct random choice)
    # so the sequence of draws under a given seed stays the same.
    np.random.shuffle(no)
    s[-1, 56 + no[0]] = True
    return True

def play(s, n):
    """Apply the row-0 player's decision to the board, in place.

    If `n` is truthy and the player has at least one coin, the player's
    counter drops by one, the last row's counter rises by one and True is
    returned (the offered card stays on the last row).
    Otherwise the player's counter absorbs the last row's counter, the last
    row's card flags are merged into the player's row, the last row is reset
    to a lone True in column 0, and False is returned.
    """
    pot, top, coin, _ = parse(s)
    declined = coin > 0 and n
    # Both outcomes rewrite the player's one-hot counter, so clear it first.
    s[0, :56] = False
    if not declined:
        s[0, pot + coin] = True
        s[0, 56:] += s[-1, 56:]
        s[-1, :] = False
        s[-1, 0] = True
        return False
    s[0, coin - 1] = True
    s[-1, :56] = False
    s[-1, pot + 1] = True
    return True

def shift(s):
    """Rotate rows 0-4 up by one, in place: row 1 becomes row 0, row 0 becomes row 4.

    Rows from index 5 onward are left untouched.
    """
    # Right-hand side is a fresh array, so the in-place assignment cannot alias.
    s[:5] = np.concatenate((s[1:5], s[:1]))
    
def getp(s):
    """Score the row-0 player relative to the players in rows 1-4.

    Returns ``(coin, card, raw, place, raw + place)``:
    ``coin`` is the column of row 0's first set flag among columns 0-55,
    ``card`` is the (non-positive) card total, ``raw`` is ``coin + card``,
    and ``place`` is taken from ``[120, 80, 60, 40, 0]`` using the number of
    other rows whose raw score is strictly greater.
    """
    def rscore(line):
        # Counter value = column index of the first set flag in columns 0..55.
        coin = [idx for idx in range(56) if line[idx]][0]
        cards = line[56:]
        # cards[k] stands for value k + 3. A card only costs points when the
        # card directly below it is absent, i.e. it starts a consecutive run.
        cost = 3 if cards[0] else 0
        cost += sum(
            v + 4
            for v, (below, here) in enumerate(zip(cards[:-1], cards[1:]))
            if here and not below
        )
        return coin, -cost, coin - cost

    scores = [rscore(row) for row in s[:5]]
    coin, card, raw = scores[0]
    ahead = sum(1 for _, _, total in scores[1:] if total > raw)
    place = (120, 80, 60, 40, 0)[ahead]
    return coin, card, raw, place, raw + place
    
def bot0(s):
    """Heuristic decision for the row-0 player; the result is fed to `play` as `n`.

    Let "neighbours" be the values one below and one above the offered card.
    - If row 0 holds no neighbour: return whether a random integer in
      ``[1, top)`` exceeds ``pot - coin + 10``.
    - If row 0 is the only one of rows 0-4 holding a neighbour: return
      whether ``(top - pot) / (pot + 1) > 2``.
    - If row 0 and at least one other row hold a neighbour: return False.
    """
    pot, top, coin, rows = parse(s)
    neighbours = {top - 1, top + 1}
    holders = [seat for seat in range(5) if neighbours & set(rows[seat][1:])]
    if 0 not in holders:
        return np.random.randint(1, top) > pot - coin + 10
    if holders == [0]:
        return (top - pot) / (pot + 1) > 2
    return False

def example():
    """Play one full game with `bot0` in every seat, printing the board after each step.

    After the game a report line is printed per row. Note that rows are
    labelled by their position after the final rotation; the rotation is
    not undone before reporting.
    """
    board = get0()
    disp(board)
    while True:
        if not draw(board):
            break
        disp(board)
        while True:
            if not play(board, bot0(board)):
                break
            disp(board)
            shift(board)
            disp(board)
        disp(board)
    print('report:')
    for seat in range(5):
        print(seat, '>', getp(board))
        shift(board)

# Base dataset: self-play games of the heuristic bot (`bot0`)

In [None]:
# Self-play with bot0: at every decision point record the packed board
# *before* the move (X) and the outcome returned by play() (Y).
# NOTE(review): the numpy RNG is not seeded here, so each run yields a different dataset.
X, Y = [], []
for _ in irange(10000):
    s = get0()
    while draw(s):
        while True:
            X.append(pack(s))
            Y.append(play(s, bot0(s)))
            if Y[-1]:
                # play() returned True: the card stays on offer, next player's turn
                shift(s)
            else:
                break

X = np.array(X)        # (n_decisions, 67) uint8
Y = pack(np.array(Y))  # labels bit-packed, 8 per byte
X.shape, X.dtype, Y.shape, Y.dtype

In [None]:
# Persist the packed boards (X) and packed labels (Y) to a compressed .npz archive.
np.savez_compressed('base.npz', X=X, Y=Y)

In [None]:
# Reload the saved dataset and expand it to boolean arrays.
# The archive is opened once and closed by the context manager; previously
# np.load was called twice and neither NpzFile handle was closed.
with np.load('base.npz') as base_npz:
    X = unpack(base_npz['X'])
    # Y was bit-packed, so trim the zero padding back to one label per row of X.
    Y = np.unpackbits(base_npz['Y'], count=X.shape[0]).astype(bool)
X.shape, X.dtype, Y.shape, Y.dtype

In [None]:
def bot_factory(X, Y):
    """Fit a random forest on (X, Y) and return a stochastic bot built on it.

    The returned function takes a board `s`, flattens it to a single
    feature row and returns True when a uniform random draw exceeds the
    forest's predicted probability for its first class (column 0 of
    `predict_proba`).
    Presumably that first class is the False label, so the bot returns
    True with the predicted probability of True -- TODO confirm that the
    training labels always contain both classes.
    """
    forest = RandomForestClassifier().fit(X, Y)

    def bot_(s):
        roll = np.random.rand()
        features = s.reshape((1, -1))
        return roll > forest.predict_proba(features)[0, 0]

    return bot_

def test_bot(bots):
    """Play one game with `bots[i]` deciding for seat i, then print the result.

    `bots` must provide at least five callables taking the board and
    returning the decision passed to `play`. After the game the rows are
    rotated back so that row i belongs to `bots[i]`, the board is printed,
    and one score line per seat is printed.
    """
    s = get0()
    t = 0  # index into `bots` of the player currently sitting in row 0
    while draw(s):
        while play(s, bots[t](s)):
            shift(s)
            t = (t + 1) % 5
    # Finish the rotation (5 - t more steps) so row 0 is seat 0 again.
    for _ in range(5 - t):
        shift(s)
    disp(s)
    for i in range(5):
        score = getp(s)
        print(i, '>', bots[i], f'> score={score[2]}, reward={score[3]}, total={score[4]}')
        shift(s)

In [None]:
# Fit the random-forest bot on the loaded decisions (X: boards, Y: outcomes).
bot1 = bot_factory(X,Y)

In [None]:
# One game with the same trained bot in all five seats.
test_bot([bot1] * 5)