In [None]:
import tensorflow as tf
from tqdm import tqdm

In [None]:
# Number of bins along each axis of a strategy table.
STRAT_SIZE = 10

# Trainable strategy lookup tables, initialised uniformly at random.
# The simulators in this notebook index a table as
#   [discret(probs), discret(pots / TABLE_SIZE), discret(larg)]
# NOTE(review): the previous comment here listed the axes as
# (my_prob, largest_bet, total_balance_bet), which appears to have the last
# two axes swapped relative to that indexing -- confirm which is intended.
basic_sim_strat = tf.Variable(
    initial_value=tf.random.uniform(shape=(STRAT_SIZE, STRAT_SIZE, STRAT_SIZE)),
    trainable=True,
)
table_round_strat = tf.Variable(
    initial_value=tf.random.uniform(shape=(STRAT_SIZE, STRAT_SIZE, STRAT_SIZE)),
    trainable=True,
)

In [None]:
# Leading dimension of every simulation tensor: rows simulated per batch.
BATCH_SIZE = 2 ** 15  # 32768
# Seats per simulated row; also the number of turns each simulator loops over.
TABLE_SIZE = 6

@tf.function
def discret(x):
    """Convert values in [0, 1] into integer bin indices for a strategy table.

    The input is scaled by STRAT_SIZE and cast to int32. The result is then
    clamped to [0, STRAT_SIZE - 1]: without the clamp an input of exactly 1.0
    maps to STRAT_SIZE, one past the last valid bin, which makes
    tf.gather / tf.gather_nd fail on CPU (or silently return zeros on GPU).

    Args:
        x: float tensor of values expected to lie in [0, 1].

    Returns:
        int32 tensor with the same shape as ``x`` holding bin indices in
        [0, STRAT_SIZE - 1].
    """
    bins = tf.cast(x * STRAT_SIZE, tf.int32)
    return tf.clip_by_value(bins, 0, STRAT_SIZE - 1)

@tf.function
def run_basic_sim_batch(strat):
    """Simulate one batch of betting rounds and return the negated mean reward.

    Every row of the batch has TABLE_SIZE seats. Each seat draws a uniform
    random value (``probs``) and, in seat order, looks its action up in
    ``strat``. One seat per row is chosen at random: only that seat's table
    lookups carry gradient, and only that seat's reward enters the result.

    Args:
        strat: strategy table indexed as
            [discret(probs), discret(pots / TABLE_SIZE), discret(larg)].

    Returns:
        Scalar tensor equal to minus the mean reward of the chosen seats, so
        minimising it maximises that reward.
    """
    row_ids = tf.range(BATCH_SIZE)

    # Choose, per row, the single seat whose reward is optimised.
    seat_draw = tf.random.uniform((BATCH_SIZE,))
    chosen_seat = tf.cast(seat_draw * TABLE_SIZE, tf.int32)
    samp_idx = tf.stack([row_ids, chosen_seat], axis=-1)

    probs = tf.random.uniform((BATCH_SIZE, TABLE_SIZE))

    # Slice of the table selected by each seat's own value, clamped to [0, 1].
    looked_up = tf.gather(strat, discret(probs), axis=0)
    strats = tf.minimum(tf.maximum(looked_up, 0.0), 1.0)

    # Let gradient flow only through the chosen seat's slice.
    is_chosen = tf.equal(chosen_seat[:, tf.newaxis], tf.range(TABLE_SIZE)[tf.newaxis, :])
    strats = tf.where(is_chosen[:, :, tf.newaxis, tf.newaxis], strats, tf.stop_gradient(strats))

    pots = tf.zeros((BATCH_SIZE,))
    larg = tf.zeros((BATCH_SIZE,))
    stacks = tf.ones((BATCH_SIZE, TABLE_SIZE))
    playing = tf.zeros((BATCH_SIZE,))
    risk_columns = []
    for seat in range(TABLE_SIZE):
        # Index (row, seat, pot bin, largest-action bin) into the per-seat slices.
        lookup = tf.stack([
            row_ids,
            tf.fill((BATCH_SIZE,), seat),
            discret(pots / TABLE_SIZE),
            discret(larg),
        ], axis=-1)
        act = tf.gather_nd(strats, lookup)
        # A seat takes part only when its action is at least the largest so far.
        plays = tf.greater_equal(act, larg)
        playing = playing + tf.cast(plays, tf.float32)
        larg = tf.where(plays, act, larg)
        pots = tf.where(plays, pots + stacks[:, seat] * act, pots)
        risk_columns.append(tf.where(plays, act, 0.0))
    risks = tf.stack(risk_columns, axis=1)

    # Multiplying by this mask leaves probs unchanged (a zero entry adds zero
    # to the sum either way); it is kept so the arithmetic stays the same.
    nonzero = tf.cast(tf.not_equal(probs, 0.0), tf.float32)
    prob_total = tf.reduce_sum(probs * nonzero, axis=-1, keepdims=True)
    share = probs / prob_total
    reward = stacks + (share * playing[:, tf.newaxis] - 1) * risks

    sampled_reward = tf.gather_nd(reward, samp_idx)
    return tf.negative(tf.reduce_mean(sampled_reward))

In [None]:
# Train basic_sim_strat on the basic simulation with Adam.
ITERS = 100
optimizer = tf.keras.optimizers.Adam(.1)
for i in range(ITERS):
    # Only the forward pass has to be recorded by the tape.
    with tf.GradientTape() as tape:
        neg_risks = run_basic_sim_batch(basic_sim_strat)
    # Progress line, overwritten in place via "\r"; shows the (positive) mean reward.
    print(str(i) + "/" + str(ITERS), -neg_risks.numpy(), end="\r")
    # The gradient of a gathered variable may come back as tf.IndexedSlices;
    # convert_to_tensor turns it into a dense tensor of the variable's shape.
    grads = tf.convert_to_tensor(tape.gradient(neg_risks, basic_sim_strat))
    # apply_gradients expects (gradient, variable) pairs. zip(grads, [var])
    # would iterate over the gradient tensor's first axis and pair only
    # grads[0] with the variable, so the pair is built explicitly.
    optimizer.apply_gradients([(grads, basic_sim_strat)])

In [None]:
@tf.function
def run_diverse_sim_batch(strat):
    """Simulate one batch with a randomly perturbed table; return negated mean reward.

    Works like the basic simulation, except that before the lookup each entry
    of ``strat`` is independently kept when a uniform draw exceeds 0.1 and is
    otherwise replaced by a fresh uniform random value. One seat per row is
    chosen at random: only its lookups carry gradient and only its reward
    enters the result.

    Args:
        strat: strategy table indexed as
            [discret(probs), discret(pots / TABLE_SIZE), discret(larg)].

    Returns:
        Scalar tensor equal to minus the mean reward of the chosen seats.
    """
    row_ids = tf.range(BATCH_SIZE)

    # Choose, per row, the single seat whose reward is optimised.
    seat_draw = tf.random.uniform((BATCH_SIZE,))
    chosen_seat = tf.cast(seat_draw * TABLE_SIZE, tf.int32)
    samp_idx = tf.stack([row_ids, chosen_seat], axis=-1)

    probs = tf.random.uniform((BATCH_SIZE, TABLE_SIZE))

    # Randomly overwrite table entries (those whose draw is <= 0.1).
    keep_entry = tf.random.uniform(tf.shape(strat)) > .1
    random_entries = tf.random.uniform(tf.shape(strat))
    noisy_strat = tf.where(keep_entry, strat, random_entries)

    # Slice selected by each seat's own value, clamped to [0, 1].
    looked_up = tf.gather(noisy_strat, discret(probs), axis=0)
    strats = tf.minimum(tf.maximum(looked_up, 0.0), 1.0)

    # Let gradient flow only through the chosen seat's slice.
    is_chosen = tf.equal(chosen_seat[:, tf.newaxis], tf.range(TABLE_SIZE)[tf.newaxis, :])
    strats = tf.where(is_chosen[:, :, tf.newaxis, tf.newaxis], strats, tf.stop_gradient(strats))

    pots = tf.zeros((BATCH_SIZE,))
    larg = tf.zeros((BATCH_SIZE,))
    stacks = tf.ones((BATCH_SIZE, TABLE_SIZE))
    playing = tf.zeros((BATCH_SIZE,))
    risk_columns = []
    for seat in range(TABLE_SIZE):
        # Index (row, seat, pot bin, largest-action bin) into the per-seat slices.
        lookup = tf.stack([
            row_ids,
            tf.fill((BATCH_SIZE,), seat),
            discret(pots / TABLE_SIZE),
            discret(larg),
        ], axis=-1)
        act = tf.gather_nd(strats, lookup)
        # A seat takes part only when its action is at least the largest so far.
        plays = tf.greater_equal(act, larg)
        playing = playing + tf.cast(plays, tf.float32)
        larg = tf.where(plays, act, larg)
        pots = tf.where(plays, pots + stacks[:, seat] * act, pots)
        risk_columns.append(tf.where(plays, act, 0.0))
    risks = tf.stack(risk_columns, axis=1)

    # Multiplying by this mask leaves probs unchanged (a zero entry adds zero
    # to the sum either way); it is kept so the arithmetic stays the same.
    nonzero = tf.cast(tf.not_equal(probs, 0.0), tf.float32)
    prob_total = tf.reduce_sum(probs * nonzero, axis=-1, keepdims=True)
    share = probs / prob_total
    reward = stacks + (share * playing[:, tf.newaxis] - 1) * risks

    sampled_reward = tf.gather_nd(reward, samp_idx)
    return tf.negative(tf.reduce_mean(sampled_reward))

In [None]:
# Continue training basic_sim_strat, now on the perturbed-table simulation.
ITERS = 100000
optimizer = tf.keras.optimizers.Adam(.05)
for i in range(ITERS):
    # Only the forward pass has to be recorded by the tape.
    with tf.GradientTape() as tape:
        neg_risks = run_diverse_sim_batch(basic_sim_strat)
    # Progress line, overwritten in place via "\r"; shows the (positive) mean reward.
    print(str(i) + "/" + str(ITERS), -neg_risks.numpy(), end="\r")
    # Make sure the gradient is a dense tensor of the variable's shape.
    grads = tf.convert_to_tensor(tape.gradient(neg_risks, basic_sim_strat))
    # apply_gradients expects (gradient, variable) pairs. zip(grads, [var])
    # would iterate over the gradient tensor's first axis and pair only
    # grads[0] with the variable, so the pair is built explicitly.
    optimizer.apply_gradients([(grads, basic_sim_strat)])

In [None]:
@tf.function
def run_single_table_round_batch(strat):
    """Simulate one batch of rounds with random flat calls; return negated mean reward.

    Every row of the batch has TABLE_SIZE seats. Each seat draws a uniform
    random value (``probs``) and, in seat order, looks its action up in
    ``strat``. When the looked-up action is at least the largest action so
    far, it is replaced by exactly that largest action with probability 0.75.
    One seat per row is chosen at random: only its lookups carry gradient and
    only its reward enters the result.

    Args:
        strat: strategy table indexed as
            [discret(probs), discret(pots / TABLE_SIZE), discret(larg)].

    Returns:
        Scalar tensor equal to minus the mean reward of the chosen seats.
    """
    row_ids = tf.range(BATCH_SIZE)

    # Choose, per row, the single seat whose reward is optimised.
    seat_draw = tf.random.uniform((BATCH_SIZE,))
    chosen_seat = tf.cast(seat_draw * TABLE_SIZE, tf.int32)
    samp_idx = tf.stack([row_ids, chosen_seat], axis=-1)

    probs = tf.random.uniform((BATCH_SIZE, TABLE_SIZE))

    # Slice selected by each seat's own value, clamped to [0, 1].
    looked_up = tf.gather(strat, discret(probs), axis=0)
    strats = tf.minimum(tf.maximum(looked_up, 0.0), 1.0)

    # Let gradient flow only through the chosen seat's slice. Broadcasting
    # compares (BATCH_SIZE, 1) against (1, TABLE_SIZE); no tf.repeat is needed.
    is_chosen = tf.equal(chosen_seat[:, tf.newaxis], tf.range(TABLE_SIZE)[tf.newaxis, :])
    strats = tf.where(is_chosen[:, :, tf.newaxis, tf.newaxis], strats, tf.stop_gradient(strats))

    pots = tf.zeros((BATCH_SIZE,))
    larg = tf.zeros((BATCH_SIZE,))
    stacks = tf.ones((BATCH_SIZE, TABLE_SIZE))
    playing = tf.zeros((BATCH_SIZE,))
    risk_columns = []
    for seat in range(TABLE_SIZE):
        # Index (row, seat, pot bin, largest-action bin) into the per-seat slices.
        lookup = tf.stack([
            row_ids,
            tf.fill((BATCH_SIZE,), seat),
            discret(pots / TABLE_SIZE),
            discret(larg),
        ], axis=-1)
        table_act = tf.gather_nd(strats, lookup)
        # With probability 0.75 an action that would match or exceed the
        # largest one so far is flattened to exactly that largest action.
        flatten = tf.math.logical_and(
            tf.random.uniform(tf.shape(table_act)) < .75,
            table_act >= larg,
        )
        act = tf.where(flatten, larg, table_act)
        # A seat takes part only when its action is at least the largest so far.
        plays = tf.greater_equal(act, larg)
        playing = playing + tf.cast(plays, tf.float32)
        larg = tf.where(plays, act, larg)
        pots = tf.where(plays, pots + stacks[:, seat] * act, pots)
        risk_columns.append(tf.where(plays, act, 0.0))
    risks = tf.stack(risk_columns, axis=1)

    # NOTE(review): this mask never changes the sum (a zero entry adds zero
    # either way). If the intent was to normalise only over seats that took
    # part, the per-seat `plays` flags would have to be used here -- confirm
    # the intended reward before changing it.
    nonzero = tf.cast(tf.not_equal(probs, 0.0), tf.float32)
    prob_total = tf.reduce_sum(probs * nonzero, axis=-1, keepdims=True)
    share = probs / prob_total
    reward = stacks + (share * playing[:, tf.newaxis] - 1) * risks

    sampled_reward = tf.gather_nd(reward, samp_idx)
    return tf.negative(tf.reduce_mean(sampled_reward))

In [None]:
# Train table_round_strat on the single-table-round simulation with Adam.
ITERS = 500
optimizer = tf.keras.optimizers.Adam(.1)
for i in range(ITERS):
    # Only the forward pass has to be recorded by the tape.
    with tf.GradientTape() as tape:
        neg_risks = run_single_table_round_batch(table_round_strat)
    # Progress line, overwritten in place via "\r"; shows the (positive) mean reward.
    print(str(i) + "/" + str(ITERS), -neg_risks.numpy(), end="\r")
    # The gradient of a gathered variable may come back as tf.IndexedSlices;
    # convert_to_tensor turns it into a dense tensor of the variable's shape.
    grads = tf.convert_to_tensor(tape.gradient(neg_risks, table_round_strat))
    # apply_gradients expects (gradient, variable) pairs. zip(grads, [var])
    # would iterate over the gradient tensor's first axis and pair only
    # grads[0] with the variable, so the pair is built explicitly.
    optimizer.apply_gradients([(grads, table_round_strat)])

In [None]:
import numpy as np

# Persist the current values of basic_sim_strat in NumPy's .npy format.
# np.save appends ".npy" when the name lacks it, so this writes "./npy.npy".
# Only basic_sim_strat is saved here; table_round_strat is not.
np.save(file="./npy", arr=basic_sim_strat.numpy())