In [516]:
from collections import Counter

import numpy as np
np.set_printoptions(suppress=True)
from tqdm import tqdm, trange

In [508]:
import tensorflow as tf
from tensorflow.keras import backend as K
# Turn on the memory-growth option for every GPU TensorFlow reports; with no GPU
# present the list is empty and this loop is a no-op.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)


In [631]:
def make_model(map_size=5, num_nodes=128, num_dense=4):
    """Build and compile a two-headed fully connected classifier.

    The network flattens a ``(map_size, map_size)`` input, passes it through
    ``num_dense`` ReLU layers of ``num_nodes`` units each, and ends in two
    independent 5-unit logit heads named ``'Y0'`` and ``'Y1'``.

    Args:
        map_size: Side length of the square input grid.
        num_nodes: Number of units in each hidden Dense layer.
        num_dense: Number of hidden Dense layers.

    Returns:
        A compiled ``tf.keras.Model`` with outputs ``[Y0, Y1]``. Both heads are
        trained with sparse categorical cross-entropy on raw logits (so labels
        are integer class ids) using the Adam optimizer, and report accuracy.
    """
    inputs = tf.keras.layers.Input(shape=(map_size, map_size))
    x = tf.keras.layers.Flatten()(inputs)

    for _ in range(num_dense):
        x = tf.keras.layers.Dense(num_nodes, activation='relu')(x)

    # Two output branches sharing the same trunk. No activation: the loss below
    # is configured with from_logits=True.
    # NOTE(review): both heads are fixed at 5 classes regardless of map_size —
    # presumably they should track map_size; confirm before changing.
    output1 = tf.keras.layers.Dense(5, name='Y0')(x)
    output2 = tf.keras.layers.Dense(5, name='Y1')(x)

    model = tf.keras.models.Model(inputs=inputs, outputs=[output1, output2])

    # A single loss object is passed, so Keras applies it to each output.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

In [623]:
# Build one model with the default architecture. The training loop in a later
# cell rebinds the global `model` on every iteration.
model = make_model()

In [563]:
def make_data(h, n=320000, verbose=False):
    """Generate random 5x5 grids and, for each, the index of the best row.

    NOTE: a later cell defines another ``make_data`` with the same name and a
    three-value return; whichever cell ran last is the one in effect.

    Each grid is upper-staircase shaped: row ``j`` holds ``5 - j`` values drawn
    uniformly from ``[4 + j, 6 + j)``, sorted in descending order and padded
    with zeros on the right.

    Args:
        h: Weights multiplied element-wise into the grid (broadcast by NumPy;
            presumably a length-5 vector of per-column weights — confirm).
        n: Number of samples to generate.
        verbose: Show a tqdm progress bar when True.

    Returns:
        ``(X, Y)`` where ``X`` stacks the grids and ``Y`` holds, per grid, the
        argmax over rows of the row-wise sum of ``grid * h``.
    """
    column_offsets = np.arange(5, 10)
    grids, labels = [], []
    for _ in trange(n, disable=not verbose):
        # Uniform noise in [-1, 1) around a per-column offset.
        grid = (np.random.random((5, 5)) - 0.5) * 2 + column_offsets
        # Keep the lower triangle, transpose it, then order each row descending
        # so the zeros end up on the right.
        grid = np.tril(grid).T
        grid.sort(axis=1)
        grid = grid[:, ::-1]
        grids.append(grid)
        labels.append((grid * h).sum(axis=1).argmax())
    return np.array(grids), np.array(labels)

In [749]:
def make_data(h, n=320000, verbose=False):
    """Generate column-shuffled 5x5 grids with a row label and a column label.

    A base grid is built first: row ``j`` holds ``5 - j`` values drawn
    uniformly from ``[5 + j, 7 + j)``, sorted descending and zero-padded on the
    right. Every row is then shuffled with its own random column permutation.

    Args:
        h: Indexed as ``h[perm]`` and passed as ``p`` to ``np.random.choice``,
            so it must be a length-5 NumPy array of probabilities. It is also
            multiplied into the unshuffled grid as per-column weights.
        n: Number of samples to generate.
        verbose: Show a tqdm progress bar when True.

    Returns:
        ``(X, Y0, Y1)``:
        ``X``  - the shuffled grids;
        ``Y0`` - per grid, the row whose ``h``-weighted sum is largest;
        ``Y1`` - per grid, a shuffled column position of row ``Y0`` sampled
        with probabilities ``h[perm]`` (``perm`` being that row's permutation).
    """
    column_offsets = np.arange(6, 11)
    positions = [0, 1, 2, 3, 4]
    X, Y0, Y1 = [], [], []
    for _ in trange(n, disable=not verbose):
        grid = (np.random.random((5, 5)) - 0.5) * 2 + column_offsets
        grid = np.tril(grid).T
        grid.sort(axis=1)
        grid = grid[:, ::-1]
        weighted = grid * h

        # Draw, row by row, a permutation followed by a sampled position. The
        # order of these two random calls per row is kept exactly as-is.
        perms, picks = [], []
        for _row in range(5):
            perm = np.random.permutation(5)
            perms.append(perm)
            picks.append(np.random.choice(positions, p=h[perm]))
        perms = np.array(perms)

        # Apply each row's permutation to the values and to the weighted values.
        shuffled = np.take_along_axis(grid, perms, axis=1)
        row_scores = np.take_along_axis(weighted, perms, axis=1).sum(axis=1)

        best_row = row_scores.argmax()
        X.append(shuffled)
        Y0.append(best_row)
        # Only the sample drawn for the winning row is kept as the label.
        Y1.append(picks[best_row])

    return np.array(X), np.array(Y0), np.array(Y1)

In [758]:
%%time
# Train one freshly built model per index h in 0..4, each on its own dataset
# generated from H[h], and keep them in `models` keyed by h.
# NOTE(review): `H` is not defined in any visible cell — presumably a 5-row array
# of probability vectors (make_data passes h[p] as `p` to np.random.choice);
# confirm and define it above so the notebook survives Restart & Run All.
models = {}
for h in range(5):
    # n = int(32e3)  (smaller sample count, left disabled)
    n = 3200000 * 4
    # The whole dataset is materialised in memory before fitting.
    X, Y0, Y1 = make_data(H[h], n=n, verbose=True)
    model = make_model()
    # No epochs/batch_size are given, so Keras defaults apply. Labels are passed
    # in the same order as the model's outputs (Y0, Y1).
    model.fit(X,[Y0, Y1])
    models[h] = model

100%|██████████| 12800000/12800000 [31:51<00:00, 6694.80it/s]




100%|██████████| 12800000/12800000 [32:02<00:00, 6658.17it/s]




100%|██████████| 12800000/12800000 [31:13<00:00, 6830.67it/s]




100%|██████████| 12800000/12800000 [32:43<00:00, 6520.11it/s]




100%|██████████| 12800000/12800000 [32:54<00:00, 6481.24it/s]


CPU times: user 4h 7min 40s, sys: 22min 29s, total: 4h 30min 9s
Wall time: 3h 46min 33s


In [759]:
# Generate a separate set of 32000 grids for evaluation; only the inputs are kept
# and both label arrays are discarded.
# NOTE(review): depends on `H`, which is not defined in any visible cell.
_X, _, _ = make_data(H[0], n=32000, verbose=True)

100%|██████████| 32000/32000 [00:05<00:00, 6346.88it/s]


In [760]:
%%time
for h in range(5):
    _Y0, _Y1 = models[h](_X)
    _Y0, _Y1 = _Y0.numpy(), _Y1.numpy()
    first = np.array([ix[iy] for ix, iy in zip(_X, _Y0.argmax(axis=1))])
    second = tf.nn.softmax(_Y1).numpy()
    reward = (first * second).sum(axis=1).mean()
    print(f'{h} {reward:.4f}')

0 6.0124
1 6.1797
2 6.9686
3 8.6825
4 9.5612
CPU times: user 187 ms, sys: 12.8 ms, total: 200 ms
Wall time: 197 ms


In [761]:
# NOTE(review): `x` is never assigned at top level in any visible cell (it only
# appears as a local inside make_data), so this display relies on leftover kernel
# state and will fail on a fresh kernel.
x

array([[5.21557267, 4.95643572, 4.27602829, 4.23190978, 4.12853951],
       [6.97632987, 6.60680879, 5.56566271, 5.14030193, 0.        ],
       [6.62192393, 6.29201343, 6.2670857 , 0.        , 0.        ],
       [7.40710816, 7.18607344, 0.        , 0.        , 0.        ],
       [9.94711606, 0.        , 0.        , 0.        , 0.        ]])

In [762]:
# Show the first grid of `X`, which the training loop last assigned on its final
# iteration (h = 4).
X[0]

array([[ 5.97608586,  5.39042992,  6.41283419,  5.25902765,  5.08961617],
       [ 6.74948422,  6.4867986 ,  7.07274051,  0.        ,  6.16390459],
       [ 0.        ,  7.02389391,  7.49686339,  8.24075488,  0.        ],
       [ 0.        ,  8.52408015,  0.        ,  0.        ,  8.63941092],
       [ 0.        ,  0.        ,  0.        , 10.26600027,  0.        ]])

In [799]:
def softmax(x, b):
    """Softmax of ``b * x`` taken over all elements of ``x``.

    Args:
        x: Array-like of scores. Normalisation runs over the whole array, not
            along a particular axis.
        b: Scalar multiplier applied to the scores before exponentiation;
            values closer to 0 flatten the distribution.

    Returns:
        ``np.ndarray`` with the same shape as ``x`` whose entries sum to 1.
        An empty input gives an empty array.
    """
    scaled = b * np.asarray(x)
    if scaled.size == 0:
        return np.exp(scaled)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (and the ratio from becoming NaN).
    weights = np.exp(scaled - scaled.max())
    return weights / weights.sum()

In [813]:
# Quick check of softmax with a small multiplier (b = 0.1) on scores 10, 9, 0.
softmax(np.array([10,9,0]), 0.1)

array([0.44000202, 0.39813029, 0.1618677 ])