In [1]:
%load_ext line_profiler

In [2]:
from collections import Counter

import numpy as np
rng = np.random.default_rng()
np.set_printoptions(suppress=True, linewidth=150)
from tqdm import tqdm, trange
from tqdm.contrib.concurrent import process_map

In [25]:
def make_model(map_size=5, num_nodes=128, num_dense=4):
    """Build and compile the dense policy network.

    The network takes a flat vector of length
    ``map_size*map_size + map_size + 1`` (for the default ``map_size=5`` that
    is the 6-wide position one-hot followed by the 25 flattened map values
    produced by ``make_data`` / ``eval_model``) and returns a softmax
    distribution over ``map_size`` actions.

    Parameters
    ----------
    map_size : int
        Side length of the square map; also the number of actions.
    num_nodes : int
        Width of every hidden ReLU layer.
    num_dense : int
        Number of hidden layers.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam, sparse categorical cross-entropy on
        probabilities (``from_logits=False``) and an accuracy metric.
    """
    input_dim = map_size * map_size + map_size + 1
    # `shape` has to be a tuple: `(expr)` without a trailing comma is just an
    # int, which is not accepted as a shape by every Keras version.
    inputs = tf.keras.layers.Input(shape=(input_dim,))
    x = tf.keras.layers.Flatten()(inputs)

    for _ in range(num_dense):
        x = tf.keras.layers.Dense(num_nodes, activation='relu')(x)

    # One output unit per action (5 for the default map_size).
    logits = tf.keras.layers.Dense(map_size, name='Y0')(x)
    probs = tf.keras.layers.Softmax()(logits)
    model = tf.keras.models.Model(inputs=inputs, outputs=probs)

    # The model already ends in a Softmax layer, hence from_logits=False.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

In [3]:
def softmax(x, b, axis=-1):
    """Numerically stable softmax of ``b * x`` along ``axis``.

    Parameters
    ----------
    x : array-like
        Input values (anything ``np.asarray`` accepts).
    b : float
        Inverse temperature; ``b = 0`` gives a uniform distribution, larger
        ``b`` concentrates the mass on the largest entries.
    axis : int
        Axis along which the probabilities are normalised.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``x`` whose slices along ``axis`` sum to 1.
    """
    z = b * np.asarray(x)
    # Shift by the maximum of the *scaled* values along the normalised axis.
    # Shifting by the global max of x can underflow a whole slice to 0/0 when
    # that slice lies far below the global max, and overflows when b < 0.
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

In [4]:
def make_x():
    """Return a noisy, row-wise shuffled copy of the global ``arrs`` map.

    Independent uniform noise in [-0.1, 0.1) is added to every entry of
    ``arrs``; afterwards the entries of each row are permuted independently
    (``axis=-1``).  ``arrs`` itself is not modified.

    Relies on the notebook globals ``arrs`` and ``rng``; ``arrs`` must have
    been defined before the first call.
    """
    # Draw from the shared Generator instead of the legacy global np.random
    # state so that all randomness in this function has a single source.
    noise = rng.uniform(-0.1, 0.1, size=arrs.shape)
    return rng.permuted(arrs + noise, axis=-1)

def eval_model(model, n = 1):
    """Average expected two-step reward of ``model`` over ``n`` random maps.

    For every sample a fresh map is drawn with ``make_x`` and a batch of six
    inputs is built: a 6x6 identity block (one position one-hot per row)
    concatenated with the flattened map repeated on every row.  Row 0 of the
    model output weights the five branches, rows 1-5 weight the entries of
    the corresponding map row.

    Parameters
    ----------
    model : callable
        Called on the (6, 31) batch; its result must support slicing and
        ``.numpy()``.
    n : int
        Number of maps to average over.

    Returns
    -------
    Mean of the ``n`` per-map rewards (``np.mean`` of the collected values).
    """
    def _single_reward():
        grid = make_x()
        one_hots = np.eye(6).astype(int)
        batch = np.concatenate([one_hots, np.tile(grid.flatten(), (6, 1))], axis=1)
        probs = model(batch)
        # Expected reward inside each branch under the second-step policy.
        per_branch = (batch[0, 6:].reshape(5, 5) * probs[1:].numpy()).sum(axis=1)
        # Weight the branches by the first-step policy.
        return (probs[0] * per_branch).numpy().sum()

    return np.mean([_single_reward() for _ in range(n)])

def make_data(n, b=1, arr=[10,0,0,0,0], disable=False):
    """Sample ``n`` two-step demonstrations from a softmax policy.

    For each fresh map from ``make_x`` two training rows are produced:

    * step 0: position one-hot ``[1, 0, ..., 0]`` + flattened map, label
      ``y0`` drawn from ``softmax`` of the expected per-row rewards;
    * step 1: position one-hot with index ``y0 + 1`` set + flattened map,
      label ``y1`` drawn from ``softmax(x, b)[y0]``.

    Parameters
    ----------
    n : int
        Number of maps; the result has ``2 * n`` rows.
    b : float
        Inverse temperature passed to ``softmax`` for both steps.
    arr : list
        Unused; kept only so existing positional calls keep working.
    disable : bool
        Forwarded to ``trange`` to hide the progress bar.

    Returns
    -------
    (X, Y) : tuple of np.ndarray
        Stacked inputs and their integer labels, interleaved step 0 / step 1.
    """
    X, Y = [], []

    for _ in trange(n, disable=disable):
        x = make_x()
        n_rows, n_cols = x.shape
        flat = list(x.flatten())

        # Second-step policy; computed once and reused for both labels.
        policy = softmax(x, b)

        # Step 0: choose a row according to its expected reward.
        y0 = rng.choice(np.arange(n_rows), p=softmax((policy * x).sum(axis=1), b))
        root = [0] * (n_rows + 1)
        root[0] = 1

        # Step 1: choose an entry inside the selected row.
        y1 = rng.choice(np.arange(n_cols), p=policy[y0])
        pos = [0] * (n_rows + 1)
        pos[y0 + 1] = 1

        X.append(root + flat)
        Y.append(y0)
        X.append(pos + flat)
        Y.append(y1)

    return np.array(X), np.array(Y)

In [28]:
import tensorflow as tf
from tensorflow.keras import backend as K
# Enable on-demand memory growth on every GPU TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [32]:
# Train a fresh model on 64000 maps sampled with b = 0.4 and report its
# average reward over 1000 evaluation maps.
model = make_model()

student = 0.4
Xtrain, Ytrain = make_data(64000, b=student)
model.fit(Xtrain, Ytrain)
reward = eval_model(model, 1000)
print(f"Starting: {student} {reward:.3f}")
# model.save('starting4.keras')

100%|██████████| 64000/64000 [00:09<00:00, 7021.84it/s]


Starting: 0.4 11.183


In [None]:
# Train a fresh model on 320000 maps sampled with b = 0.04 and save it as
# 'starting04.keras'; later cells and eval_training load that file.
model = make_model()

student = 0.04
Xtrain, Ytrain = make_data(320000, b=student)
model.fit(Xtrain, Ytrain)
reward = eval_model(model, 1000)
print(f"Starting: {student} {reward:.3f}")
model.save('starting04.keras')

100%|██████████| 320000/320000 [00:46<00:00, 6915.80it/s]




In [None]:
# Reload the saved b = 0.04 model and continue training it on data sampled
# with b = 0.4, then report the average reward over 1000 evaluation maps.
model2 = tf.keras.models.load_model('starting04.keras')

student = 0.4

Xtrain, Ytrain = make_data(64000, b=student)
model2.fit(Xtrain, Ytrain)
reward = eval_model(model2, 1000)
print(f"Starting: {student} {reward:.3f}")
# model.save('starting4.keras')

In [None]:
# student = 0.4
# Xtrain, Ytrain = make_data(64000, b=student)

# Runs one more fit of `model` on whatever Xtrain / Ytrain / student are
# currently bound in the kernel (the cell does not define them itself).
model.fit(Xtrain, Ytrain)
reward = eval_model(model, 1000)
print(f"Starting: {student} {reward:.3f}")
# model.save('starting4.keras')

In [34]:
# Same fine-tuning experiment as above, but the loaded model is explicitly
# re-compiled (Adam, sparse categorical cross-entropy on probabilities)
# before fitting on b = 0.4 data.
model = tf.keras.models.load_model('starting04.keras')
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])


student = 0.4

Xtrain, Ytrain = make_data(64000, b=student)
model.fit(Xtrain, Ytrain)
reward = eval_model(model, 1000)
print(f"Starting: {student} {reward:.3f}")
# model.save('starting4.keras')

100%|██████████| 64000/64000 [00:08<00:00, 7261.61it/s]


Starting: 0.4 6.135


In [49]:
# log2 of the number of maps added at each training stage; 0 means
# "evaluate without training".  Cumulative sizes: 0, 32, 64, 128, ..., 65536.
ns = [0,5,5,6,7,8,9,10,11,12,13,14,15]

# b values swept by the experiment below (0.00, 0.02, ..., 0.20); the results
# table labels them "Teacher".
students = np.linspace(0, 0.2, 11)


# Base reward map read by make_x(): row i holds the value 10 + i on and
# right of the diagonal and 0 elsewhere.
arrs = np.triu(np.tile(np.arange(10, 15),(5,1)).T)
arrs

array([[10, 10, 10, 10, 10],
       [ 0, 11, 11, 11, 11],
       [ 0,  0, 12, 12, 12],
       [ 0,  0,  0, 13, 13],
       [ 0,  0,  0,  0, 14]])

In [55]:
arrs.mean()

6.8

In [50]:
def eval_training(b):
    """Fine-tune the saved 'starting04.keras' model on data sampled with ``b``.

    The model is trained in stages on freshly generated data; stage sizes are
    ``2**i`` maps for every non-zero ``i`` in the global ``ns`` (a zero entry
    means "evaluate only").  After every stage the model is scored with
    ``eval_model`` on 1000 maps.

    Parameters
    ----------
    b : float
        Inverse temperature used by ``make_data`` for the training data.

    Returns
    -------
    list
        One average reward per entry of ``ns``.
    """
    # TensorFlow is imported and configured inside the function, presumably so
    # that every process_map worker performs its own setup.
    import tensorflow as tf
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # A forked worker starts with a copy of the parent's random state, so
    # without reseeding every parallel repetition would draw the same maps
    # and labels.  Reseed both the shared Generator and the legacy global
    # state (make_x / make_data read them as notebook globals).
    global rng
    rng = np.random.default_rng()
    np.random.seed()

    model2 = tf.keras.models.load_model('starting04.keras')
    nsum = 0  # cumulative number of training maps seen so far

    rewards = []
    for i in ns:
        if i != 0:
            n = 2**i
            nsum += n
            X, Y = make_data(n, b=b, disable=True)
            model2.fit(X, Y, verbose=False)

        reward = eval_model(model2, 1000)
        rewards.append(reward)
        # Lightweight progress indicator, overwritten in place.
        print(f"{nsum}    ", end='\r')
    return rewards

In [51]:
%%time

# For every b in `students`, run eval_training 20 times in parallel worker
# processes and average the reward curves element-wise.  all_res ends up
# with one row per b and one column per entry of `ns`.
all_res = []
for b in students:
    print(f"b = {b:.2f} ")
    # Only 20 tasks are submitted, so at most 20 of the 30 workers are busy.
    res = process_map(eval_training, [b]*20, max_workers=30, disable=True)
    res = np.array(res).mean(axis=0)
    all_res.append(res)
    print()
all_res = np.array(all_res)

b = 0.00         
65536     0    32    32    64    64    256    256    256    256    512    512    512    512    2048    2048    4096    4096    4096    4096    8192    8192    8192    8192    16384    16384    32768    32768    32768    65536    
b = 0.02         
65536     0    64    64    64    64    128    128    128    128    256    256    512    512    512    512    512    512    2048    2048    8192    8192    16384    16384    16384    32768    32768    32768    32768    32768    32768    32768    65536    65536    65536    65536    
b = 0.04         
65536       32    32    32    64    64    128    128    128    128    128    128    2048    2048    8192    8192    16384    16384    16384    16384    16384    16384    32768    32768    
b = 0.06         
65536     0    32    32    64    64    64    64    256    256    2048    2048    16384    16384    
b = 0.08         
65536       32    32    32    1024    1024    2048    2048    32768    65536    
b = 0.10         
65536     

In [52]:
all_res.round(2)

array([[6.9 , 6.9 , 6.91, 6.91, 6.91, 6.9 , 6.89, 6.88, 6.87, 6.82, 6.81, 6.81, 6.78],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.9 , 6.9 , 6.9 , 6.86, 6.86, 6.88, 6.85],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.91, 6.91, 6.92, 6.9 , 6.9 , 6.93, 6.9 ],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.91, 6.92, 6.94, 6.93, 6.95, 6.97, 6.94],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.91, 6.93, 6.95, 6.93, 6.98, 7.  , 7.02],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.91, 6.92, 6.95, 6.93, 6.99, 7.04, 7.06],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.91, 6.92, 6.93, 6.9 , 6.99, 7.04, 7.15],
       [6.9 , 6.9 , 6.91, 6.91, 6.91, 6.91, 6.9 , 6.9 , 6.91, 6.9 , 7.  , 7.06, 7.24],
       [6.9 , 6.9 , 6.91, 6.91, 6.9 , 6.9 , 6.89, 6.88, 6.87, 6.88, 6.96, 7.09, 7.75],
       [6.9 , 6.9 , 6.91, 6.91, 6.9 , 6.9 , 6.89, 6.85, 6.83, 6.83, 6.94, 7.11, 8.31],
       [6.9 , 6.9 , 6.91, 6.91, 6.9 , 6.89, 6.87, 6.81, 6.8 , 6.79, 6.93, 7.18, 8.58]], dtype=float32)

In [54]:
# Pretty-print all_res: one row per b in `students`, one column per
# cumulative training-set size derived from `ns`.
print('Original Student: b=0.04')

print()
print('         Curriculum Size')

# Header row: running total of 2**i over the non-zero entries of ns.
print('Teacher    ', end='')
nsum = 0
for i in ns:
    if i: nsum += 2**i
    print(f"{nsum:>6}", end='  ')
print()

# Body: the b value followed by the averaged reward after each stage.
for row, b in zip(all_res, students):
    print(f"   {b:.2f}", end='     ')
    for rew in row:
        print(f"{rew:5.3f}", end='   ')
    print()

Original Student: b=0.04

         Curriculum Size
Teacher         0      32      64     128     256     512    1024    2048    4096    8192   16384   32768   65536  
   0.00     6.904   6.905   6.907   6.908   6.906   6.902   6.891   6.879   6.867   6.817   6.806   6.810   6.780   
   0.02     6.904   6.905   6.907   6.908   6.907   6.908   6.901   6.901   6.900   6.862   6.856   6.881   6.850   
   0.04     6.904   6.905   6.907   6.908   6.908   6.909   6.907   6.915   6.916   6.898   6.903   6.934   6.900   
   0.06     6.904   6.905   6.907   6.909   6.909   6.911   6.911   6.925   6.936   6.927   6.948   6.973   6.944   
   0.08     6.904   6.905   6.907   6.909   6.908   6.910   6.915   6.928   6.949   6.933   6.978   6.999   7.016   
   0.10     6.904   6.905   6.907   6.909   6.908   6.911   6.914   6.925   6.948   6.927   6.990   7.037   7.057   
   0.12     6.904   6.905   6.907   6.908   6.908   6.909   6.911   6.918   6.931   6.901   6.995   7.035   7.149   
   0.14     6.

In [17]:
all_res.shape

(21, 15)

In [17]:
res.mean(axis=0)

array([ 8.536117,  8.566498,  8.590255,  8.668965,  8.831114,  9.14156 ,  9.754227,  9.883101,  9.898097, 10.007944, 10.061784], dtype=float32)

In [None]:
%%time
%lprun -f eval_model eval_model(model2, 1000)

In [None]:
# Scratch copy of the reward computation from eval_model.
# NOTE(review): `ypred` is not assigned in any visible cell, and `xtest` is
# only assigned further down; this cell depends on leftover kernel state.
branch_rewards = (xtest[0,6:].reshape(5,5) * ypred[1:].numpy()).sum(axis=1)

reward = (ypred[0] * branch_rewards).numpy().sum()


reward

In [None]:
arrs.flatten()

In [None]:
# Monte-Carlo table of the expected one-step softmax reward for every row of
# `arrs` (table rows) and every b in linspace(0, 0.4, 21) (table columns).
print('            ', end='')
for b in np.linspace(0,0.4,21):
    print(f"{b:5.2f}", end=' ')
print()
for arr in arrs:
    n = 100000
    x = np.random.random((n,5))
    # Uniform noise in [-1, 1) around the row values (wider than the
    # +/- 0.1 used by make_x).
    x = (x - 0.5) * 2 + np.array(arr)
    x = rng.permuted(x, axis=-1)
    
    print(arr, end=' ')
    for b in np.linspace(0,0.4,21):
        # Mean over the n samples of sum_j softmax(b * x)_j * x_j.
        r = (softmax(x,b) * x).sum(axis=1).mean()
        # print(f"{b:.1f} {r:.3f}", end=' ')
        print(f"{r:5.2f}", end=' ')
    print()

In [None]:
arrs

In [None]:
(softmax(arrs, 0.1) * arrs).sum(axis=1)

In [None]:
def make_model(map_size=5, num_nodes=32, num_dense=3):
    """Build and compile a small dense classifier that outputs logits.

    NOTE: this re-uses the name ``make_model`` and therefore replaces the
    larger model builder defined earlier in the notebook once this cell runs.

    Parameters
    ----------
    map_size : int
        Length of the flat input vector.
    num_nodes : int
        Width of every hidden ReLU layer.
    num_dense : int
        Number of hidden layers.

    Returns
    -------
    tf.keras.Model
        Model with a single 5-unit logit output named 'Y0', compiled with
        Adam, sparse categorical cross-entropy (``from_logits=True``) and an
        accuracy metric.
    """
    # `shape` has to be a tuple: `(map_size)` without a trailing comma is
    # just an int, which is not accepted as a shape by every Keras version.
    inputs = tf.keras.layers.Input(shape=(map_size,))
    x = tf.keras.layers.Flatten()(inputs)
    for _ in range(num_dense):
        x = tf.keras.layers.Dense(num_nodes, activation='relu')(x)
    output1 = tf.keras.layers.Dense(5, name='Y0')(x)
    model = tf.keras.models.Model(inputs=inputs, outputs=[output1])
    # No Softmax layer here, so the loss works on raw logits.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

In [None]:
arrs = np.triu(np.tile(np.arange(10, 15),(5,1)).T)
arrs

In [None]:
%%time
# For every row of `arrs`: train 10 fresh models in stages of growing size
# and record the reward after each stage, then average over the 10 runs.
# NOTE(review): the calls below pass `arr` to make_data and multiply the
# model output element-wise with xtest; this looks written for a make_data
# variant that returns 5-wide inputs built from `arr` - confirm which
# make_data / make_model definitions were live when this cell was run.
b = 1

all_res = []
for arr in arrs:
    xtest, ytest = make_data(32000, b, arr)
    # print(arr, end=' ')
    
    res = []
    
    for i in range(10):
        # print(i, end='\r')
        res.append([])
        model = make_model()
        nsum = 0
        # None = evaluate the untrained model; integers are log2 stage sizes.
        for n in [None, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13]:
            if n:
                n = 2**n
                # n *= 5 - (arr == 0).sum()
                nsum += n
                x, y = make_data(n, b, arr)
                model.fit(x, y, verbose=False)
            # Expected reward of the model's softmax policy on the test inputs.
            reward = (softmax(model(xtest),1) * xtest).sum(axis=1).mean()
            res[-1].append(reward)
    res = np.array(res)
    res = res.mean(axis=0)
    all_res.append(res)
np.array(all_res).round(1)
 

In [None]:

## give 888 3x as much data

np.array(all_res).round(1)

In [None]:
(5 - (arr == 0).sum())

In [None]:
model = make_model()

In [None]:
(softmax(model(xtest),1) * xtest).sum(axis=1).mean()

In [None]:
(softmax(xtest,1) * xtest).sum(axis=1).mean()

In [None]:
softmax(model(xtest),100)

In [None]:
softmax(x,1)

In [None]:
y

In [None]:
print(nsum, reward)

In [None]:
def make_data(b=1, n=320000, verbose=False):
    """Sample ``n`` random 5x5 maps and a second-step action for each.

    NOTE: this re-uses the name ``make_data`` with a different signature and
    replaces the earlier definition once this cell runs.

    Each map starts as uniform noise in [-1, 1) around the column offsets
    5..9, is made triangular (``np.tril(x).T``), then its rows are shuffled
    and the entries inside every row are permuted independently.  A first
    action ``y0`` is drawn from the softmax of the expected per-row rewards
    and the returned label ``y1`` is drawn from ``softmax(x, b)[y0]``.

    Parameters
    ----------
    b : float
        Inverse temperature passed to ``softmax``.
    n : int
        Number of maps to generate.
    verbose : bool
        Show a progress bar when True.

    Returns
    -------
    (X, Y1) : tuple of np.ndarray
        The stacked maps and the sampled second-step actions.
    """
    X, Y1 = [], []
    rng = np.random.default_rng()
    for _ in trange(n, disable=not verbose):
        x = rng.random((5, 5))
        x = (x - 0.5) * 2 + np.arange(5, 10)
        x = np.tril(x).T

        rng.shuffle(x, axis=0)
        x = rng.permuted(x, axis=1)

        policy = softmax(x, b)
        reward0 = policy * x
        reward1 = softmax(reward0.sum(axis=1), b)
        # y0 must be sampled because y1 is conditioned on it; its assignment
        # had been commented out, which made the next line a NameError.
        y0 = rng.choice(range(5), p=reward1)
        # `p` has to be a probability vector, so use the policy row itself
        # rather than the reward-weighted row (which does not sum to 1).
        y1 = rng.choice(range(5), p=policy[y0])

        X.append(x)
        Y1.append(y1)
    return np.array(X), np.array(Y1)

In [None]:
# NOTE(review): with the make_data(b=1, n=320000, verbose=False) definition
# above, these positional arguments bind b=10, n=1, and that definition
# returns two values, so this three-way unpack does not match it.
X, Y0, Y1 = make_data(10, 1)

In [None]:
x = np.random.random((5,5))
x = (x - 0.5) * 2 + np.arange(5,10)
x = np.tril(x).T
x

In [None]:
rng = np.random.default_rng()

In [None]:
rng.shuffle(x, axis=0)
x

In [None]:
x = rng.permuted(x, axis=1)

In [None]:
b = 1

In [None]:
# NOTE(review): without keepdims=True the row sums have shape (5,) and are
# broadcast across columns, so entry [i, j] is divided by the sum of row j
# rather than row i.
r0 = np.exp(b*x) / np.exp(b*x).sum(axis=1)

In [None]:
np.exp(b*x)

In [None]:
np.exp(b*x).sum(axis=1)

In [None]:
np.exp(b*x) / np.exp(b*x).sum(axis=1)

In [None]:
(np.exp(b*x) / np.exp(b*x).sum(axis=1, keepdims=True) * x).sum(axis=1)

In [None]:
%%time
# Train one model per index h on 12.8M samples generated with b = H[h].
# NOTE(review): `H` is not defined in any visible cell, and this cell
# expects make_data to return (X, Y0, Y1) and make_model to build a
# two-output model - neither matches the definitions shown above; confirm
# which versions this cell was written against.
models = {}
for h in range(5):
    # n = int(32e3)
    n = 3200000 * 4
    X, Y0, Y1 = make_data(H[h], n=n, verbose=True)
    model = make_model()
    model.fit(X,[Y0, Y1])
    models[h] = model

In [None]:
_X, _, _ = make_data(H[0], n=32000, verbose=True)

In [None]:
%%time
# Score each trained model on the held-out maps _X: take the row selected by
# the arg-max of the first output, weight its entries by the softmax of the
# second output, and average the resulting expected reward.
# NOTE(review): assumes every model in `models` returns two outputs.
for h in range(5):
    _Y0, _Y1 = models[h](_X)
    _Y0, _Y1 = _Y0.numpy(), _Y1.numpy()
    first = np.array([ix[iy] for ix, iy in zip(_X, _Y0.argmax(axis=1))])
    second = tf.nn.softmax(_Y1).numpy()
    reward = (first * second).sum(axis=1).mean()
    print(f'{h} {reward:.4f}')

In [None]:
x

In [None]:
X[0]

In [None]:
softmax(np.array([10,9,0]), 10)