In [1]:
%load_ext line_profiler

In [2]:
from collections import Counter

import numpy as np
# Shared module-level random generator used by make_x and make_data.
# It is created without a seed, so results differ from run to run.
rng = np.random.default_rng()
# Print arrays without scientific notation and allow wide rows before wrapping.
np.set_printoptions(suppress=True, linewidth=150)
from tqdm import tqdm, trange
from tqdm.contrib.concurrent import process_map

In [3]:
def softmax(x, b, axis=-1):
    """Numerically stable softmax of ``b * x`` along ``axis``.

    Parameters
    ----------
    x : array_like
        Scores.
    b : float
        Multiplier applied to the scores before exponentiation
        (``b = 0`` yields a uniform distribution).
    axis : int, optional
        Axis along which the result is normalised to sum to 1.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``x`` whose entries sum to 1 along ``axis``.
    """
    # NOTE(review): softmax is defined again further down in this cell; the
    # later definition shadows this one.
    z = b * np.asarray(x)
    # Shift by the maximum of the *scaled* scores along the normalised axis.
    # Shifting by the global max of x lets whole rows underflow to 0/0 = NaN
    # and does not protect against overflow when b is negative.
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

def make_model(map_size=5, num_nodes=128, num_dense=4):
    """Build and compile a dense classifier over the flattened map state.

    The network takes a flat vector of length
    ``map_size*map_size + map_size + 1`` (the layout produced by
    ``make_data``: a position one-hot followed by the flattened grid) and
    outputs a probability distribution over ``map_size`` choices.

    Parameters
    ----------
    map_size : int
        Side length of the square grid; also the number of output classes.
    num_nodes : int
        Width of each hidden ReLU layer.
    num_dense : int
        Number of hidden layers.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam and sparse categorical cross-entropy on
        probabilities (``from_logits=False``), tracking accuracy.
    """
    # TensorFlow is imported inside the function rather than at the top of the
    # notebook -- presumably so each worker process initialises it itself.
    import tensorflow as tf
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    input_dim = map_size * map_size + map_size + 1
    # ``(input_dim,)`` is a real 1-tuple; ``(input_dim)`` is just an int.
    inputs = tf.keras.layers.Input(shape=(input_dim,))
    x = tf.keras.layers.Flatten()(inputs)

    for _ in range(num_dense):
        x = tf.keras.layers.Dense(num_nodes, activation='relu')(x)

    # One output unit per choice, so the head follows map_size instead of a
    # hard-coded 5.
    logits = tf.keras.layers.Dense(map_size, name='Y0')(x)
    probs = tf.keras.layers.Softmax()(logits)
    model = tf.keras.models.Model(inputs=inputs, outputs=probs)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

def softmax(x, b, axis=-1):
    """Numerically stable softmax of ``b * x`` along ``axis``.

    Parameters
    ----------
    x : array_like
        Scores.
    b : float
        Multiplier applied to the scores before exponentiation
        (``b = 0`` yields a uniform distribution).
    axis : int, optional
        Axis along which the result is normalised to sum to 1.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``x`` whose entries sum to 1 along ``axis``.
    """
    # NOTE(review): this cell defines softmax twice; this later definition is
    # the one in effect.
    z = b * np.asarray(x)
    # Shift by the maximum of the *scaled* scores along the normalised axis.
    # Shifting by the global max of x lets whole rows underflow to 0/0 = NaN
    # and does not protect against overflow when b is negative.
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

def make_x(noise=0.1):
    """Sample one randomised copy of the module-level grid ``arrs``.

    Uniform noise in ``[-noise, noise)`` is added to every entry, then the
    entries of each row are shuffled independently.

    Parameters
    ----------
    noise : float, optional
        Half-width of the uniform jitter. The default 0.1 gives the same
        distribution as the previous ``(random() - 0.5) * 2 / 10``.

    Returns
    -------
    numpy.ndarray
        New float array with the same shape as ``arrs``; ``arrs`` itself is
        not modified.
    """
    # Draw the jitter from the shared ``rng`` rather than the legacy global
    # ``np.random`` state, so one generator controls all of the randomness.
    jitter = rng.uniform(-noise, noise, size=arrs.shape)
    # ``permuted`` returns a shuffled copy; axis=-1 shuffles within each row.
    return rng.permuted(arrs + jitter, axis=-1)

def eval_model(model, n = 1):
    """Average two-step reward of ``model`` over ``n`` freshly sampled grids.

    For each grid from ``make_x`` the model is called once on a batch of six
    rows: the six possible position one-hots, each followed by the flattened
    grid. Row 0's output weights the five branches; rows 1..5's outputs weight
    the five entries of the corresponding grid row.

    Parameters
    ----------
    model : callable
        Maps a ``(6, 31)`` array to an indexable result exposing ``.numpy()``
        (here a Keras model).
    n : int
        Number of grids to average over.

    Returns
    -------
    Mean of the ``n`` per-grid rewards.
    """
    positions = np.eye(6).astype(int)
    totals = []
    for _ in range(n):
        grid = make_x()
        batch = np.concatenate([positions, np.tile(grid.flatten(), (6, 1))], axis=1)
        probs = model(batch)
        # Reward of each branch: its grid row weighted by the model's output
        # for the state reached after choosing that branch.
        grid_rows = batch[0, 6:].reshape(5, 5)
        per_branch = (grid_rows * probs[1:].numpy()).sum(axis=1)
        # Weight the branches by the model's output for the starting state.
        totals.append((probs[0] * per_branch).numpy().sum())
    return np.mean(totals)

def make_data(n, b=1, arr=(10,0,0,0,0), disable=False):
    """Generate ``2*n`` supervised examples from a softmax policy with multiplier ``b``.

    For each sampled grid ``x`` (from ``make_x``) two examples are produced:

    * start state -> first choice ``y0``, sampled from the softmax of the
      softmax-weighted value of each row of ``x``;
    * state after ``y0`` -> second choice ``y1``, sampled from the softmax of
      row ``y0`` of ``x``.

    Each input is a position one-hot (index 0 for the start state, ``y0 + 1``
    afterwards) followed by the flattened grid.

    Parameters
    ----------
    n : int
        Number of grids to sample.
    b : float
        Multiplier passed to ``softmax``.
    arr : sequence
        Unused; kept so existing calls keep working.
    disable : bool
        Passed to ``trange`` to hide the progress bar.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        Inputs and integer labels, interleaved as (first step, second step)
        per grid.
    """
    X, Y = [], []

    for _ in trange(n, disable=disable):
        x = make_x()
        flat = x.flatten()
        num_rows, num_cols = x.shape

        # Second-step policy for every row, computed once and reused below.
        step_policy = softmax(x, b)
        row_values = (step_policy * x).sum(axis=1)

        start = np.zeros(num_rows + 1)
        start[0] = 1
        y0 = rng.choice(np.arange(num_rows), p=softmax(row_values, b))

        after_first = np.zeros(num_rows + 1)
        after_first[y0 + 1] = 1
        y1 = rng.choice(np.arange(num_cols), p=step_policy[y0])

        X.append(np.concatenate([start, flat]))
        Y.append(y0)
        X.append(np.concatenate([after_first, flat]))
        Y.append(y1)

    X = np.array(X)
    Y = np.array(Y)

    return X,Y

In [24]:
def train_model(student):
    """Train and save a starting model on data generated with ``b=student``.

    Builds a fresh model, fits it on 320000 sampled grids, evaluates it on
    1000 grids, prints the result and saves the model to
    ``models/starting_<student>.keras``.

    Parameters
    ----------
    student : float
        Softmax multiplier used to generate the training data. Progress
        output is shown only when it equals 0.
    """
    import os

    # Create the output directory up front so a missing folder cannot make
    # the save fail after the expensive data generation and training.
    os.makedirs('models', exist_ok=True)

    verbose = bool(student == 0)
    model = make_model()
    Xtrain, Ytrain = make_data(320000, b=student, disable=not verbose)
    model.fit(Xtrain, Ytrain, verbose=verbose)
    reward = eval_model(model, 1000)
    print(f"Starting: {student:.2f} {reward:.3f}")
    model.save(f'models/starting_{student:.2f}.keras')

In [22]:
# Curriculum schedule as powers of two: a non-zero entry i adds 2**i training
# samples, and 0 means "evaluate without training". The repeated 5 makes the
# cumulative totals 32, 64, 128, ...
ns = [0, 5] + list(range(5, 16))

# Softmax multipliers for which starting ("student") models are trained.
students = np.linspace(start=0, stop=0.4, num=21)
display(students)

# Base 5x5 grid: row k holds the value 10 + k on and right of the diagonal,
# zeros elsewhere.
arrs = np.triu(np.repeat(np.arange(10, 15)[:, None], 5, axis=1))
arrs

array([0.  , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 , 0.22, 0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 ])

array([[10, 10, 10, 10, 10],
       [ 0, 11, 11, 11, 11],
       [ 0,  0, 12, 12, 12],
       [ 0,  0,  0, 13, 13],
       [ 0,  0,  0,  0, 14]])

In [None]:
%%time
# Train and save one starting model per value in `students`, in parallel
# worker processes (progress bar disabled; return values are discarded).
_ = process_map(train_model, students, disable=True)

100%|██████████| 320000/320000 [01:02<00:00, 5120.73it/s]




In [35]:
# Softmax multiplier `b` used to generate the curriculum data in the cells below.
teacher = 0.4

In [36]:
# Baseline: a freshly initialised network trained on teacher-generated data of
# growing size, evaluated after every stage.
model2 = make_model()
nsum = 0
for i in ns:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)


0 9.612903
32 6.8948765
64 5.6704726
128 6.7295814
256 6.652731
512 6.4314847
1024 6.992745
2048 6.9823027
4096 6.7106943
8192 6.876046
16384 8.103259
32768 10.245119
65536 11.42322


In [39]:
import tensorflow as tf
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Continue training the saved b=0.00 starting model on teacher-generated data,
# evaluating after every stage (an entry of 0 evaluates without training).
model2 = tf.keras.models.load_model(f"models/starting_{0.00:.2f}.keras")
nsum = 0
for i in [0, 5, *range(5, 20)]:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)

0 6.797259
32 6.7968745
64 6.796431
128 6.7929463
256 6.7845254
512 6.7581577
1024 6.707426
2048 6.639017
4096 6.4790597
8192 6.310694
16384 6.202692
32768 6.141119
65536 6.1307387
131072 6.131746
262144 6.161889
524288 6.1493545
1048576 6.146372


In [44]:
%%time
import tensorflow as tf
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Continue training the saved b=0.02 starting model on teacher-generated data,
# evaluating after every stage (an entry of 0 evaluates without training).
model2 = tf.keras.models.load_model(f"models/starting_{0.02:.2f}.keras")
nsum = 0
for i in [0, 5, *range(5, 21)]:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)

0 6.840676
32 6.840389
64 6.83941
128 6.833149
256 6.82416
512 6.7966104
1024 6.7360673
2048 6.615489
4096 6.4797707
8192 6.3089676
16384 6.1739626
32768 6.1339498
65536 6.106214
131072 6.126292
262144 6.1509504
524288 10.533854
1048576 12.098105
2097152 12.3039055
CPU times: user 9min 23s, sys: 53.3 s, total: 10min 16s
Wall time: 8min 31s


In [41]:
import tensorflow as tf
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Continue training the saved b=0.04 starting model on teacher-generated data,
# evaluating after every stage (an entry of 0 evaluates without training).
model2 = tf.keras.models.load_model(f"models/starting_{0.04:.2f}.keras")
nsum = 0
for i in [0, 5, *range(5, 18)]:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)

0 6.926753
32 6.922881
64 6.924343
128 6.9164553
256 6.9031386
512 6.8706007
1024 6.8028636
2048 6.69153
4096 6.529828
8192 6.319208
16384 6.1834865
32768 6.1125
65536 6.844411
131072 11.14416
262144 11.637673


In [42]:
import tensorflow as tf
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Continue training the saved b=0.08 starting model on teacher-generated data,
# evaluating after every stage (an entry of 0 evaluates without training).
model2 = tf.keras.models.load_model(f"models/starting_{0.08:.2f}.keras")
nsum = 0
for i in [0, 5, *range(5, 18)]:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)

0 7.4762354
32 7.5172596
64 7.5476933
128 7.622848
256 7.8719325
512 8.032523
1024 8.078094
2048 7.982797
4096 9.719364
8192 10.35369
16384 11.0965805
32768 11.194634
65536 11.417443
131072 11.785841
262144 12.172889


In [43]:
import tensorflow as tf
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Continue training the saved b=0.16 starting model on teacher-generated data,
# evaluating after every stage (an entry of 0 evaluates without training).
model2 = tf.keras.models.load_model(f"models/starting_{0.16:.2f}.keras")
nsum = 0
for i in [0, 5, *range(5, 18)]:
    if i:
        # Stage of 2**i new samples; nsum tracks the cumulative total.
        n = 1 << i
        nsum += n
        X, Y = make_data(n, b=teacher, disable=True)
        model2.fit(X, Y, verbose=False)

    reward = eval_model(model2, 1000)
    print(nsum, reward)

0 10.084524
32 10.099598
64 10.147285
128 10.250272
256 10.559982
512 10.872451
1024 11.144627
2048 11.271785
4096 12.127729
8192 12.110457
16384 12.105398
32768 12.163546
65536 12.412416
131072 12.318156
262144 12.44985


In [30]:
def eval_training(student, teacher):
    """Fine-tune a saved starting model on teacher data and track its reward.

    Loads ``models/starting_<student>.keras``, then walks the module-level
    schedule ``ns``: each non-zero entry ``i`` trains on ``2**i`` freshly
    generated samples with ``b=teacher``; after every entry (including 0) the
    model is evaluated on 1000 grids.

    Parameters
    ----------
    student : float
        Multiplier identifying which saved starting model to load.
    teacher : float
        Softmax multiplier used to generate the curriculum data.

    Returns
    -------
    list
        One reward per entry of ``ns``, in order.
    """
    global rng

    import tensorflow as tf
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # This function is run in forked worker processes, which inherit a copy of
    # the parent's random state. Without fresh entropy every worker draws the
    # same stream, so "replicate" runs that get averaged are not independent.
    # NOTE(review): this rebinds the notebook-level ``rng``; revisit if a fixed
    # seed is ever introduced for reproducibility.
    rng = np.random.default_rng()
    np.random.seed()

    model2 = tf.keras.models.load_model(f"models/starting_{student:.2f}.keras")

    rewards = []
    for i in ns:
        if i != 0:
            X, Y = make_data(2**i, b=teacher, disable=True)
            model2.fit(X, Y, verbose=False)

        rewards.append(eval_model(model2, 1000))
    return rewards

In [32]:
%%time

# Loop-invariant settings, hoisted out of the student loop. They use their own
# names so the notebook-level `n` and `teacher` read by other cells are not
# overwritten.
n_repeats = 5                 # independent runs averaged per (student, teacher)
teachers = students[::2]      # every other multiplier from `students`

for student in [0.0, 0.04, 0.08, 0.12, 0.16, 0.2]:

    all_res = []
    for teacher_b in teachers:
        print(f"teacher = {teacher_b:.2f} ", end='\r')
        res = process_map(eval_training, [student]*n_repeats, [teacher_b]*n_repeats,
                          max_workers=30, disable=True)
        # Average the reward curves of the repeated runs.
        res = np.array(res).mean(axis=0)
        all_res.append(res)
    all_res = np.array(all_res)

    print(f'Original Student: {student:.2f}')
    print()
    print('         Curriculum Size')
    print('Teacher    ', end='')
    nsum = 0
    for i in ns:
        if i: nsum += 2**i
        print(f"{nsum:>6}", end='  ')
    print()

    for row, teacher_b in zip(all_res, teachers):
        # Values are right-aligned in the same 6-wide columns as the header,
        # so the table stays aligned when a reward reaches 10 or more.
        # Output for rewards below 10 is unchanged.
        print(f"   {teacher_b:.2f}", end='    ')
        for rew in row:
            print(f"{rew:6.3f}", end='  ')
        print()
    print()
    print()

Original Student: 0.00

         Curriculum Size
Teacher         0      32      64     128     256     512    1024    2048    4096    8192   16384   32768   65536  
   0.00     6.797   6.796   6.797   6.795   6.799   6.803   6.817   6.811   6.797   6.787   6.809   6.804   6.815   
   0.04     6.797   6.796   6.797   6.796   6.802   6.810   6.834   6.842   6.849   6.857   6.902   6.899   6.917   
   0.08     6.797   6.796   6.797   6.796   6.801   6.812   6.838   6.850   6.867   6.877   6.939   6.949   6.948   
   0.12     6.797   6.796   6.797   6.796   6.801   6.809   6.830   6.837   6.848   6.852   6.896   6.899   6.918   
   0.16     6.797   6.796   6.797   6.794   6.798   6.803   6.814   6.810   6.805   6.795   6.816   6.815   6.811   
   0.20     6.797   6.796   6.797   6.794   6.795   6.794   6.795   6.776   6.743   6.706   6.704   6.693   6.706   
   0.24     6.797   6.796   6.796   6.792   6.790   6.784   6.775   6.740   6.677   6.596   6.576   6.560   6.580   
   0.28     6.79

Process ForkProcess-1012:
Process ForkProcess-1025:
Process ForkProcess-1016:
Process ForkProcess-1028:
Process ForkProcess-1035:
Process ForkProcess-1030:
Process ForkProcess-1019:
Process ForkProcess-1029:
Process ForkProcess-1027:
Process ForkProcess-1034:
Process ForkProcess-1026:
Process ForkProcess-1033:
Process ForkProcess-1031:
Process ForkProcess-1014:
Process ForkProcess-1009:
Process ForkProcess-1022:
Process ForkProcess-1023:
Process ForkProcess-1020:
Process ForkProcess-1024:
Process ForkProcess-1032:
Process ForkProcess-1017:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkProcess-1015:
Traceback (most recent call last):
Process ForkProcess-1018:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkProce

KeyboardInterrupt: 