In [None]:
#Only run in colab
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Github/Taskmaster
#!pip install tensorflow --upgrade
import tensorflow as tf
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

In [23]:
import rubiks as rubiks
import numpy as np
from student import student
from classroom import classroom
from teacher import teacher
from general_task_network import student_network
from utils import dotdict
from keras import regularizers

# Task description and problem setup both come from the rubiks module.
task = rubiks.rubiks_task
setup = rubiks.rubiks_setup
# Seeded generator; the teacher's step-count sampler below draws from it.
rng = np.random.default_rng(seed=0)

# Hyperparameters that are identical for the core, value and state networks.
# Each network config below starts from a fresh copy of these entries and
# appends its own layer-count settings.
residual_defaults = {
    "residual_weights_reg": None,
    "residual_bias_reg": None,
    "relu_leak": 0.05,
    "residual_units": 300,
    "learning_rate": 0.001,
}

core_params = dotdict({
    **residual_defaults,
    "residual_layers": 3,
})

value_network_params = dotdict({
    **residual_defaults,
    "post_core_residual_layers": 2,
    "reward_fork_layers": 1,
    "value_fork_layers": 1,
})

state_network_params = dotdict({
    **residual_defaults,
    "post_core_residual_layers": 2,
})

# Top-level configuration handed to student_network.create.
params = dotdict({
    "core_params": core_params,
    "value_network_params": value_network_params,
    "state_network_params": state_network_params,
    "state_size": task.state_size,
    "action_codes": task.n_actions,
})

# Build a fresh network; swap in the commented line to resume from disk.
my_student_network = student_network.create(params)
#my_student_network = student_network.load("models/combined/trained/trained")

# NOTE(review): the positional arguments 50 and 0.95 are passed unnamed;
# confirm their meaning against student's signature.
student_template = student(task, 50, my_student_network, 0.95)

# The teacher receives a callable that, for a requested count n, returns
# n samples of 1 + Poisson(lam=3).
t = teacher(setup, lambda n: 1 + rng.poisson(lam=3, size=n))

# The classroom receives buffer_size as a callable; here it always returns 2048.
c = classroom(
    task,
    setup,
    t,
    student_template,
    n_students=1,
    max_steps=10,
    buffer_size=lambda n: 2048,
)


In [28]:
# Override the settings chosen when the classroom and teacher were built:
# the teacher now returns a constant 3 for every requested item (instead of a
# random draw), and the classroom uses max_steps=5 with a buffer size of 8192.
t.step_dist = lambda n: np.full(n, 3, dtype=int)
c.buffer_size = lambda n: 8192
c.max_steps = 5

In [None]:

# Run ten rounds of classroom training, announcing each round before it starts.
# Each round calls run_training_batch with 100 problems and 3 epochs per episode.
for i in range(10):
    print(f"Round {i}.")
    c.run_training_batch(
        epochs_per_episode=3,
        n_problems=100,
    )
    # Optional checkpoint (disabled): save the network every tenth round.
    #if i % 10 == 9:
    #    my_student_network.save("models/trained.h5")

In [29]:
# Interleave classroom training with supervised fitting of the network on
# (state, one-hot action) -> next-state examples generated from random moves.
n = 8192

# NOTE(review): this rebinds the global `rng`; any earlier closure that looks
# `rng` up by name (such as the lambda passed to teacher(...)) will see this
# new generator from here on.
rng = np.random.default_rng(seed=0)

# n identical rows, each holding 0 .. task.state_size-1.
states = np.tile(np.arange(task.state_size), (n, 1))

for k in range(20):
    print(f"Round {k}.")
    c.run_training_batch(n_problems=100, epochs_per_episode=3)

    # Apply 50 random actions to every row. `states` is carried over between
    # rounds, so each round continues from the previous round's states.
    for i in range(50):
        random_moves = rng.choice(a=task.n_actions, size=n)
        _, states = rubiks.task_action(states, random_moves)

    # One further random action per row yields the transition to learn from.
    action_ids = rng.choice(a=task.n_actions, size=n)
    _, next_states = rubiks.task_action(states, action_ids)

    # One-hot encode the chosen action ids: shape (n, task.n_actions), float.
    actions = np.equal(action_ids[:, None], np.arange(task.n_actions)).astype(float)

    # Convert both sides of the transition with make_neural_input and fit.
    colors = rubiks.make_neural_input(states)
    next_colors = rubiks.make_neural_input(next_states)
    my_student_network.fit_state(colors, actions, next_colors, epochs=5)

    # Optional checkpoint (disabled): save the network every tenth round.
    #if k % 10 == 9:
    #  my_student_network.save("models/combined/trained")

Round 0.
Before step 1, 100 out of 100 remain open.
Before step 2, 87 out of 100 remain open.
Before step 3, 79 out of 100 remain open.
Before step 4, 47 out of 100 remain open.
Before step 5, 40 out of 100 remain open.
After step 5, 38 out of 100 remain open.
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Round 1.
Before step 1, 100 out of 100 remain open.
Before step 2, 84 out of 100 remain open.
Before step 3, 72 out of 100 remain open.
Before step 4, 40 out of 100 remain open.
Before step 5, 37 out of 100 remain open.
After step 5, 36 out of 100 remain open.
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Round 2.
Before step 1, 100 out of 100 remain open.
Before step 2, 94 out of 100 remain open.
Before step 3, 89 out of 100 remain open.
Before step 4, 51 out of 100 remain open.
Before step 5, 44 out of 100 remain open.
After step 5, 42 out of 100 remain open.
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/5
Epoch 2/5
Epoch 3

KeyboardInterrupt: 

In [21]:
# Spot check: compare the network's predicted next state for one action
# against the result of actually applying that action with rubiks.task_action.
states = np.arange(54, dtype=int)[None, :]   # a single row holding 0..53
actions = np.array([9], dtype=int)           # action index 9

# One-hot encode the action, as in the training cell.
onehot_actions = (np.arange(task.n_actions) == actions[..., None]).astype(float)
prediction = my_student_network.predict_state(task.make_input(states), onehot_actions)

# View the output as 54 positions x 6 scores and take the best-scoring index
# at each position.
predicted = prediction.reshape(1, 54, 6).argmax(axis=2)

# Reference: start_coloring looked up at the state reached by the real move.
expected = rubiks.start_coloring[rubiks.task_action(states, actions)[1][0]]

predicted, expected



(array([[0, 2, 4, 0, 2, 0, 2, 5, 4, 1, 4, 4, 1, 0, 3, 4, 0, 3, 0, 3, 5, 2,
         4, 2, 2, 5, 1, 0, 3, 4, 3, 3, 5, 1, 2, 4, 1, 2, 1, 2, 5, 5, 1, 5,
         5, 0, 1, 3, 4, 1, 3, 1, 3, 5]], dtype=int64),
 array([0, 2, 4, 0, 2, 0, 2, 5, 4, 1, 4, 4, 0, 0, 3, 4, 0, 3, 0, 3, 5, 2,
        4, 2, 2, 5, 1, 0, 3, 4, 3, 3, 5, 1, 2, 4, 1, 2, 1, 2, 5, 5, 1, 5,
        5, 0, 1, 3, 4, 1, 3, 1, 3, 5]))