In [2]:
from keras.models import Model
from keras.layers import (Conv1D, LSTM, BatchNormalization, Flatten, Dense, Activation, 
                          Input, concatenate)
from keras.layers.wrappers import Bidirectional
from keras.optimizers import Nadam
from clr_callback import CyclicLR
from keras.regularizers import l2

In [3]:
def conv_layer(prev):
    """Pointwise Conv1D -> batch norm -> concatenate with the input -> ReLU.

    Args:
        prev: Keras tensor fed to ``Conv1D``. Presumably shaped
            (batch, steps, channels) -- TODO confirm at the call sites.

    Returns:
        Keras tensor ``relu(concat([BN(Conv1D(prev)), prev], axis=-1))``. The
        input is concatenated (not added) on the last axis, so that axis holds
        the 32 convolution filters followed by the channels of ``prev``.
    """
    # NOTE: 10e-4 is 1e-3 (not 1e-4).
    x = Conv1D(32, 1, kernel_regularizer=l2(10e-4),
               bias_regularizer=l2(10e-4))(prev)
    # The layer has to be instantiated first and then applied to the tensor.
    # Writing BatchNormalization(x) hands the tensor to the constructor (where
    # it is taken as the `axis` argument) and never normalizes anything.
    x = BatchNormalization()(x)
    x = concatenate([x, prev], axis=-1)
    x = Activation("relu")(x)
    return x


def res_layer(prev):
    """Two pointwise Conv1D/BatchNorm stages with a concatenated skip connection.

    Pipeline: Conv1D(32, 1) -> BN -> ReLU -> Conv1D(32, 1) -> BN, then the
    block input is concatenated on the last axis and a final ReLU is applied.
    Note that the skip connection is a concatenation, not an addition.

    Args:
        prev: Keras tensor entering the block.

    Returns:
        Keras tensor produced by the final ReLU.
    """
    def _pointwise_conv():
        # A fresh layer with its own regularizer instances; 10e-4 is 1e-3.
        return Conv1D(32, 1,
                      kernel_regularizer=l2(10e-4),
                      bias_regularizer=l2(10e-4))

    # First conv stage, activated.
    hidden = _pointwise_conv()(prev)
    hidden = BatchNormalization()(hidden)
    hidden = Activation("relu")(hidden)

    # Second conv stage; activation is deferred until after the skip merge.
    branch = _pointwise_conv()(hidden)
    branch = BatchNormalization()(branch)

    merged = concatenate([branch, prev], axis=-1)
    return Activation("relu")(merged)


def lstm_layer(prev):
    """Apply two stacked bidirectional LSTMs that each return the full sequence.

    Each stage wraps an ``LSTM`` with 32 units (per direction) and L2
    regularization on kernel and bias in ``Bidirectional``.

    Args:
        prev: Keras sequence tensor to process.

    Returns:
        Keras tensor output of the second bidirectional LSTM
        (``return_sequences=True``, so the time axis is kept).
    """
    seq = prev
    for _ in range(2):
        # 10e-4 is 1e-3; every stage gets its own layer and regularizers.
        recurrent = LSTM(32, return_sequences=True,
                         kernel_regularizer=l2(10e-4),
                         bias_regularizer=l2(10e-4))
        seq = Bidirectional(recurrent)(seq)
    return seq


def lstm_value_head(prev):
    """Value head: collapse the sequence to one scalar squashed by tanh.

    Pipeline: Bidirectional LSTM (1 unit per direction, last output only)
    -> Dense(32) -> BatchNormalization -> ReLU -> Dense(1, tanh).

    Args:
        prev: Keras sequence tensor coming from the network trunk.

    Returns:
        Keras tensor with a single tanh-activated unit per sample.
    """
    # return_sequences=False: only the final output of the LSTM is kept.
    summarizer = Bidirectional(
        LSTM(1, return_sequences=False,
             kernel_regularizer=l2(10e-4),  # 10e-4 is 1e-3
             bias_regularizer=l2(10e-4)))
    features = summarizer(prev)

    features = Dense(32)(features)
    features = BatchNormalization()(features)
    features = Activation("relu")(features)

    value = Dense(1, activation="tanh")(features)
    return value


def lstm_policy_head(prev, output_length):
    """Policy head: reduce the sequence and emit a softmax distribution.

    Pipeline: Bidirectional LSTM (1 unit, full sequence) -> Bidirectional LSTM
    (1 unit, last output only) -> Dense(output_length, softmax). Every layer
    carries L2 regularization on kernel and bias.

    Args:
        prev: Keras sequence tensor coming from the network trunk.
        output_length: Number of units in the softmax output layer.

    Returns:
        Keras tensor holding ``output_length`` softmax probabilities per sample.
    """
    # First recurrent stage keeps the time axis for the stage that follows.
    per_step = Bidirectional(
        LSTM(1, return_sequences=True,
             kernel_regularizer=l2(10e-4),  # 10e-4 is 1e-3
             bias_regularizer=l2(10e-4)))(prev)

    # Second recurrent stage keeps only its final output.
    summary = Bidirectional(
        LSTM(1, return_sequences=False,
             kernel_regularizer=l2(10e-4),
             bias_regularizer=l2(10e-4)))(per_step)

    probabilities = Dense(output_length, activation="softmax",
                          kernel_regularizer=l2(10e-4),
                          bias_regularizer=l2(10e-4))(summary)
    return probabilities

In [4]:
from keras.optimizers import Adam, Nadam
from keras import backend as K
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_accuracy
from keras.initializers import glorot_uniform, zero
import numpy as np
from keras.regularizers import l2
from IPython.core.debugger import set_trace

def create_net(time_steps, input_length, output_length, num_layers=8):
    """Build and compile the two-headed (policy, value) recurrent network.

    The model has a single input of shape ``(time_steps, input_length)``
    (batch axis excluded). It is passed through ``num_layers`` applications of
    ``lstm_layer`` and the result feeds both ``lstm_policy_head`` and
    ``lstm_value_head``.

    Args:
        time_steps: Size of the sequence axis of the input.
        input_length: Feature size of each time step of the input.
        output_length: Forwarded to ``lstm_policy_head`` as the policy size.
        num_layers: How many times ``lstm_layer`` is applied in the trunk.

    Returns:
        A Keras ``Model`` with outputs ``[policy, value]``, compiled with the
        Nadam optimizer, categorical cross-entropy for the policy and MSE for
        the value, each weighted 0.5.
    """
    # --- Design notes (ideas only; the code below does not implement MCTS) ---
    # I would have a series of mini heads that produce these outputs, or I
    # could try to have an LSTM that remembers these values and outputs them.
    #
    # At a high level, what does MCTS take in and do? It takes in a node and a
    # state. The node is an (s, a) pair; it's (s, a) because (s, a') will have
    # different values. So given an (s, a), have a network that will predict
    # probabilities and a result.
    #
    # You want a subsection of the network dedicated to being the true MCTS:
    # for a given number of simulations (1600), that section uses the (s, a)
    # to produce new (s, a)'s (s_next, a_next).
    #
    # What would be great is passing a bunch of input data and output data and
    # having the MCTS infer everything from that. For example, take in an
    # existing TSP program and have an LSTM that gets a set of inputs
    # (num, num, num, stop); for x/y coords you would do num (x), num (y), stop.
    # More complicated examples need more thought. Maybe MAML will solve some
    # of these issues.
    #
    # Input and output need to be scaled between -1 and 1; maybe have a network
    # that learns a rescale function. The output would then be how bad it is on
    # a scale from distance(min_dist, min_dist)*n to
    # distance(max_dist, max_dist)*n, rescaled to -1 to 1.
    # So now you would have an input and an output.
    #
    # The next piece you need is ... (note left unfinished)
    #
    # The issue with all of this is that it's a ton of work and it isn't
    # guaranteed to be better. The main benefit would be if you could take the
    # MCTS' prediction and improve it (which is basically what AlphaGo did).

    # The input is one current state.
    inp = Input(shape=(time_steps, input_length))

    # TODO: MCTS statistics are planned as additional inputs -- visits with
    # shape (), total_value and mean_value with shape
    # (time_steps, input_length). Create those Input tensors and list them in
    # Model(inputs=...) once a layer actually consumes them; unused Input
    # tensors only add dead placeholders.

    trunk = inp
    for _ in range(num_layers):
        trunk = lstm_layer(trunk)

    # Both heads read the same trunk output.
    policy = lstm_policy_head(trunk, output_length)
    value = lstm_value_head(trunk)

    model = Model(inputs=inp, outputs=[policy, value])

    # Losses and weights are positional with respect to outputs=[policy, value].
    model.compile(optimizer=Nadam(),
                  loss=["categorical_crossentropy", "mse"],
                  loss_weights=[.5, .5],
                  metrics=None)

    return model

In [None]:
# so for the lookback, if you go back to previous timesteps and evaluate branches from there, you could 
# 

# so if the value for a branch changes/is dramatically different from predicted,
# you do more evaluations to previous time steps for example, 
# 400 on the last one, 200 on the one before, 100, 50, 25, 25 or something

# this should result in additional computation being allocated to pivotal moments
# could have a mixing term or parameter that determines what % is spent on reflection vs look ahead

In [None]:
# Try out the alpha zero go project and try to add concurrency