## Global Black-Box Optimization with RNNs

In [12]:
import tensorflow as tf
import numpy as np
import benchmarkfunctions as fun

In [2]:
def kernelTF(x1,x2,l = 0.3):
    """Batched squared-exponential kernel: exp(-||a - b||^2 / l^2) for all point pairs.

    x1 gets a new axis at position 2 and x2 at position 1, so the two broadcast
    against each other; the squared differences are then summed over axis 3.
    For inputs shaped (batch, n1, dim) and (batch, n2, dim) this yields a
    (batch, n1, n2) tensor of kernel values.

    Args:
        x1: first set of points.
        x2: second set of points.
        l: length scale of the kernel.
    """
    pairwise_diff = tf.expand_dims(x1, axis=2) - tf.expand_dims(x2, axis=1)
    sq_dist = tf.reduce_sum(pairwise_diff**2, axis=3)
    return tf.exp(-1.0/l**2*sq_dist)

def GPTF(X,A,x, l = 0.3):
    """Evaluate the kernel expansion sum_i k(x, X_i) * A_i for a batch of functions.

    Args:
        X: support points of each function; second argument of the kernel.
        A: weights that multiply the kernel values (matmul over the support points).
        x: query points, one per function; a singleton axis is inserted at
            position 1 before the kernel is evaluated.
        l: kernel length scale. It is forwarded to kernelTF so that a
            non-default value actually takes effect.

    Returns:
        The matmul result with axis 2 squeezed out, i.e. one value per query point.
    """
    k_xX = kernelTF(tf.expand_dims(x, axis = 1), X, l)
    return tf.squeeze(tf.matmul(k_xX,  A),axis=(2,))

def normalize(minv, maxv, y):
    """Affinely rescale y so that minv maps to -1 and maxv maps to +1.

    Works on anything supporting arithmetic broadcasting (scalars, arrays, tensors).
    """
    doubled_offset = 2*(y-minv)
    value_range = maxv-minv
    return doubled_offset/value_range-1.0

In [3]:
# --- Data ---
dim = 1            # dimensionality of the functions being optimized
n_train = 1600     # number of functions taken from the front of the dataset
n_test = 1600      # number of functions taken from the back of the dataset
path = "./Data/{}D/".format(dim)   # directory holding the .npy files for this dimension

# --- LSTM Model ---
n_hidden = 50      # LSTM state size
n_steps = 10       # number of query points the network proposes per function

# --- Optimization ---
learning_rate = 0.0001
epochs = 1000
batch_size = 160

In [4]:
# Load the pre-generated function parameters (support points, weights and the
# per-function min/max used for normalization) from disk.
X = np.load(path+"X.npy")
A = np.load(path+"A.npy")
minv = np.load(path+"minv.npy")
maxv = np.load(path+"maxv.npy")

# Training data is the first n_train entries, test data the last n_test entries.
# NOTE(review): the two splits overlap if the files hold fewer than
# n_train + n_test entries -- confirm the dataset size.
X_train, X_test = X[:n_train], X[-n_test:]
A_train, A_test = A[:n_train], A[-n_test:]
min_train, min_test = minv[:n_train], minv[-n_test:]
max_train, max_test = maxv[:n_train], maxv[-n_test:]

# Number of support points per function (second axis of X).
n_gp_samples = X.shape[1]

In [5]:
# LSTM output layer: randomly initialised projection from the n_hidden-sized
# LSTM output to a dim-sized vector, plus its bias.
weights = dict(out=tf.Variable(tf.random_normal([n_hidden, dim])))
biases = dict(out=tf.Variable(tf.random_normal([dim])))

In [6]:
# Create Model
# Builds the unrolled graph: at every step the LSTM receives the previous query
# point and its normalized function value and proposes the next query point.

# Batch size as a scalar tensor so the same graph serves batches of any size.
size = tf.placeholder(tf.int32,[])

# Per-function inputs: support points, weights, and the min/max used by normalize.
Xt = tf.placeholder(tf.float32, [None, n_gp_samples, dim])
At = tf.placeholder(tf.float32, [None, n_gp_samples, 1])
mint = tf.placeholder(tf.float32, [None, 1])
maxt = tf.placeholder(tf.float32, [None, 1])

# Fixed start: first query point at the origin, both LSTM state parts set to ones.
x_0 = -0.0*tf.ones([size, dim])
h_0 = tf.ones([size, n_hidden])
c_0 = tf.ones([size, n_hidden])

state = (c_0, h_0)
x = x_0
y = normalize(mint, maxt, GPTF(Xt,At,x))
sample_points = [x]

# f_min tracks the element-wise lowest normalized value seen so far (x_0 included);
# f_sum accumulates the batch-mean value of the n_steps proposed points (x_0 excluded).
f_min = y
f_sum = 0

# The first cell (reuse=None) is called once only to create the LSTM variables
# under scope 'rnn_cell'; its outputs are discarded. The second cell (reuse=True)
# then shares those variables across all unrolled steps below.
# NOTE(review): presumably a workaround for variable-scope reuse errors in the
# TF 1.x version used here -- confirm before simplifying to a single cell.
cell = tf.contrib.rnn.LSTMCell(num_units = n_hidden, reuse=None)
cell(tf.concat([x, y], 1), state, scope='rnn_cell')
cell = tf.contrib.rnn.LSTMCell(num_units = n_hidden, reuse=True)

for i in range(n_steps):
    # Feed the last query point and its value; h is the cell output for this step.
    h, state = cell(tf.concat([x, y], 1), state, scope='rnn_cell')
    # tanh keeps every coordinate of the proposed point inside (-1, 1).
    x = tf.tanh(tf.matmul(h, weights['out']) + biases['out'])
    sample_points.append(x)
    
    y = normalize(mint, maxt, GPTF(Xt,At,x))
    
    f_min = tf.minimum(y, f_min)
    f_sum += tf.reduce_mean(y)

# Reported metric: best value found per function, averaged over the batch.
f_min = tf.reduce_mean(f_min)
# Training objective: average normalized function value over all proposed points.
loss = f_sum / n_steps

In [7]:
# Single Adam update op that minimizes `loss`.
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss=loss)

In [8]:
# Open the session and initialise every variable in the graph.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Per-epoch metric history, filled in by the training loop.
train_loss_list, test_loss_list = [], []
train_fmin_list, test_fmin_list = [], []

In [9]:
# Train the Network
# Print the run configuration first so the log below is self-describing.
print("Function Dimension: \t\t"+str(dim))
print("Number of Training Samples: \t"+str(n_train))
print("Number of Test Samples: \t"+str(n_test))
print("Batch size: \t\t\t"+str(batch_size))
print("Number of hidden Units: \t"+str(n_hidden))
print("Sequence length: \t\t"+ str(n_steps))
print("Epochs: \t\t\t"+str(epochs))
print("Learning rate: \t\t\t"+str(learning_rate))
print("------------------------------------------------------------------------------------")

for ep in range(epochs):
    # One pass over the training set in mini-batches. Stepping by offset (rather
    # than looping n_train//batch_size times) also trains on a trailing partial
    # batch when n_train is not a multiple of batch_size; the graph takes the
    # actual batch size through the `size` placeholder.
    for start in range(0, n_train, batch_size):
        rows = slice(start, start + batch_size)
        X_batch = X_train[rows]
        A_batch = A_train[rows]
        min_batch = min_train[rows]
        max_batch = max_train[rows]

        sess.run([train_step], feed_dict={Xt: X_batch, At: A_batch, mint: min_batch, maxt: max_batch, size: X_batch.shape[0]})

    # Evaluate on the full training and test sets once per epoch.
    train_loss, train_fmin = sess.run(
        [loss, f_min],
        feed_dict={Xt: X_train, At: A_train, mint: min_train, maxt: max_train, size: n_train})
    test_loss, test_fmin = sess.run(
        [loss, f_min],
        feed_dict={Xt: X_test, At: A_test, mint: min_test, maxt: max_test, size: n_test})

    train_loss_list.append(train_loss)
    test_loss_list.append(test_loss)
    train_fmin_list.append(train_fmin)
    test_fmin_list.append(test_fmin)

    # Log the first ten epochs, then every tenth of the run, and the final epoch.
    # For epochs < 10 the `ep < 10` test short-circuits, so the modulo by
    # epochs // 10 is never evaluated with a zero divisor.
    if ep < 10 or ep % (epochs // 10) == 0 or ep == epochs-1:
        print("Ep: {:4} | TrainLoss: {: .3f} | TrainMin: {: .3f} | TestLoss: {: .3f} | TestMin: {: .3f}".format(
            ep, train_loss, train_fmin, test_loss, test_fmin))

Function Dimension: 		1
Number of Training Samples: 	1600
Number of Test Samples: 	1600
Batch size: 			160
Number of hidden Units: 	50
Sequence length: 		10
Epochs: 			1000
Learning rate: 			0.0001
------------------------------------------------------------------------------------
Ep:    0 | TrainLoss:  0.158 | TrainMin: -0.548 | TestLoss:  0.178 | TestMin: -0.533
Ep:    1 | TrainLoss:  0.059 | TrainMin: -0.485 | TestLoss:  0.095 | TestMin: -0.456
Ep:    2 | TrainLoss:  0.004 | TrainMin: -0.429 | TestLoss:  0.046 | TestMin: -0.399
Ep:    3 | TrainLoss: -0.020 | TrainMin: -0.401 | TestLoss:  0.024 | TestMin: -0.372
Ep:    4 | TrainLoss: -0.030 | TrainMin: -0.387 | TestLoss:  0.015 | TestMin: -0.359
Ep:    5 | TrainLoss: -0.033 | TrainMin: -0.379 | TestLoss:  0.012 | TestMin: -0.352
Ep:    6 | TrainLoss: -0.035 | TrainMin: -0.375 | TestLoss:  0.010 | TestMin: -0.348
Ep:    7 | TrainLoss: -0.036 | TrainMin: -0.373 | TestLoss:  0.009 | TestMin: -0.346
Ep:    8 | TrainLoss: -0.037 | TrainM