In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error as mse
import time
import sys
import time
import tqdm
from scipy.optimize import rosen

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=True):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

In [3]:
df = np.random.uniform(size=(40, 10))
targets = rosen(df.T)

train_num = 10
thres = 25
thres2 = 40


X_train = df[:train_num]
y_train = targets[:train_num][:, None]
X_pool = df[train_num:thres]
y_pool = targets[train_num:thres][:, None]
X_test = df[thres:thres2]
y_test = targets[thres:thres2][:, None]
print('train shapes:', X_train.shape, y_train.shape)
print('pool shapes:', X_pool.shape, y_pool.shape)
print('test shapes:', X_test.shape, y_test.shape)

train shapes: (10, 10) (10, 1)
pool shapes: (15, 10) (15, 1)
test shapes: (15, 10) (15, 1)


In [4]:
def get_errors(x_, y_):
    return [np.sqrt(mse(x_, y_)), np.mean(np.abs(x_ - y_)), np.max(np.abs(x_ - y_))]

def simple_cov(_x, _y):
    return np.mean((_x-np.mean(_x))*(_y-np.mean(_y)), axis = 1)

def get_mcdues(X):
    stds = np.zeros((X.shape[0], T), dtype = float)
    for cnt_ in range(T):
        stds[:, cnt_] = np.ravel(sess.run(y, feed_dict={x: X, 
                                                        keep_probability_: .5}))
    return np.std(stds, axis = 1)

def get_stds(X):
    stds = np.zeros((X.shape[0], T), dtype = float)
    for cnt_ in range(T):
        stds[:, cnt_] = np.ravel(sess.run(y, feed_dict={x: X, 
                                                        keep_probability_: .5}))
    return stds

In [5]:
ndim = X_train.shape[1]
# layers = [64,32]
layers = [64,64,32]

learning_rate_decay = .97
start_learning_rate = 8e-4
learning_rate_schedule_epochs = 50000

$X$: batch_size $\times$ dim 

$W$: dim $\times$ 1


output: batch_size $\times$ 1

In [6]:
tf.reset_default_graph()

# placeholders
x = tf.placeholder(tf.float32, [None, ndim])
y_ = tf.placeholder(tf.float32, [None, 1])

learning_rate_ = tf.placeholder(tf.float32)
forces_coeff_ = tf.placeholder(tf.float32)
keep_probability_ = tf.placeholder(tf.float32, name='keep_probability')
l2_reg_ = tf.placeholder(tf.float32, name='l2reg')

# weights
W1 = tf.Variable(tf.truncated_normal([ndim, layers[0]], stddev=(2/ndim)**.5))
b1 = tf.Variable(tf.truncated_normal([layers[0]],  stddev=.1))
h1 = tf.nn.relu(tf.matmul(x, W1) + b1)
h_drop1 = tf.nn.dropout(h1, keep_probability_, noise_shape = [1,layers[0]])

Ws = [W1]; bs = [b1]; hs = [h_drop1]
for cnt_layer in range(1, len(layers)):
    Ws.append(tf.Variable(tf.truncated_normal([layers[cnt_layer - 1], layers[cnt_layer]], 
                                              stddev=(2/layers[cnt_layer - 1])**.5)))
    bs.append(tf.Variable(tf.truncated_normal([layers[cnt_layer]],  stddev=.1)))
    hs.append(tf.nn.dropout(tf.nn.relu(tf.matmul(hs[-1], Ws[-1]) + bs[-1]), keep_probability_,
                            noise_shape = [1,layers[cnt_layer]]))

Ws.append(tf.Variable(tf.truncated_normal([layers[-1], 1], stddev=.1)))
bs.append(tf.Variable(tf.truncated_normal([1],  stddev=.1)))

# funcs
y = tf.matmul(hs[-1], Ws[-1]) + bs[-1]

l2_regularizer = sum(tf.nn.l2_loss(Wxxx) for Wxxx in Ws) 

mse_e = tf.losses.mean_squared_error(predictions = y, labels = y_)
loss = mse_e + l2_reg_*l2_regularizer

#train_step = tf.train.AdamOptimizer(learning_rate=learning_rate_).minimize(loss)

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = start_learning_rate
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           learning_rate_schedule_epochs, learning_rate_decay, staircase=True)

lr_fun = lambda: learning_rate
min_lr = lambda: tf.constant(1e-5)
actual_lr = tf.case([(tf.less(learning_rate, tf.constant(1e-5)), min_lr)], default=lr_fun)

train_step = tf.train.AdamOptimizer(learning_rate=actual_lr).minimize(loss, global_step=global_step)

In [7]:
try:
    sess.close()
except:
    pass

init = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.Session()
sess.run(init)
epoch = 0
data = []

In [8]:
batch_size = 4
init_epochs = 1000000
keep_prob = .9
l2_reg = 1e-4

al_steps = 20
uptrain_epochs = 1000000
mandatory_uptrain_epochs = 10000
sample_each_step = 250
T = 25

early_stopping_window = .03
max_warnings = 3
early_stopping_check_step = 100

gpnn_max_train = 1000
diag_eps = .01

In [9]:
X_train_current = X_train.copy()
y_train_current = y_train.copy()
X_pool_current = X_pool.copy()
y_pool_current = y_pool.copy()

In [10]:
lr, gs = sess.run([learning_rate, global_step])
print('learning rate: {:.4E}, global step: {}'.format(lr, gs))

learning rate: 8.0000E-04, global step: 0


In [10]:
fname_identifier = "rosenbrock_exp"
save_path = saver.save(sess, "~/init_" + fname_identifier + ".ckpt")
print("Init model saved in path: %s" % save_path)

Init model saved in path: /Users/romaushakov/Desktop/diploma/init_rosenbrock_exp.ckpt


In [11]:
fname_identifier = "rosenbrock_exp"
saver.restore(sess, "~/init_" + fname_identifier + ".ckpt")
print("Init model restored")

INFO:tensorflow:Restoring parameters from /Users/romaushakov/Desktop/diploma/init_rosenbrock_exp.ckpt
Init model restored


In [12]:
X_train_current = X_train.copy()
y_train_current = y_train.copy()
X_pool_current = X_pool.copy()
y_pool_current = y_pool.copy()

In [13]:
def compute_block_inv(A_inv, B, C, D):
    H = D - C.dot(A_inv).dot(B)
    H_inv = 1./ H
    a00 = A_inv + H_inv * A_inv.dot(B).dot(C).dot(A_inv)
    a01 = -A_inv.dot(B) * H_inv
    a10 = -H_inv * C.dot(A_inv)
    a11 = H_inv
    
    return np.block([[a00, a01.reshape(-1, 1)],
                    [a10.reshape((1, -1)), np.array(a11).reshape((1, 1))]])

In [14]:
a = np.eye(3)
a[0,0] *= 4
b = np.zeros((3, 1))
c = np.zeros((1, 3))
d = 1

In [15]:
m = np.block([[a, b], [c, d]])
m

array([[4., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [16]:
inv = compute_block_inv(np.linalg.inv(a), b, c, d)
inv

array([[ 0.25,  0.  ,  0.  , -0.  ],
       [ 0.  ,  1.  ,  0.  , -0.  ],
       [ 0.  ,  0.  ,  1.  , -0.  ],
       [-0.  , -0.  , -0.  ,  1.  ]])

In [17]:
inv.dot(m)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [18]:
gpnn_max_train = 8
points_to_integrate = 100

print('='*40)
print('Integral-based ALGO')
print('='*40)

for al_iters in range(al_steps):
    # 1) get MCDUEs
    t = time.time()
    print('Starting iteration #', al_iters)
    random_train_inds = np.random.permutation(range(len(X_train_current)))[:gpnn_max_train]
    random_train_samples = X_train_current[random_train_inds]
    
    train_and_pool_samples = np.concatenate([random_train_samples, X_pool_current])    
    stds = get_stds(train_and_pool_samples)
    
    K_train_cov = np.cov(stds[:gpnn_max_train, :], ddof = 0)
    K_train_cov_inv = np.linalg.inv(K_train_cov + diag_eps * np.eye(gpnn_max_train))
    
    minimums = random_train_samples.min(axis=0)
    maximums = random_train_samples.max(axis=0)
    
    ### vs are points for integral
    vs = np.random.uniform(minimums, maximums,
                           size=(points_to_integrate, random_train_samples.shape[1]))
    
    # get mcdues for random vs
    y_vs = get_stds(vs)

    ### sigma(v | X) for each v in vs
    sigmas = []
    for cnt_ in range(len(vs)):
        vs_sample = y_vs[cnt_, :]
        Q = simple_cov(stds[:gpnn_max_train], vs_sample)[:, None]
        KK = np.var(vs_sample)
        sigma = KK - np.dot(np.dot(Q.T, K_train_cov_inv), Q)[0][0]
        sigmas.append(np.sqrt(sigma))
    

    # for each x in X_pool_current:
    # we count \int sigma(v|X) - sigma(v|X+x_from_pool) dv
    diffs_integral = np.zeros(X_pool_current.shape[0])
    
    ### extend cov matrix 
    new_K_cov = np.zeros((gpnn_max_train + 1, gpnn_max_train + 1))
    new_K_cov[:gpnn_max_train, :gpnn_max_train] = K_train_cov
    
    ### loop over pool data
    for x_cnt_ in range(len(X_pool_current)):
        
        # stds was recieved for train_and_pool_samples 
        # and train_pool_sample = np.concatenate([random_train_sample, X_pool_current])
        # and random_train_samples.shape[0] = gpnn_max_train. So
        
        
        # extend cov matrix
        # we don't recalculate all cov matrix
        # we only add one row 
        pool_sample = stds[(gpnn_max_train + x_cnt_), :]
        Q = simple_cov(stds[:gpnn_max_train, :], pool_sample)[:, None]
        Q = Q.ravel()
        new_K_cov[-1, :-1] = Q
        new_K_cov[:-1, -1] = Q
        new_K_cov[-1, -1] = np.var(pool_sample)
        new_K_cov_inv = np.linalg.inv(new_K_cov + 0 * diag_eps * np.eye(gpnn_max_train + 1))
        break
    break

Integral-based ALGO
Starting iteration # 0


In [19]:
new_K_cov_inv

array([[145.27525173, -12.05703417,  20.90886227, -56.59646714,
          1.38671536, -18.19042247, -52.01671123,  37.26035851,
        -46.21952905],
       [-12.05703417,  34.98402498,   3.23293147,  -7.96455272,
        -13.25495966,   3.96352876,  -7.36031558, -18.78218522,
          8.93045956],
       [ 20.90886227,   3.23293147,  21.22304131, -15.92775356,
        -17.4292095 ,  -0.59915894, -11.99308179,  11.87247775,
        -11.95870975],
       [-56.59646714,  -7.96455272, -15.92775356,  43.82494725,
          9.0960169 ,   2.68758211,  24.45796108, -19.03967278,
         18.53530131],
       [  1.38671536, -13.25495966, -17.4292095 ,   9.0960169 ,
         37.78186798, -14.95915564,   6.90164778,  -3.44596452,
          0.16357426],
       [-18.19042247,   3.96352876,  -0.59915894,   2.68758211,
        -14.95915564,  20.18445458,   4.76417915,  -1.80087189,
         -1.95505525],
       [-52.01671123,  -7.36031558, -11.99308179,  24.45796108,
          6.90164778,   4.7641

In [20]:
new_K_cov_inv.shape

(9, 9)

In [21]:
Q.shape

(8,)

In [22]:
compute_block_inv(K_train_cov_inv, Q.reshape((-1, 1)), Q.reshape((1, -1)), np.var(Q) + diag_eps)

array([[ 38.22316637,  -1.27812002,   0.21491244, -11.72846534,
          1.31271606,  -9.31126634, -16.08939775,   5.3448139 ,
          1.09316761],
       [ -1.27812002,  21.47233469,   2.96072864,  -7.2663336 ,
         -6.46134638,   2.16812879,  -5.55322465, -10.51220043,
         -0.32018579],
       [  0.21491244,   2.96072864,  11.962279  ,  -4.23450052,
        -10.45553044,  -1.92139456,  -3.9951254 ,   3.84812901,
          0.40639607],
       [-11.72846534,  -7.2663336 ,  -4.23450052,  19.79795955,
          3.74940936,   0.73429507,   6.91375405,  -5.35684348,
         -0.36673936],
       [  1.31271606,  -6.46134638, -10.45553044,   3.74940936,
         23.99795049,  -9.40446786,   2.93175022,  -2.2337084 ,
          0.06736775],
       [ -9.31126634,   2.16812879,  -1.92139456,   0.73429507,
         -9.40446786,  13.73875746,   1.1850915 ,  -0.55796432,
          0.3077161 ],
       [-16.08939775,  -5.55322465,  -3.9951254 ,   6.91375405,
          2.93175022,   1.1850

In [25]:
new_K_cov[-1, :]

array([0.53739573, 0.64584942, 0.75697023, 0.46705952, 0.66437434,
       0.7597712 , 0.68670603, 0.52919355, 0.66584906])

In [26]:
Q

array([0.53739573, 0.64584942, 0.75697023, 0.46705952, 0.66437434,
       0.7597712 , 0.68670603, 0.52919355])