#### Active learning NNGP experiment
Multiple parameters, all set through the `config` dictionary below.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt

from model.mlp import MLP
from dataloader.rosen import RosenData
from uncertainty_estimator.nngp import NNGP
from uncertainty_estimator.mcdue import MCDUE
from sample_selector.eager import EagerSampleSelector
from oracle.identity import IdentityOracle
import random

In [None]:
# Experiment settings; the other cells of the notebook read them from here.
config = dict(
    random_seed=4623457,
    n_dim=10,
    n_train=200,
    n_test=200,
    n_pool=1000,
    layers=[128, 64, 32],
    update_sample_size=100,
    al_iterations=2,
)

# Seed NumPy and the stdlib RNG; a seed of None leaves both unseeded.
_seed = config['random_seed']
if _seed is not None:
    np.random.seed(_seed)
    random.seed(_seed)

In [None]:
def print_shapes(note, *sets):
    """Print ``note`` followed by the shapes of each ``(x, y)`` pair in ``sets``.

    Every element of ``sets`` must unpack into exactly two objects exposing a
    ``.shape`` attribute; one ``shapes:`` line is printed per pair.
    """
    print(note)
    # Lazy generator: each pair is unpacked right before its line is printed.
    shape_pairs = ((features.shape, targets.shape) for features, targets in sets)
    for x_shape, y_shape in shape_pairs:
        print("shapes:", x_shape, y_shape)

# Load data: build the RosenData loader, then unpack the splits it returns.
# NOTE(review): the literal 200 is the loader's second positional argument,
# presumably the validation-set size -- confirm against RosenData.
rosen_data = RosenData(
    config['n_train'], 200, config['n_test'], config['n_pool'], config['n_dim']
)
(
    X_train, y_train,
    X_val, y_val,
    _, _,  # fifth and sixth returned arrays are not used in this notebook
    X_pool, y_pool,
) = rosen_data.dataset(use_cache=True)

In [None]:
# Init neural network & tf session.
#
# The cell is written to be re-run: it first disposes of the session left over
# from a previous execution, then rebuilds the graph, the model and a fresh
# session.

# Close the previous session *before* resetting the graph: TensorFlow
# documents resetting the default graph while a session is still active as
# undefined behaviour.
try:
    sess.close()
except NameError:
    # First execution of the cell: no previous session exists yet.
    pass
except Exception as close_error:
    # Cleanup stays best-effort, but the failure is reported instead of being
    # silently swallowed (and KeyboardInterrupt is no longer caught).
    print('Could not close the previous TF session:', close_error)

tf.reset_default_graph()
if config['random_seed'] is not None:
    # Graph-level seed; must be set after the reset so it applies to the new graph.
    tf.set_random_seed(config['random_seed'])

model = MLP(ndim=config['n_dim'], layers=config['layers'])

# Create the initializer op only after the model has been constructed, then
# run it in the new session.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

model.set_session(sess)

In [None]:
# Active-learning components built around the network and the pool labels.
estimator = NNGP(model)  # to estimate uncertainties
oracle = IdentityOracle(y_pool)  # generate y for X from pool
sampler = EagerSampleSelector(oracle) # sample X and y from pool by uncertainty estimations

# Optional sanity check (argument order matches the live calls in this notebook):
# estimator.estimate(X_pool, X_train, y_train)[:40]

In [None]:
# Baseline: fit on the initial training set and record the validation RMSE.
model.train(X_train, y_train, X_val, y_val)
rmses = [np.sqrt(mse(model.predict(data=X_val), y_val))]

n_rounds = config['al_iterations']
batch_size = config['update_sample_size']

for al_iteration in range(1, n_rounds + 1):
    print_shapes(
        f'[{al_iteration}] BEFORE:',
        (X_train, y_train), (X_val, y_val), (X_pool, oracle.y_set),
    )

    # Score the pool, then show the first 20 scores and the 10 largest
    # (largest first).
    uncertainties = estimator.estimate(X_pool, X_train, y_train)
    print('Uncertainties', uncertainties[:20])
    top_indices = uncertainties.argsort()[-10:][::-1]
    print('Uncertainties', uncertainties[top_indices])

    # Let the sampler update the train set and the pool from the scores.
    X_train, y_train, X_pool = sampler.update_sets(
        X_train, y_train, X_pool, uncertainties, batch_size
    )

    print_shapes(
        f'[{al_iteration}] AFTER:',
        (X_train, y_train), (X_val, y_val), (X_pool, oracle.y_set),
    )

    # Retrain on the updated training set and record the new validation RMSE.
    model.train(X_train, y_train, X_val, y_val)
    rmses.append(np.sqrt(mse(model.predict(data=X_val), y_val)))

In [None]:
# Print the recorded validation RMSE values and plot them in recording order.
# Index 0 is the value measured before any active-learning update; each later
# entry was appended after one retraining in the loop.
print(rmses)
plt.plot(rmses, label='NNGP', marker='.')
plt.title('RMS Error by active learning iterations')
plt.legend()  # shows the 'NNGP' label given to plt.plot above

In [None]:
# Show some predictions: model output for the first three validation inputs,
# followed by the corresponding validation targets.
sample_predictions = model.predict(data=X_val[:3])
print(sample_predictions)
print(y_val[:3])

In [None]:
# Show some uncertainties: the first three entries of the estimator's output
# for the current pool and training set.
# Kept as a bare trailing expression so the notebook displays it as cell output.
estimator.estimate(X_pool, X_train, y_train)[:3]