In [None]:
import tensorflow as tf
import rfho as rf

from rfho.datasets import load_mnist, ExampleVisiting

In [None]:
# Fractions of the MNIST data assigned to the training and validation sets;
# whatever remains becomes the test set. Change these percentages to see the
# effect on the regularization hyperparameter.
train_fraction = .2
valid_fraction = .2
mnist = load_mnist(partitions=(train_fraction, valid_fraction))

In [None]:
# Placeholders for the input examples and their targets.
x = tf.placeholder(tf.float32, name='x')
y = tf.placeholder(tf.float32, name='y')

# Define the model (here a linear model from rfho.models), sized from the
# training set's data and target dimensions.
n_inputs = mnist.train.dim_data
n_targets = mnist.train.dim_target
model = rf.LinearModel(x, n_inputs, n_targets)

# Vectorize the model and build the state vector. The state is augmented by 1
# because the weights are going to be optimized with momentum.
# (This call also prints some TensorFlow infos and warnings about variable
# collections.)
s, out, w_matrix = rf.vectorize_model(
    model.var_list, model.inp[-1], model.Ws[0], augment=1)

In [None]:
# Plain error: mean cross-entropy loss between the model output and the targets.
error = tf.reduce_mean(rf.cross_entropy_loss(out, y))

# Training error = error + L2 penalty on the weights, scaled by the
# regularization hyperparameter rho.
rho = tf.Variable(0., name='rho')
l2_penalty = tf.reduce_sum(tf.pow(w_matrix, 2))
training_error = error + rho * l2_penalty

# Accuracy: mean of the indicator "arg-max of the output equals arg-max of the target".
predicted_class = tf.argmax(out, 1)
target_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Learning rate and momentum factor are variables as well, so that they can be
# optimized together with rho.
eta = tf.Variable(.03, name='eta')
mu = tf.Variable(.9, name='mu')

# Training dynamics (plays a role similar to a tf.train.Optimizer).
optimizer = rf.MomentumOptimizer.create(s, eta, mu, loss=training_error)

In [None]:
# The weights are optimized w.r.t. training_error, while the hyperparameters
# are optimized w.r.t. the validation error (here: `error` evaluated on the
# validation set).
hyperparameters = [rho, eta, mu]
hyper_dict = {error: hyperparameters}

# Reverse mode is used to calculate the hyper-gradients.
hyper_grad = rf.ReverseHyperGradient(optimizer, hyper_dict)

In [None]:
# Helper for stochastic descent: mini-batch size and number of epochs used to
# visit the training examples.
batch_size = 200
n_epochs = 20
ev_data = ExampleVisiting(mnist, batch_size=batch_size, epochs=n_epochs)

# Feed-dict suppliers for the training mini-batches and for the whole
# validation and test sets.
tr_suppl = ev_data.create_train_feed_dict_supplier(x, y)
val_supplier = ev_data.create_all_valid_feed_dict_supplier(x, y)
test_supplier = ev_data.create_all_test_feed_dict_supplier(x, y)

# Everything needed to compute the hyper-gradients is now in place.
# Next: optimizers for the hyperparameters, plus bounds on them
# (rho, eta and mu should not become negative).
hyper_optimizers = rf.create_hyperparameter_optimizers(hyper_grad, rf.AdamOptimizer)
pos_constraints = rf.positivity(hyper_grad.hyper_list)

In [None]:
# Run a few hyper-iterations and print progress.
#
# `with tf.Session() as ss` installs the session as the default one (required
# by the `.run()` / `.eval()` calls below) and also closes it when the block
# exits. `tf.Session().as_default()` only does the former, so the session and
# its resources would stay allocated after every run of this cell.
with tf.Session() as ss:
    tf.variables_initializer(hyper_grad.hyper_list).run()  # initialize hyperparameters
    # initialize the support variables of each hyperparameter optimizer
    for hy_opt in hyper_optimizers:
        hy_opt.support_variables_initializer().run()
    ev_data.generate_visiting_scheme()

    for hyper_step in range(10):
        hyper_grad.initialize()  # reset weights to initial state
        # optimize the model and compute the hyper-gradients
        hyper_grad.run_all(T=ev_data.T, train_feed_dict_supplier=tr_suppl,
                           val_feed_dict_suppliers=val_supplier)

        # apply the hyper-gradients, one optimizer at a time, then run the
        # positivity constraints built from the hyperparameter list
        for h_optim in hyper_optimizers:
            ss.run(h_optim.assign_ops)
        ss.run(pos_constraints)

        print('Concluded hyper-iteration', hyper_step)
        print('Test accuracy:', ss.run(accuracy, feed_dict=test_supplier()))
        print('Validation error:', ss.run(error, feed_dict=val_supplier()))
        print('Values of hyperparameters')
        # one line per hyperparameter: name, current value, last hyper-gradient
        for hyp, g in zip(hyper_grad.hyper_list, hyper_grad.hyper_gradient_vars):
            print(rf.simple_name(hyp), hyp.eval(), 'hyper-gradient:', g.eval())