In [1]:
import tensorflow as tf
import rfho as rf

from rfho.datasets import load_mnist

Experiment save directory is  /media/luca/DATA/EXPERIMENTS
Data folder is /media/luca/DATA/DATASETS


In [2]:
# Split MNIST: 5% of the data goes to the training set, 1% to the validation set and
# the remainder to the test set (change these percentages and see the effect on the
# regularization hyperparameter).
mnist = load_mnist(partitions=(0.05, 0.01))

Extracting /media/luca/DATA/DATASETS/mnist_data/train-images-idx3-ubyte.gz
Extracting /media/luca/DATA/DATASETS/mnist_data/train-labels-idx1-ubyte.gz
Extracting /media/luca/DATA/DATASETS/mnist_data/t10k-images-idx3-ubyte.gz
Extracting /media/luca/DATA/DATASETS/mnist_data/t10k-labels-idx1-ubyte.gz
datasets.redivide_data:, computed partitions numbers - [0, 3500, 4200, 70000] len all 70000 DONE


In [3]:
# Placeholders for the inputs and for the targets.
x = tf.placeholder(tf.float32, name='x')
y = tf.placeholder(tf.float32, name='y')

# The model: here a linear model taken from rfho.models.
model = rf.LinearModel(x, mnist.train.dim_data, mnist.train.dim_target)

# Vectorize the model and build the state vector. The state is augmented by 1
# because the weights are going to be optimized with momentum.
s, out, w_matrix = rf.vectorize_model(
    model.var_list,
    model.inp[-1],
    model.Ws[0],
    augment=1,
)
# Note: the call above also prints some tensorflow infos and warnings about
# variables collections (a known issue, still to be solved).

In [4]:
# Error: mean cross-entropy between the targets `y` and the model output `out`.
error = tf.reduce_mean(rf.cross_entropy_loss(labels=y, logits=out), name='error')

# Training error = error + L2 penalty on the weights, scaled by the
# regularization hyperparameter `rho`.
rho = tf.Variable(0.0, name='rho')
l2_penalty = tf.reduce_sum(tf.pow(w_matrix, 2))
training_error = error + rho * l2_penalty

# Hyperparameter constraints; the regularization coefficient should be positive.
constraints = [rf.positivity(rho)]

# Accuracy: fraction of examples whose arg-max prediction matches the arg-max target.
correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name='accuracy')

# Learning rate and momentum factor are variables too, so that they can be optimized.
eta = tf.Variable(0.01, name='eta')
mu = tf.Variable(0.5, name='mu')

# Training dynamics (similar to tf.train.Optimizer).
optimizer = rf.MomentumOptimizer.create(s, eta, mu, loss=training_error)

# Add the constraints for learning rate and momentum factor.
constraints.extend(optimizer.get_natural_hyperparameter_constraints())

In [5]:
# The weights are optimized w.r.t. `training_error`, while the hyperparameters
# are optimized w.r.t. the validation error (in this case `error` evaluated on
# the validation set). The hypergradient is computed with ReverseMode.
hyper_dict = {
    error: [rho, eta, mu],
}
hyper_opt = rf.HyperOptimizer(
    optimizer,
    hyper_dict,
    method=rf.ReverseHG,
)

In [6]:
# Helper for stochastic descent over the training set (batch size 256 == 2**8).
ev_data = rf.ExampleVisiting(mnist.train, batch_size=256, epochs=200)

# Feed-dict suppliers, one per data split, all bound to the placeholders x and y.
tr_suppl = ev_data.create_supplier(x, y)
val_supplier = mnist.validation.create_supplier(x, y)
test_supplier = mnist.test.create_supplier(x, y)

In [7]:
# Run all for some hyper-iterations
def run(hyper_iterations):
    """Run `hyper_iterations` steps of hyperparameter optimization.

    Every hyper-iteration calls `hyper_opt.initialize()` and then `hyper_opt.run`,
    passing `ev_data.T` (presumably the number of training iterations -- confirm
    against rfho), the training and validation feed-dict suppliers and the
    hyperparameter constraint ops.

    The whole loop runs inside its own TensorFlow session, which is installed as
    the default session and closed when the function returns.

    Args:
        hyper_iterations: number of hyper-iterations to perform.
    """
    # `with tf.Session() as ss` both makes the session the default one and closes
    # it on exit. `tf.Session().as_default()` only does the former, so the session
    # (and the resources it holds) would be leaked at every call of `run`.
    with tf.Session() as ss:
        ev_data.generate_visiting_scheme()  # needed for remembering the example visited in forward pass
        for hyper_step in range(hyper_iterations):
            hyper_opt.initialize()  # initializes all variables or reset weights to initial state
            hyper_opt.run(ev_data.T, train_feed_dict_supplier=tr_suppl,
                          val_feed_dict_suppliers=val_supplier,
                          hyper_constraints_ops=constraints)
            # Optional manual progress report (uncomment to use; `ss` is kept for this):
            # print('Concluded hyper-iteration', hyper_step)
            # print('Test accuracy:', ss.run(accuracy, feed_dict=test_supplier()))
            # print('Validation error:', ss.run(error, feed_dict=val_supplier()))

In [8]:
saver = rf.Saver('Staring example', collect_data=False)

# Record the 'error' and 'accuracy' tensors on both the validation and the test
# set (in this order: error/valid, error/test, accuracy/valid, accuracy/test) ...
records = [
    rf.Records.tensors(tensor_name, fd=('x', 'y', dataset), rec_name=rec_name)
    for tensor_name in ('error', 'accuracy')
    for rec_name, dataset in (('valid', mnist.validation), ('test', mnist.test))
]
# ... followed by the hyperparameters and the hypergradients.
records.append(rf.Records.hyperparameters())
records.append(rf.Records.hypergradients())

with saver.record(*records):  # a context to print some statistics.
    # If you execute again any cell containing the model construction,
    # restart the notebook or reset the tensorflow graph in order to prevent
    # errors due to tensor namings.
    run(50)  # this will take some time... run it for fewer hyper-iterations for a quicker look

Step 0                Values
------------------  --------
valid::error         2.30259
test::error          2.30259
valid::accuracy      0.09143
test::accuracy       0.09828
rho                  0.00000
eta                  0.01000
mu                   0.50000
grad::rho            0.00000
grad::eta            0.00000
grad::mu             0.00000
Elapsed time (sec)   0.00000
Step 1                Values
------------------  --------
valid::error         0.47800
test::error          0.41440
valid::accuracy      0.87286
test::accuracy       0.88319
rho                  0.00000
eta                  0.01100
mu                   0.50100
grad::rho            5.67378
grad::eta           -5.25401
grad::mu            -0.10519
Elapsed time (sec)  34.00000
Step 2                Values
------------------  --------
valid::error         0.47308
test::error          0.40872
valid::accuracy      0.87429
test::accuracy       0.88409
rho                  0.00000
eta                  0.01199
mu            

Step 22                Values
------------------  ---------
valid::error          0.44052
test::error           0.37337
valid::accuracy       0.87857
test::accuracy        0.89058
rho                   0.00000
eta                   0.02783
mu                    0.52042
grad::rho             1.52742
grad::eta            -0.80294
grad::mu             -0.04551
Elapsed time (sec)  760.00000
Step 23                Values
------------------  ---------
valid::error          0.44001
test::error           0.37290
valid::accuracy       0.87857
test::accuracy        0.89070
rho                   0.00000
eta                   0.02841
mu                    0.52124
grad::rho             1.30838
grad::eta            -0.76371
grad::mu             -0.04431
Elapsed time (sec)  794.00000
Step 24                Values
------------------  ---------
valid::error          0.43955
test::error           0.37247
valid::accuracy       0.87857
test::accuracy        0.89088
rho                   0.00000
eta       

Step 43                 Values
------------------  ----------
valid::error           0.43465
test::error            0.36931
valid::accuracy        0.87429
test::accuracy         0.89190
rho                    0.00021
eta                    0.03699
mu                     0.53572
grad::rho              1.36042
grad::eta             -0.43105
grad::mu              -0.03395
Elapsed time (sec)  1455.00000
Step 44                 Values
------------------  ----------
valid::error           0.43445
test::error            0.36901
valid::accuracy        0.87571
test::accuracy         0.89194
rho                    0.00015
eta                    0.03731
mu                     0.53638
grad::rho              0.71862
grad::eta             -0.41094
grad::mu              -0.03269
Elapsed time (sec)  1490.00000
Step 45                 Values
------------------  ----------
valid::error           0.43428
test::error            0.36865
valid::accuracy        0.87571
test::accuracy         0.89198
rho     