In [1]:
from random import shuffle
import numpy as np
import tensorflow as tf

from graph.graph import Graph
from graph.module import *
from graph.sublayer import *
from l2l.training import training_setup, training

## Create a very simple problem to solve

In [2]:
# A simple 1D regression problem: inputs are sampled at random, but the input -> target mapping is deterministic
def simple_problem(batch_size, problem='mult'):
    """Sample one batch of a toy regression task.

    Inputs are drawn with ``np.random.rand(batch_size, 1) * 10``. A second
    column is appended to the inputs that identifies the task:
    1 for 'mult'/'multiply', -1 for 'square', 0 for 'sqrt'.

    Args:
        batch_size: Number of rows to sample.
        problem: One of 'mult', 'multiply', 'square' or 'sqrt'.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape ``(batch_size, 2)`` (raw input
        plus task tag) and ``y`` has shape ``(batch_size, 1)``.

    Raises:
        ValueError: If ``problem`` is not a recognised task name.
    """
    # Draw the inputs first so the global RNG is advanced even on the error path.
    inputs = np.random.rand(batch_size, 1) * 10

    if problem in ('mult', 'multiply'):
        targets = inputs * 5
        tag = np.ones_like(inputs)
    elif problem == 'square':
        targets = np.multiply(inputs, inputs)
        tag = -1 * np.ones_like(inputs)
    elif problem == 'sqrt':
        targets = np.sqrt(inputs)
        tag = np.zeros_like(inputs)
    else:
        raise ValueError('Invalid problem type: {}'.format(problem))

    # Append the task tag as a second input column.
    tagged_inputs = np.concatenate((inputs, tag), axis=1)
    return (tagged_inputs, targets)

## The training function

In [3]:
def train(parameter_dict, MO_options, training_info, training_iters=10):
    """Open a TF session, run ``training_setup`` and execute the training schedule.

    The schedule is one initial round on 'mult', followed by
    ``int(training_iters / 2)`` pairs of rounds. Each pair trains on 'square'
    (with 'mult' as the additional data) and then on 'mult' (with 'square' as
    the additional data).

    Args:
        parameter_dict: Forwarded unchanged to ``training_setup``.
        MO_options: Forwarded unchanged to ``training_setup``.
        training_info: Forwarded unchanged to ``training_setup``.
        training_iters: Sets the number of alternating pairs to
            ``int(training_iters / 2)``.

    Returns:
        None. Progress headers are printed before each round.
    """
    def data_getter(problem):
        # Batch sampler pinned to a single problem type.
        def sample(batch_size):
            return simple_problem(batch_size, problem=problem)
        return sample

    def random_data_getter(problems):
        # Batch sampler that picks a problem type from `problems` on every call.
        def sample(batch_size):
            choice = np.random.randint(0, len(problems))
            return simple_problem(batch_size, problem=problems[choice])
        return sample

    with tf.Session() as sess:
        packed_vars = training_setup(
            sess,
            parameter_dict=parameter_dict,
            MO_options=MO_options,
            training_info=training_info,
            additional_train=True,
            summaries=None,  # 'graph'
            accuracy_func=None,
            optimizer_sharing='m',
            load_prev_meta_opt=None,
            save_optimizer=False,
        )

        # Warm-up round on the multiplication problem.
        print('First Iteration: {}. Problem: {}.'.format(0, 'mult'))
        training(data_getter=data_getter('mult'),
                 data_getter_additional=random_data_getter(['mult']),
                 **packed_vars)

        # An earlier, disabled variant of the schedule picked a random problem
        # from ['mult', 'square', 'sqrt'] each round and used the remaining
        # problems for the additional data; it is not executed.

        for pair_idx in range(int(training_iters / 2)):
            # (iteration label, main problem, additional problem)
            schedule = (
                (pair_idx * 2 + 1, 'square', 'mult'),
                (pair_idx * 2 + 2, 'mult', 'square'),
            )
            for step, main_problem, extra_problem in schedule:
                print('\n\n\nIteration: {}. Next problem: {}.'.format(step, main_problem))
                training(data_getter=data_getter(main_problem),
                         data_getter_additional=data_getter(extra_problem),
                         **packed_vars)

## Define parameters

In [4]:
# Passed to train() as `parameter_dict`.
parameter_dict = dict(
    C=1,
    sublayer_type='AdditionSublayerModule',
    hidden_size=4,  # Hidden size of each module (or # of filters if using conv)
    gamma=3.,  # Gamma is the strength of gating in Gatenet
    M=6,  # The number of 'modules' in each layer of Gatenet
    L=2,  # The number of layers
    module_type='PerceptronModule',  # The type of module used
)

# Passed to train() as `MO_options` (meta-optimizer settings).
MO_options = dict(
    optimizer_type='CoordinateWiseLSTM',  # Type of RNN used for the meta optimizer
    second_derivatives=False,
    rnn_layers=(3, 3),  # Hidden sizes of the layers in the RNN
    len_unroll=3,
    # Matches w_t in section 2 of Learning to Learn By GD by GD
    # (https://arxiv.org/pdf/1606.04474.pdf)
    w_ts=[0.33] * 3,
    lr=0.001,  # Multiply the output of the RNN
    meta_lr=0.005,  # LR for the meta optimizer
    additional_loss_scale=1.,  # How to weight the 'additional' loss
)

# Passed to train() as `training_info`.
training_info = dict(
    batch_size=16,
    num_batches=100,
    print_every=10,
)

training_iters = 10

## Train

In [5]:
# Run the full schedule with the settings defined in the parameters cell.
train(parameter_dict, MO_options, training_info,
      training_iters=training_iters)

TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
TODO Implement _verify_scope
First Iteration: 0. Problem: mult.

Iteration: 0 , loss: 24.9484 , meta_l: 55.9783 , addi_l: 31.2611
Predictions & Answers
Pred: [ 0.30684626], Actual: [ 15.91762576] -- Input: [ 3.18352515  1.        ]
Pred: [ 0.04947605], Actual: [ 46.40465069] -- Input: [ 9.28093014  1.        ]
Pred: [ 0.04485606], Actual: [ 37.00725019] -- Input: [ 7.40145004  1.        ]
Pred: [ 0.08111712], Actual: [ 24.86059217] -- Input: [ 4.97211843  1.        ]
Pred: [ 0.13427126], Actual: [ 22.16477457] -- Input: [ 4.43295491  1.        ]
Pred: [ 0.04503509], Actual: [ 33.45274685