In [1]:
import tensorflow as tf
from tensorflow.keras import Model
import numpy as np
import utils

# Concat

In [2]:
def _concat(xs):
    """Flatten every tensor in ``xs`` and join the pieces into one 1-D tensor.

    Args:
        xs (iterable): tensors of arbitrary rank.

    Returns:
        Tensor: 1-D tensor holding the elements of each input, in input order.
    """
    flat_parts = []
    for tensor in xs:
        # Reshaping to [size] collapses all dimensions of the tensor into one.
        flat_parts.append(tf.reshape(tensor, [tf.size(tensor)]))
    return tf.concat(flat_parts, axis=0, name="_concat")

## Testing

In [3]:
# Sample inputs: a rank-4 tensor of shape (2, 2, 3, 1) and a rank-1 tensor of length 2.
a = tf.constant([
    [[[1], [2], [3]], [[4], [5], [6]]],
    [[[2], [4], [6]], [[8], [10], [12]]],
])
b = tf.constant([1, 2])

In [4]:
# Flatten `a` into a 1-D tensor with tf.size(a) elements; this exercises the same
# reshape that `_concat` applies to each input, but does not call `_concat` itself.
tf.concat(tf.reshape(a, [tf.size(a)]), axis=-1)

<tf.Tensor 'concat/concat:0' shape=(12,) dtype=int32>

# Architect

In [3]:
class Architect(object):
    """Builds the TF1 graph ops that update a model's architecture parameters.

    Attributes:
      network_momentum (float): ``args.momentum``; stored but not used by this class.
      network_weight_decay (float): ``args.weight_decay``; stored but not used by this class.
      model (Network): network architecture with cells.
      arch_learning_rate (float): learning rate of the architecture optimiser.
      optimizer (tf.train.AdamOptimizer): optimiser applied to the architecture parameters.
      learning_rate (float): learning rate used for the virtual weight step and
        for scaling the finite-difference correction.
    """

    def __init__(self, model, args):
        """Initialises the architect.

        Args:
            model (Network): network architecture with cells.
            args (object): object exposing ``momentum``, ``weight_decay``,
                ``arch_learning_rate`` and ``learning_rate`` as attributes.
        """
        self.network_momentum = args.momentum
        self.network_weight_decay = args.weight_decay
        self.model = model
        self.arch_learning_rate = args.arch_learning_rate
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.arch_learning_rate,
                                                beta1=0.5,
                                                beta2=0.999)
        self.learning_rate = args.learning_rate

    def get_model_theta(self, model):
        """Return the trainable weights of ``model`` whose name does not contain 'alphas'.

        Args:
            model: object exposing ``trainable_weights``, each item having a ``name``.

        Returns:
            list: the selected variables, in the order of ``model.trainable_weights``.
        """
        return [var for var in model.trainable_weights if 'alphas' not in var.name]

    def step(self, input_train, target_train, input_valid, target_valid, unrolled):
        """Build the op for one architecture update.

        Args:
            input_train (tensor): training inputs.
            target_train (tensor): training targets.
            input_valid (tensor): validation inputs.
            target_valid (tensor): validation targets.
            unrolled (bool): if true, build the unrolled update; otherwise build
                the plain validation-loss update.

        Returns:
            The op produced by ``_compute_unrolled_step`` or ``_backward_step``.
        """
        if unrolled:
            # w_regularization_loss = tf.add_n(utils.get_var(tf.losses.get_regularization_losses(),'network')[1])
            # The computed regularisation term above is disabled; a fixed placeholder is used.
            w_regularization_loss = 0.25
            logits = self.model(input_train)
            train_loss = self.model._loss(logits, target_train)
            train_loss += 1e4*0.25*w_regularization_loss
            return self._compute_unrolled_step(input_train,
                                               target_train,
                                               input_valid,
                                               target_valid,
                                               self.get_model_theta(self.model),
                                               train_loss,
                                               self.learning_rate
                                              )
        else:
            return self._backward_step(input_valid, target_valid)

    def _compute_unrolled_step(self, x_train, y_train, x_valid, y_valid, w_var, train_loss, lr):
        """Build the unrolled architecture update op.

        Args:
            x_train (tensor): training inputs.
            y_train (tensor): training targets.
            x_valid (tensor): validation inputs.
            y_valid (tensor): validation targets.
            w_var (list): non-architecture weights of ``self.model``.
            train_loss (tensor): training loss of ``self.model``.
            lr (float): step size of the virtual gradient-descent step.

        Returns:
            The op returned by ``self.optimizer.apply_gradients`` for the
            corrected architecture gradients.
        """
        arch_var = self.model.arch_parameters()

        # Copy of the model that receives the current weights and one virtual SGD step.
        unrolled_model = self.model.new()
        logits = unrolled_model(x_train)
        unrolled_train_loss = unrolled_model._loss(logits, y_train)
        unrolled_w_var = self.get_model_theta(unrolled_model)
        copy_weight_opts = [v.assign(w) for v, w in zip(unrolled_w_var, w_var)]
        # w'
        with tf.control_dependencies(copy_weight_opts):
            unrolled_optimizer = tf.train.GradientDescentOptimizer(lr)
            unrolled_optimizer = unrolled_optimizer.minimize(unrolled_train_loss, var_list=unrolled_w_var)

        valid_logits = unrolled_model(x_valid)
        valid_loss = unrolled_model._loss(valid_logits, y_valid)
        tf.summary.scalar('valid_loss', valid_loss)

        with tf.control_dependencies([unrolled_optimizer]):
            valid_grads = tf.gradients(valid_loss, unrolled_w_var)

        # Perturbation size, scaled by the inverse global norm of the validation gradients.
        r = 1e-2
        R = r / (tf.global_norm(valid_grads) + 1e-6)

        # Three successive moves of w_var along valid_grads with rates R, -2R and R;
        # the rates sum to zero, so the last move is meant to restore the weights.
        # NOTE(review): gradient descent subtracts rate * grad, so the "pos" op moves
        # w_var by -R * grad -- confirm the sign of the difference below is as intended.
        optimizer_pos = tf.train.GradientDescentOptimizer(R)
        optimizer_pos = optimizer_pos.apply_gradients(zip(valid_grads, w_var))

        optimizer_neg = tf.train.GradientDescentOptimizer(-2*R)
        optimizer_neg = optimizer_neg.apply_gradients(zip(valid_grads, w_var))

        optimizer_back = tf.train.GradientDescentOptimizer(R)
        optimizer_back = optimizer_back.apply_gradients(zip(valid_grads, w_var))

        with tf.control_dependencies([optimizer_pos]):
            train_grads_pos = tf.gradients(train_loss, arch_var)
            with tf.control_dependencies([optimizer_neg]):
                train_grads_neg = tf.gradients(train_loss, arch_var)
                with tf.control_dependencies([optimizer_back]):
                    leader_opt = self.optimizer
                    leader_grads = leader_opt.compute_gradients(valid_loss, var_list=unrolled_model.arch_parameters())
        # Subtract the scaled central difference of the training gradients from
        # each architecture gradient.
        for i, (g, v) in enumerate(leader_grads):
            leader_grads[i] = (g - self.learning_rate * tf.divide(train_grads_pos[i]-train_grads_neg[i], 2*R), v)

        leader_opt = leader_opt.apply_gradients(leader_grads)
        return leader_opt

    def _backward_step(self, input_valid, target_valid):
        """Build the op that minimises the validation loss over the architecture parameters.

        Args:
            input_valid (tensor): validation inputs.
            target_valid (tensor): validation targets.

        Returns:
            The op returned by ``self.optimizer.minimize``.
        """
        loss = self.model._loss(self.model(input_valid), target_valid)
        # Use the instance's model (not a notebook global) and pass variables, not
        # the NumPy arrays that get_weights() would return.
        opt = self.optimizer.minimize(loss, var_list=self.model.arch_parameters())
        return opt

## Testing

In [4]:
from model_search import Network

In [5]:
# Loss function handed to the network, and the network instance under test.
criterion = tf.losses.sigmoid_cross_entropy
model = Network(3, 3, criterion)

# Hyper-parameters, keyed like the attributes that Architect reads from its `args`.
args = dict(
    momentum=0.9,
    weight_decay=3e-4,
    arch_learning_rate=3e-1,
    arch_weight_decay=1e-3,
    learning_rate=0.025,
)

class Struct:
    """Expose keyword arguments as instance attributes (dict -> attribute access)."""

    def __init__(self, **entries):
        # Each keyword becomes an attribute of the same name.
        vars(self).update(entries)






In [8]:
# Synthetic data: 20 random inputs of shape 4x4x3 with integer values in [0, 256)
# and 20 targets of shape 4x4x1 with values in {0, 1}, all cast to float32.
IMAGE_SHAPE = (20, 4, 4, 3)
LABEL_SHAPE = (20, 4, 4, 1)

np_ds_train = (
    np.random.randint(0, 256, IMAGE_SHAPE).astype(np.float32),
    np.random.randint(0, 2, LABEL_SHAPE).astype(np.float32),
)
np_ds_valid = (
    np.random.randint(0, 256, IMAGE_SHAPE).astype(np.float32),
    np.random.randint(0, 2, LABEL_SHAPE).astype(np.float32),
)

# One example per batch.
ds_train = tf.data.Dataset.from_tensor_slices(np_ds_train).batch(1)
ds_valid = tf.data.Dataset.from_tensor_slices(np_ds_valid).batch(1)

# Symbolic "next batch" tensors for the training and validation streams.
it_train = ds_train.make_one_shot_iterator()
image, label = it_train.get_next()
it_valid = ds_valid.make_one_shot_iterator()
image_valid, label_valid = it_valid.get_next()

lr = 0.025
unrolled = True

In [9]:
# NOTE(review): `init` is created before `model(image)` and before the Architect
# ops are built, so it presumably only covers variables that already exist at this
# point -- confirm, and prefer building the initializer after the graph is complete.
init = tf.global_variables_initializer()
# Forward pass on one training batch.
res = model(image)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# Wrap the plain dict so its keys can be read as attributes by Architect.
architect = Architect(model=model, args=Struct(**args))

In [13]:
# Build the architecture-update op for one train/validation batch pair.
opt = architect.step(
    input_train=image,
    target_train=label,
    input_valid=image_valid,
    target_valid=label_valid,
    unrolled=unrolled,
)

Instructions for updating:
Use `tf.cast` instead.


In [16]:
with tf.Session() as sess:
    # Build the initializer here, once the whole graph exists, so it covers every
    # variable created so far (network, unrolled copy, optimizer slots). This
    # replaces the early `init` op plus the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    # `architect.step` returns a single update op, so run it directly instead of
    # indexing into it.
    out = sess.run(opt)