In [1]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import utils

# Concat

In [2]:
def _concat(xs):
    """nd tensor to 1d tensor

    Args:
        xs (array): the array of nd tensor

    Returns:
        array: concated array
    """
    return tf.concat([tf.reshape(x, [tf.size(x)]) for x in xs], axis=0, name="_concat")

## Testing

In [7]:
a = tf.constant([[[[1],[2],[3]], [[4], [5], [6]]], [[[2],[4],[6]], [[8], [10], [12]]]])
b = tf.constant([1,2])

In [8]:
tf.concat(tf.reshape(a, [tf.size(a)]), axis=-1)

<tf.Tensor 'concat/concat:0' shape=(12,) dtype=int32>

# Architect

In [3]:
class Architect(object):
    """Constructs the model

    Parameters:
      network_momentum(float):  network momentum
      network_weight_decay(float): network weight decay
      model(Network): Network archtecture with cells
      optimise(optimiser): Adam / SGD
    """

    def __init__(self, model, args):
        """Initialises the architecture

        Args:
            model (Network): Network archtecture with cells
            args (dict): cli args
        """
        self.network_momentum = args.momentum
        self.network_weight_decay = args.weight_decay
        self.model = model
        self.use_tpu = args.use_tpu
        
        self.arch_learning_rate = args.arch_learning_rate
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.arch_learning_rate,
                                                beta1=0.5,
                                                beta2=0.999)
        if(self.use_tpu):
            self.optimizer = tf.tpu.CrossShardOptimizer(self.optimizer)
            
        global_step = tf.train.get_or_create_global_step()
        learning_rate_min = tf.constant(args.learning_rate_min)

        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            global_step,
            decay_rate=args.learning_rate_decay,
            decay_steps=args.num_batches_per_epoch,
            staircase=True,
        )

        lr = tf.maximum(learning_rate, learning_rate_min)
        
        self.learning_rate = lr

    def get_model_theta(self, model):
        specific_tensor = []
        specific_tensor_name = []
        for var in model.trainable_weights:
            if not 'alphas' in var.name:
                specific_tensor.append(var)
                specific_tensor_name.append(var.name)
        return specific_tensor
    
    def step(self, input_train, target_train, input_valid, target_valid, unrolled):
        """Computer a step for gradient descend

        Args:
            input_train (tensor): a train of input
            target_train (tensor): a train of targets
            input_valid (tensor): a train of validation
            target_valid (tensor): a train of validation targets
            eta (tensor): eta
            network_optimizer (optimiser): network optimiser for network
            unrolled (bool): True if training we need unrolled
        """
        if unrolled:
            w_regularization_loss = 0.25
            logits = self.model(input_train)
            train_loss = self.model._loss(logits, target_train)
            train_loss += 1e4*0.25*w_regularization_loss
            return self._compute_unrolled_step(input_train, 
                                               target_train, 
                                               input_valid, 
                                               target_valid,
                                               self.get_model_theta(self.model),
                                               train_loss,
                                               self.learning_rate
                                              )
        else:
            return self._backward_step(input_valid, target_valid)
        
    
    def _compute_unrolled_step(self, x_train, y_train, x_valid, y_valid, w_var, train_loss, lr):
        arch_var = self.model.arch_parameters()
        
        unrolled_model = self.model.new()
        _ = unrolled_model(x_train)
        unrolled_w_var = self.get_model_theta(unrolled_model)
        copy_weight_opts = [v.assign(w) for v,w in zip(unrolled_w_var, w_var)]
        logits = unrolled_model(x_train)
        
        unrolled_train_loss = unrolled_model._loss(logits, y_train)  

        with tf.control_dependencies(copy_weight_opts):
            unrolled_optimizer = tf.train.GradientDescentOptimizer(lr)
            if(self.use_tpu):
                unrolled_optimizer = tf.tpu.CrossShardOptimizer(unrolled_optimizer)
            unrolled_optimizer = unrolled_optimizer.minimize(unrolled_train_loss, var_list=unrolled_w_var)

        valid_logits = unrolled_model(x_valid)
        valid_loss = unrolled_model._loss(valid_logits, y_valid)

        with tf.control_dependencies([unrolled_optimizer]):
            valid_grads = tf.gradients(valid_loss, unrolled_w_var)

        r=1e-2
        R = r / (tf.global_norm(valid_grads)+1e-6)

        optimizer_pos=tf.train.GradientDescentOptimizer(R)
        if(self.use_tpu):
            optimizer_pos = tf.tpu.CrossShardOptimizer(optimizer_pos)
        optimizer_pos=optimizer_pos.apply_gradients(zip(valid_grads, w_var))

        optimizer_neg=tf.train.GradientDescentOptimizer(-2*R)
        if(self.use_tpu):
            optimizer_neg = tf.tpu.CrossShardOptimizer(optimizer_neg)
        optimizer_neg=optimizer_neg.apply_gradients(zip(valid_grads, w_var))

        optimizer_back=tf.train.GradientDescentOptimizer(R)
        if(self.use_tpu):
            optimizer_back = tf.tpu.CrossShardOptimizer(optimizer_back)
        optimizer_back=optimizer_back.apply_gradients(zip(valid_grads, w_var))
        
        with tf.control_dependencies([optimizer_pos]):
            train_grads_pos=tf.gradients(train_loss, arch_var)
            with tf.control_dependencies([optimizer_neg]):
                train_grads_neg=tf.gradients(train_loss,arch_var)
                with tf.control_dependencies([optimizer_back]):
                    leader_opt= self.optimizer
                    leader_grads=tf.gradients(valid_loss, unrolled_model.arch_parameters())
        
        for i,g in enumerate(leader_grads):
            leader_grads[i]= g - self.learning_rate * tf.divide(train_grads_pos[i]-train_grads_neg[i],2*R)

        leader_opt=leader_opt.apply_gradients(zip(leader_grads, arch_var))
        return leader_opt
    
    def _backward_step(self, input_valid, target_valid):
        """Backward step for validation

        Args:
            input_train (tensor): a train of input
            target_train (tensor): a train of targets
        """
        loss = self.model._loss(self.model(input_valid), target_valid)
        opt = self.optimizer.minimize(loss, var_list=model.get_weights())
        return opt

## Testing

In [4]:
from model_search import Network

In [5]:
criterion = tf.losses.sigmoid_cross_entropy
model = Network(3, 3, criterion, num_classes=)
args = {
    "momentum": 0.9,
    "weight_decay": 3e-4,
    "arch_learning_rate": 3e-1,
    "momentum": 0.9,
    "grad_clip": 5,
    "learning_rate": 0.025,
    "learning_rate_decay": 0.97,
    "learning_rate_min": 0.0001,
    "num_batches_per_epoch": 2000,
    
    "unrolled": True,
    "epochs": 10,
    "train_batch_size": 2,
    "eval_batch_size": 2,
    "save": "EXP",
    "init_channels": 3,
    "num_layers": 3,
    "num_classes": 6,
    "crop_size": [8, 8],
    "save_checkpoints_steps": 100,
    "model_dir": 'gs://unet-darts/train-search-ckptss',
    "max_steps": 10000,
    # NEW
    "steps_per_eval": 2,
    "num_train_examples": 16,
    #
    
    "use_tpu": False,
    "use_host_call": True,
    "tpu": 'unet-darts',
    "zone": 'us-central1-f',
    "project": "isro-nas",
}
args.update({"num_batches_per_epoch": args["num_train_examples"] // args["train_batch_size"]})

class Struct:
    def __init__(self, **entries):
        self.__dict__.update(entries)

args = Struct(**args)







In [6]:
W, H = args.crop_size[0], args.crop_size[1]
NUM_IMAGES = 20
x_train = np.random.randint(0, 256, (NUM_IMAGES, W, H, 3)).astype(np.float32)
y_train = np.random.randint(0, args.num_classes, (NUM_IMAGES, W, H, 1)).astype(np.float32)
x_valid = np.random.randint(0, 256, (NUM_IMAGES, W, H, 3)).astype(np.float32)
y_valid = np.random.randint(0, args.num_classes, (NUM_IMAGES, W, H, 1)).astype(np.float32)

ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(args.train_batch_size, drop_remainder=True)
ds_valid = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).batch(args.train_batch_size, drop_remainder=True)

it_train = ds_train.make_one_shot_iterator()
image, label = it_train.get_next()
it_valid = ds_valid.make_one_shot_iterator()
image_valid, label_valid = it_valid.get_next()
lr=0.025
unrolled=True

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


In [7]:
init = tf.global_variables_initializer()
res = model(image)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [8]:
architect = Architect(model, args)

In [9]:
opt = architect.step(image, label, image_valid, label_valid, unrolled)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [41]:
with tf.Session() as sess:
    sess.run(init)
    tf.initialize_all_variables().run()
    out1 = sess.run(model.arch_parameters())
    out = sess.run(opt)
    out2 = sess.run(model.arch_parameters())

In [11]:
def get_model_theta(model):
    specific_tensor = []
    specific_tensor_name = []
    for var in model.trainable_weights:
        if not 'alphas' in var.name:
            specific_tensor.append(var)
            specific_tensor_name.append(var.name)
    return specific_tensor

In [18]:
arch_var = model.arch_parameters()
unrolled_model = model.new()
_ = unrolled_model(image)
unrolled_w_var = get_model_theta(unrolled_model)
w_var = get_model_theta(model)
copy_weight_opts = [v.assign(w) for v,w in zip(unrolled_w_var, w_var)]

In [19]:
with tf.control_dependencies(copy_weight_opts):
    logits = unrolled_model(image)
    unrolled_train_loss = unrolled_model._loss(logits, label)
    unrolled_w_var = get_model_theta(unrolled_model)
#     unrolled_optimizer = tf.train.GradientDescentOptimizer(lr)
#     unrolled_train_grads = unrolled_optimizer.compute_gradients(unrolled_train_loss, var_list=unrolled_w_var)
#     unrolled_optimizer_op = unrolled_optimizer.apply_gradients(unrolled_train_grads)
    unrolled_train_grads = tf.gradients(unrolled_train_loss, unrolled_w_var)

In [20]:
valid_logits = unrolled_model(image_valid)
valid_loss = unrolled_model._loss(valid_logits, label_valid)
valid_grads = tf.gradients(valid_loss, unrolled_w_var)

In [21]:
# r=1e-2
# R = r / (tf.global_norm(valid_grads)+1e-6)

# optimizer_pos=tf.train.GradientDescentOptimizer(R)
# optimizer_pos=optimizer_pos.apply_gradients(zip(valid_grads, w_var))

In [22]:
with tf.Session() as sess:
    sess.run(init)
    tf.initialize_all_variables().run()
    out1 = sess.run(unrolled_train_grads)
    out2 = sess.run(valid_grads)

In [45]:
out2

[array([[ 0.3001952 , -0.29948696,  0.3008377 ,  0.29996917],
        [ 0.30031642, -0.2996615 , -0.2997647 ,  0.30070767],
        [-0.2991369 ,  0.30045164,  0.3000093 , -0.2999449 ],
        [ 0.30068585, -0.29991263, -0.29915914,  0.30049714],
        [ 0.29826996,  0.29994002,  0.29957512, -0.29978627],
        [ 0.30017585, -0.2998143 , -0.29921976,  0.3006019 ],
        [ 0.30059466, -0.29950923,  0.30016547,  0.30075297],
        [ 0.30009156, -0.29971334, -0.29942992,  0.3000586 ],
        [ 0.3004881 , -0.29953355, -0.29924586,  0.30093056],
        [ 0.29992858,  0.30010808, -0.29989988,  0.30026817],
        [-0.29938245,  0.3005266 ,  0.3005949 , -0.2993926 ],
        [-0.29937303,  0.300259  ,  0.30051953, -0.29907057],
        [-0.29914486,  0.30060256,  0.30067992, -0.2994711 ],
        [-0.29946443,  0.30038452,  0.3004314 , -0.2999285 ]],
       dtype=float32),
 array([[ 0.30047026,  0.30066213, -0.2997903 ,  0.300108  ],
        [ 0.3003905 , -0.29819813, -0.29910982

In [12]:
alphas_normal = tf.get_variable("alphas_normal", [14, 4])
alphas_reduce = tf.get_variable("alphas_reduce", [14, 4])

In [19]:
alphas_normal_t = tf.get_variable('alphas_normal', initializer=alphas_normal)

In [16]:
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
#     print(sess.run(alphas_normal))
    saver.restore(sess, "../../train-search-ckpts/model.ckpt-100")
    print(sess.run(alphas_normal))

INFO:tensorflow:Restoring parameters from ../../train-search-ckpts/model.ckpt-100
[[7.6811708e-04 6.1610749e-04 7.3774339e-04 1.7251099e-04]
 [4.6491768e-04 1.1831308e-04 4.8414184e-04 8.2456821e-04]
 [3.8573553e-04 9.6499722e-04 4.1154864e-05 4.5638240e-04]
 [2.1470130e-04 8.5145357e-04 1.4969934e-04 6.1814004e-04]
 [7.2059088e-04 9.4683509e-04 9.7304612e-04 2.3769797e-04]
 [9.2476612e-06 7.6371187e-04 3.9916387e-04 6.2424364e-04]
 [4.5342889e-04 8.0606272e-04 4.9657130e-04 7.5611676e-04]
 [2.9467285e-04 9.4560866e-04 2.8576973e-04 2.9266250e-04]
 [1.9598247e-05 7.2383991e-04 7.3719054e-04 1.7768217e-04]
 [3.8440814e-04 8.4802596e-04 4.9985957e-04 5.6202081e-04]
 [4.0616884e-04 8.8108651e-04 2.6535750e-05 9.6354849e-04]
 [5.6696753e-04 1.9432831e-04 6.9372944e-04 2.0856393e-04]
 [5.4914324e-04 6.7978277e-04 6.0854066e-04 4.3868009e-04]
 [5.6436576e-04 2.2776485e-05 9.7615091e-04 5.3994311e-04]]


In [17]:
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, "../../train-search-ckpts/model.ckpt-200")
    print(sess.run(alphas_normal))

INFO:tensorflow:Restoring parameters from ../../train-search-ckpts/model.ckpt-200
[[7.6811708e-04 6.1610749e-04 7.3774339e-04 1.7251099e-04]
 [4.6491768e-04 1.1831308e-04 4.8414184e-04 8.2456821e-04]
 [3.8573553e-04 9.6499722e-04 4.1154864e-05 4.5638240e-04]
 [2.1470130e-04 8.5145357e-04 1.4969934e-04 6.1814004e-04]
 [7.2059088e-04 9.4683509e-04 9.7304612e-04 2.3769797e-04]
 [9.2476612e-06 7.6371187e-04 3.9916387e-04 6.2424364e-04]
 [4.5342889e-04 8.0606272e-04 4.9657130e-04 7.5611676e-04]
 [2.9467285e-04 9.4560866e-04 2.8576973e-04 2.9266250e-04]
 [1.9598247e-05 7.2383991e-04 7.3719054e-04 1.7768217e-04]
 [3.8440814e-04 8.4802596e-04 4.9985957e-04 5.6202081e-04]
 [4.0616884e-04 8.8108651e-04 2.6535750e-05 9.6354849e-04]
 [5.6696753e-04 1.9432831e-04 6.9372944e-04 2.0856393e-04]
 [5.4914324e-04 6.7978277e-04 6.0854066e-04 4.3868009e-04]
 [5.6436576e-04 2.2776485e-05 9.7615091e-04 5.3994311e-04]]


In [27]:
alphas_reduce

<tf.Variable 'alphas_reduce_1:0' shape=(14, 4) dtype=float32_ref>

In [9]:
variables = sess.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

NameError: name 'sess' is not defined

In [None]:
variables