In [1]:
import tensorflow as tf
import numpy as np
import utils

from model_search import Network
from architect_graph import Architect
from genotypes import Genotype

In [2]:
def get_model_theta(model):
    """Return the trainable weights of ``model`` that are not architecture alphas.

    A weight is skipped when the substring ``'alphas'`` occurs anywhere in its
    ``name``; every other entry of ``model.trainable_weights`` is kept, in the
    original order.

    Args:
        model: any object exposing ``trainable_weights``, an iterable of
            objects that each have a string ``name`` attribute.

    Returns:
        list: the retained weight objects (empty if none qualify).
    """
    return [var for var in model.trainable_weights if 'alphas' not in var.name]

# Settings for this notebook run, kept in one dict so they can be wrapped
# into an attribute-style object further down.
# Each key appears exactly once: a repeated key in a dict literal is silently
# collapsed to its last value, which hides typos.
args = {
    "momentum": 0.9,
    "weight_decay": 3e-4,
    "arch_learning_rate": 3e-1,
    "grad_clip": 5,
    "learning_rate": 0.025,
    "learning_rate_decay": 0.97,
    "learning_rate_min": 0.0001,

    "unrolled": True,
    "epochs": 10,
    "train_batch_size": 2,
    "eval_batch_size": 2,
    "save": "EXP",
    "init_channels": 3,
    "num_layers": 3,
    "num_classes": 6,
    "crop_size": [8, 8],
    "save_checkpoints_steps": 100,
    "model_dir": 'gs://unet-darts/train-search-ckptss',
    "max_steps": 10000,

    "steps_per_eval": 2,
    "num_train_examples": 16,

    "use_tpu": False,
    "use_host_call": True,
    "tpu": 'unet-darts',
    "zone": 'us-central1-f',
    "project": "isro-nas",
}

# Derived from the two settings it depends on instead of being hard-coded,
# so it cannot drift out of sync with them.
args["num_batches_per_epoch"] = args["num_train_examples"] // args["train_batch_size"]

class Struct:
    """Plain attribute container: ``Struct(a=1).a == 1``."""

    def __init__(self, **entries):
        # Expose each keyword argument as an instance attribute of the same name.
        for key, value in entries.items():
            setattr(self, key, value)

# Wrap the settings dict so values read as attributes (args.learning_rate, ...).
args = Struct(**args)

# --- Model ------------------------------------------------------------------
criterion = tf.losses.sigmoid_cross_entropy
# NOTE(review): the two literal 3s presumably mirror args.init_channels and
# args.num_layers -- confirm against Network's signature.
model = Network(3, 3, criterion, num_classes=args.num_classes)
unrolled = args.unrolled

# --- Synthetic data ---------------------------------------------------------
# Random images and integer label maps, used only to exercise the graph.
# A dedicated seeded generator makes the data identical on every run without
# touching NumPy's global random state.
SEED = 0
NUM_IMAGES = 20
W, H = args.crop_size[0], args.crop_size[1]
rng = np.random.RandomState(SEED)
x_train = rng.randint(0, 256, (NUM_IMAGES, W, H, 3)).astype(np.float32)
y_train = rng.randint(0, args.num_classes, (NUM_IMAGES, W, H, 1)).astype(np.float32)
x_valid = rng.randint(0, 256, (NUM_IMAGES, W, H, 3)).astype(np.float32)
y_valid = rng.randint(0, args.num_classes, (NUM_IMAGES, W, H, 1)).astype(np.float32)

# drop_remainder=True keeps the batch dimension static, which the label
# reshape at the bottom of this cell relies on.
# NOTE(review): the validation set is batched with train_batch_size even
# though args also defines eval_batch_size -- confirm which one is intended.
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(
    args.train_batch_size, drop_remainder=True)
ds_valid = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).batch(
    args.train_batch_size, drop_remainder=True)

# The Dataset.make_one_shot_iterator() method is deprecated; this is the
# replacement named by TensorFlow's own deprecation message.
it_train = tf.compat.v1.data.make_one_shot_iterator(ds_train)
image, label = it_train.get_next()
it_valid = tf.compat.v1.data.make_one_shot_iterator(ds_valid)
image_valid, label_valid = it_valid.get_next()

# --- Graph construction -----------------------------------------------------
# Call the model once on a training batch before handing it to the Architect.
# NOTE(review): presumably this first call is what creates the network's
# variables -- confirm in model_search.Network.
_ = model(image)
architect = Architect(model, args)

# get_global_step() returns None when the graph has no global step yet;
# get_or_create_global_step() returns the existing one or creates it.
global_step = tf.train.get_or_create_global_step()
learning_rate_min = tf.constant(args.learning_rate_min)

# Staircase exponential decay, stepped every num_batches_per_epoch global
# steps and floored at learning_rate_min.
learning_rate = tf.train.exponential_decay(
    args.learning_rate,
    global_step,
    decay_rate=args.learning_rate_decay,
    decay_steps=args.num_batches_per_epoch,
    staircase=True,
    )
lr = tf.maximum(learning_rate, learning_rate_min)

# Built only now so that it covers the variables added by the calls above.
# Variables created in later cells are NOT covered by this op.
init = tf.global_variables_initializer()

# Drop the trailing axis of the label batch and one-hot encode the class ids.
b, w, h, c = label.shape
y = tf.reshape(tf.cast(label, tf.int64), (b, w, h))
y = tf.one_hot(y, args.num_classes, on_value=1.0, off_value=0.0)






Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.

Instructions for updating:
If using Keras pass *_constraint arguments to layers.





In [3]:
# Build the architecture-step op from one training batch and one validation
# batch; it is only executed later, through sess.run(arch_step).
arch_step = architect.step(
    image,
    label,
    image_valid,
    label_valid,
    unrolled,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [4]:
# Variables the training step is allowed to update, as reported by the model.
# NOTE(review): presumably the non-architecture weights -- confirm in
# model_search.Network.get_thetas.
w_var = model.get_thetas()

# Forward pass and loss on the training batch.
preds = model(image)
loss = model._loss(preds, label)

# Momentum optimiser driven by the decayed learning rate `lr`; the update is
# restricted to `w_var` via var_list.
optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum)
train_op = optimizer.minimize(
    loss,
    global_step=tf.train.get_global_step(),
    var_list=w_var,
)

In [5]:
# The first two architecture parameters are inspected before and after the
# architecture step further down.
arch_params = model.arch_parameters()
alphas_normal = arch_params[0]
alphas_reduce = arch_params[1]

# Saver for writing the graph's variables to a checkpoint.
saver = tf.train.Saver()

In [6]:
with tf.Session() as sess:
    # Build the initializer here, after every cell has added its variables,
    # so a single op covers all of them. This replaces the earlier pair of a
    # partial `init` op plus the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())

    # Snapshots taken before any update op has run.
    w_var_o = sess.run(w_var)
    alphas_normal_bo, alphas_reduce_bo = sess.run([alphas_normal, alphas_reduce])

    print("Computing Arch Step")
    sess.run(arch_step)
    # Alphas after the architecture step.
    alphas_normal_ao, alphas_reduce_ao = sess.run([alphas_normal, alphas_reduce])

    print("Computing Train Step")
    sess.run(train_op)
    # Weights after both the architecture step and the training step have run.
    w_var_ao = sess.run(w_var)

    save_path = saver.save(sess, "./final_model/shit_model.ckpt")
    print("Model saved in path: %s" % save_path)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Computing Arch Step
Computing Train Step
Model saved in path: ./final_model/shit_model.ckpt


## Check alphas

In [7]:
# alphas_normal as fetched before the architecture step was run.
alphas_normal_bo

array([[7.12536159e-04, 7.32944871e-04, 9.54648887e-04, 7.41222757e-04],
       [1.92740205e-04, 2.85102869e-04, 4.22806508e-04, 8.78524093e-04],
       [8.07227509e-04, 7.91816856e-04, 9.16260644e-04, 3.12135584e-04],
       [6.07450493e-04, 4.67423699e-04, 4.86078876e-04, 7.58790062e-04],
       [1.77840833e-04, 5.76221964e-06, 8.09587538e-04, 9.34876851e-04],
       [2.95954233e-04, 1.99510701e-04, 4.28048748e-04, 7.74598157e-04],
       [4.27998079e-04, 8.43071379e-04, 6.53319061e-04, 3.91634967e-04],
       [9.56513337e-04, 5.22855553e-05, 3.46614630e-04, 8.97666265e-04],
       [2.59233260e-04, 1.50762688e-04, 7.16523558e-04, 9.15468379e-04],
       [1.24872931e-05, 6.87128224e-04, 4.30633314e-04, 4.58354625e-04],
       [1.69412029e-04, 2.34465129e-04, 3.21025145e-04, 6.04183006e-04],
       [1.70330648e-04, 9.72496462e-04, 2.30917110e-04, 1.98182592e-04],
       [4.84933407e-04, 9.02137544e-04, 4.99655958e-04, 1.16319665e-04],
       [7.15676229e-04, 2.81204004e-04, 8.78347550e

In [8]:
# alphas_normal as fetched right after the architecture step was run.
alphas_normal_ao

array([[7.1198615e-04, 7.2746788e-04, 9.6007105e-04, 7.4182759e-04],
       [1.8302909e-04, 2.9186808e-04, 4.3197046e-04, 8.7230606e-04],
       [8.0202968e-04, 7.9980376e-04, 9.1616018e-04, 3.0944683e-04],
       [5.9581187e-04, 4.7895222e-04, 4.9409480e-04, 7.5088424e-04],
       [1.6830792e-04, 1.2588678e-05, 8.1552973e-04, 9.3164120e-04],
       [2.9931555e-04, 1.9713894e-04, 4.2687057e-04, 7.7478675e-04],
       [4.1851398e-04, 8.5141859e-04, 6.6222821e-04, 3.8386270e-04],
       [9.5327944e-04, 5.5400473e-05, 3.5133364e-04, 8.9306623e-04],
       [2.5579846e-04, 1.5495979e-04, 7.2160648e-04, 9.0962322e-04],
       [9.9319368e-06, 6.9085346e-04, 4.3143576e-04, 4.5638229e-04],
       [1.6581672e-04, 2.4000688e-04, 3.2327723e-04, 5.9998449e-04],
       [1.6850508e-04, 9.7663119e-04, 2.3424468e-04, 1.9254586e-04],
       [4.7988590e-04, 9.0659113e-04, 5.0395430e-04, 1.1261527e-04],
       [7.0980861e-04, 2.8840150e-04, 8.8462618e-04, 3.2511406e-04]],
      dtype=float32)

In [9]:
# Element-wise comparison of the two snapshots; False marks an entry that
# differs between the before and after values.
alphas_normal_bo == alphas_normal_ao

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

## Check w_var

In [10]:
# First entry of w_var as fetched before either the architecture step or the
# training step was run.
w_var_o[0]

array([[[[-3.44002545e-02, -1.95918381e-01,  1.27433315e-01,
          -2.09924355e-01, -1.80527866e-02,  7.20191747e-02,
          -3.11945081e-02, -1.07923865e-01, -1.94553614e-01],
         [ 4.17596102e-03,  7.32188970e-02,  2.52482742e-02,
          -1.71959430e-01, -9.04804617e-02,  6.78667575e-02,
           6.45583421e-02, -1.84949189e-01,  9.80690867e-02],
         [-1.97424665e-01,  1.63378432e-01, -1.32725090e-01,
           2.25142345e-01, -2.21933275e-01,  4.13633138e-02,
          -7.43439496e-02, -5.44528812e-02, -1.12978280e-01]],

        [[-1.03536546e-01, -1.98178440e-02,  1.99012980e-01,
           7.96629339e-02, -7.22578466e-02, -1.48295194e-01,
           1.04676977e-01, -2.10286930e-01,  2.21435890e-01],
         [-1.07301041e-01, -1.66118085e-01, -2.28298008e-01,
           1.17229715e-01,  1.84945598e-01,  1.51183948e-01,
          -9.17023867e-02, -1.41135857e-01,  2.32118472e-01],
         [ 6.20699376e-02, -1.36453003e-01,  9.61616635e-03,
          -8.0998

In [11]:
# First entry of w_var as fetched after the training step was run.
w_var_ao[0]

array([[[[-0.03442837, -0.19600122,  0.12719032, -0.20992821,
          -0.01833246,  0.07180147, -0.03064003, -0.10792386,
          -0.19448635],
         [ 0.00414314,  0.07310251,  0.02488243, -0.17197381,
          -0.0906544 ,  0.06768876,  0.06518925, -0.18494917,
           0.09822389],
         [-0.1974354 ,  0.16325174, -0.13307647,  0.22510232,
          -0.22217709,  0.04119756, -0.07348753, -0.05445288,
          -0.11283895]],

        [[-0.10354199, -0.01992343,  0.19863144,  0.07962783,
          -0.07248777, -0.14840886,  0.10554582, -0.21028695,
           0.22161986],
         [-0.10730677, -0.16625865, -0.22848445,  0.11719748,
           0.18475099,  0.15106677, -0.09111515, -0.14113586,
           0.23229635],
         [ 0.0620399 , -0.13657337,  0.0093889 , -0.0810284 ,
          -0.15057312, -0.23169768, -0.0992375 , -0.08982612,
          -0.08481759]],

        [[ 0.04264848,  0.23551881, -0.1929766 , -0.06323685,
          -0.04846992, -0.09720816,  0.1750691

In [12]:
# Element-wise comparison of the first weight tensor's two snapshots; False
# marks an entry that differs between them.
w_var_o[0] == w_var_ao[0]

array([[[[False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False, False,
          False]],

        [[False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False,  True,
          False],
         [False, False, False, False, False, False, False,  True,
          False]],

        [[False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False,  True,
          False],
         [False, False, False, False, False, False, False,  True,
          False]]],


       [[[False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False, False,
          False],
         [False, False, False, False, False, False, False, False,
  

## Load Weights

## Other way to check w_var

In [30]:
# Same weight-update check, but with the gradients computed explicitly and
# applied through apply_gradients instead of optimizer.minimize.
grads = tf.gradients(loss, w_var)
train_op2 = optimizer.apply_gradients(zip(grads, w_var))

with tf.Session() as sess:
    # Build the initializer here so it also covers any variables added by the
    # two graph-building lines above. This replaces the partial `init` op
    # plus the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())

    grads_o = sess.run(grads)
    # Weights before the update.
    w_var_o2 = sess.run(w_var)
    sess.run(train_op2)
    # Weights after the update.
    w_var_ao2 = sess.run(w_var)