In [1]:
import tensorflow as tf
from tensorflow.keras import Model
from genotypes import PRIMITIVES
from genotypes import Genotype
from operations import *
import numpy as np
from utils import get_tensor_at

# Mixed Op

In [8]:
# Global switch read by MixedOp.call: when True, the weighted sum over the
# candidate ops is carried out in bfloat16.
USE_BFLOAT = True

class MixedOp(Model):
    """Continuous relaxation of one edge: a weighted sum of every candidate op.

    One operation is instantiated per entry in PRIMITIVES; `call` applies all
    of them to the same input and mixes the results with the supplied weights.
    """

    def __init__(self, C, stride):
        """Build one candidate operation per primitive.

        Args:
            C (int): channel count forwarded to every op constructor.
            stride (int): stride forwarded to every op constructor.
        """
        super(MixedOp, self).__init__()
        self._ops = []
        for primitive in PRIMITIVES:
            # Pooling primitives use the dedicated pooling layers; everything
            # else (including a pooling name that is neither avg nor max) is
            # taken from the OPS factory table. Only the op that is actually
            # kept gets constructed.
            if 'pool' in primitive and 'avg' in primitive:
                op = AvgPool3x3(C, stride)
            elif 'pool' in primitive and 'max' in primitive:
                op = MaxPool3x3(C, stride)
            else:
                op = OPS[primitive](C, stride)
            self._ops.append(op)

    def call(self, x, weights):
        """Convert the discrete set of operations into a continuous mixed operation.

        Args:
            x (tensor): input passed to every candidate op (e.g. an image batch).
            weights (tensor): 1-D tensor with one mixing weight per candidate
                op, typically the softmax of that edge's alphas.

        Returns:
            tensor: sum over ops of weights[i] * op_i(x). The result is
            bfloat16 when USE_BFLOAT is set.
        """
        computed_ops = tf.convert_to_tensor([op(x) for op in self._ops])
        # One scalar per op, shaped so it broadcasts over the stacked outputs.
        # assumes every op returns a 4-D tensor — TODO confirm
        weights = tf.reshape(weights, (-1, 1, 1, 1, 1))
        if USE_BFLOAT:
            computed_ops = tf.cast(computed_ops, dtype=tf.bfloat16, name='computed_ops_to_bfloat16')
            # Cast the weights themselves; casting computed_ops here would
            # silently replace the weights with the op outputs.
            weights = tf.cast(weights, dtype=tf.bfloat16, name='weights_to_bfloat16')
        return tf.reduce_sum(weights * computed_ops, axis=0)

In [9]:
# Smoke test: push one batch of random image data through a MixedOp.
mix_op = MixedOp(3, 1)
# One mixing weight per candidate op, so the vector always matches PRIMITIVES.
weights = tf.random_uniform(minval=0, maxval=2, shape=[len(PRIMITIVES)])
np_ds_train = (np.random.randint(0, 256, (20, 16, 16, 3)).astype(np.float32), np.random.randint(0, 2, (20, 16, 16, 1)).astype(np.float32))
ds_train = tf.data.Dataset.from_tensor_slices(np_ds_train).batch(2, drop_remainder=True)

it = ds_train.make_one_shot_iterator()
image, label = it.get_next()

out = mix_op(image, weights)

# Created once, after the forward pass, so it also covers any variables that
# were added to the graph while building `out`.
init = tf.global_variables_initializer()

(<tf.Tensor 'mixed_op_3/computed_ops_to_bfloat16:0' shape=(8, 2, 16, 16, 3) dtype=bfloat16>, <tf.Tensor 'mixed_op_3/computed_ops_to_bfloat16:0' shape=(8, 2, 16, 16, 3) dtype=bfloat16>)


In [4]:
with tf.Session() as sess:
    # Build the initializer here so every variable currently in the graph is
    # covered; this replaces the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    res = sess.run(out)
print(res.shape)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
(2, 16, 16, 3)


# Cell

In [5]:
class Cell(Model):
    """Search cell: two preprocessed inputs followed by `steps` mixed nodes.

    Every intermediate node is the sum of one MixedOp per previously available
    state (the two inputs plus all earlier nodes). The cell output is the
    channel-wise concatenation of the last `multiplier` states.
    """

    def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev, upsample_prev):
        """Create the preprocessing layers and one MixedOp per edge.

        Args:
            steps (int): number of intermediate nodes.
            multiplier (int): how many trailing states are concatenated.
            C_prev_prev (int): channel argument for the older input's preprocessing.
            C_prev (int): channel argument for the newer input's preprocessing.
            C (int): channel count used inside the cell.
            reduction (bool): whether this cell is a reduction cell.
            reduction_prev (bool): selects FactorizedReduce for the older input.
            upsample_prev (bool): selects FactorizedUp for the older input
                (only consulted when `reduction_prev` is false).
        """
        super(Cell, self).__init__()
        self.reduction = reduction

        # The older input may come from a different resolution, so its
        # preprocessing layer depends on what the previous cell did.
        if reduction_prev:
            older_input_prep = FactorizedReduce(C_prev_prev, C)
        elif upsample_prev:
            older_input_prep = FactorizedUp(C_prev_prev, C)
        else:
            older_input_prep = ReLUConvBN(C_prev_prev, C, 1, 1, 'same')
        self.preprocess0 = older_input_prep
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 'same')
        self._steps = steps
        self._multiplier = multiplier

        # Edges are stored flat: node 0 owns 2 edges, node 1 owns 3, and so on.
        self._ops = []
        for node in range(self._steps):
            for source in range(node + 2):
                # In a reduction cell only the edges leaving the two cell
                # inputs use stride 2.
                stride = 1
                if reduction and source < 2:
                    stride = 2
                self._ops.append(MixedOp(C, stride))

    def call(self, s0, s1, weights):
        """Run the cell.

        Args:
            s0: output of the cell two positions back.
            s1: output of the previous cell.
            weights: indexable by flat edge index; entry `e` is forwarded to
                the MixedOp of edge `e`.

        Returns:
            The last `multiplier` states concatenated along the last axis.
        """
        states = [self.preprocess0(s0), self.preprocess1(s1)]
        offset = 0

        for _ in range(self._steps):
            # Accumulate from 0 so the result matches Python's built-in sum().
            node_value = 0
            for j, h in enumerate(states):
                edge = offset + j
                node_value = node_value + self._ops[edge](h, weights[edge])
            offset += len(states)
            states.append(node_value)

        return tf.concat(states[-self._multiplier:], axis=-1)

## Cell Test

In [6]:
# Normal (non-reduction) cell with 4 intermediate nodes on 3-channel inputs;
# neither neighbouring cell changes resolution, so both inputs get ReLUConvBN.
cell = Cell(steps=4,
            multiplier=4,
            C_prev_prev=3,
            C_prev=3,
            C=3,
            reduction=False,
            reduction_prev=False,
            upsample_prev=False)

In [7]:
# One row per edge (2 + 3 + ... incoming edges over the cell's nodes) and one
# column per candidate op, derived rather than hard-coded so the shapes stay
# valid if the cell or PRIMITIVES change.
num_edges = sum(2 + i for i in range(cell._steps))
weights = tf.random_uniform([num_edges, len(PRIMITIVES)], 0, 1)
s0 = tf.random_uniform([2, 16, 16, 3], 0, 255)
s1 = tf.random_uniform([2, 16, 16, 3], 0, 255)

cell_out = cell(s0, s1, weights)
# Created after the forward pass so it also covers any variables that were
# added to the graph while building `cell_out`.
init = tf.global_variables_initializer()

In [8]:
with tf.Session() as sess:
    # Fresh initializer covering every variable currently in the graph;
    # replaces the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    res = sess.run(cell_out)

In [9]:
# The cell concatenates its last `multiplier` states along the channel axis,
# so with multiplier=4 and C=3 the last dimension is 12.
print(res.shape)

(2, 16, 16, 12)


# Upsample Cell

In [10]:
class UpsampleCell(Model):
    """Decoder cell: preprocess both inputs, upsample each, and add them.

    Both inputs go through the same stride-2 transposed convolution, which
    outputs `C * multiplier` channels.
    """

    def __init__(self, steps, multiplier, C_prev_prev, C_prev, C):
        """Create the preprocessing layers and the shared transposed convolution.

        Args:
            steps (int): stored on the instance; not used by `call`.
            multiplier (int): output channels are `C * multiplier`.
            C_prev_prev (int): channel argument for the older input's preprocessing.
            C_prev (int): channel argument for the newer input's preprocessing.
            C (int): base channel count of this cell.
        """
        super(UpsampleCell, self).__init__()
        self._steps = steps
        self._multiplier = multiplier
        # Network.call checks `cell.reduction` to pick the alpha matrix; an
        # upsample cell is never a reduction cell.
        self.reduction = False

        self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 'same')
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 'same')
        out_channels = C * self._multiplier
        self.UpConv = layers.Conv2DTranspose(out_channels,
                                             kernel_size=3,
                                             strides=2,
                                             padding='same')

    def call(self, s0, s1, weights):
        """Upsample both inputs and return their element-wise sum.

        Args:
            s0: output of the cell two positions back.
            s1: output of the previous cell.
            weights: accepted for signature parity with Cell.call; ignored.
        """
        pre0 = self.preprocess0(s0)
        pre1 = self.preprocess1(s1)

        up0, up1 = self.UpConv(pre0), self.UpConv(pre1)
        return up0 + up1


## Upsample Cell Test

In [11]:
# Keyword arguments spell out which of the five integers is which.
up_cell = UpsampleCell(steps=4, multiplier=4, C_prev_prev=3, C_prev=3, C=3)

In [12]:
s0 = np.random.rand(2, 16, 16, 3).astype(np.float32)
s1 = np.random.rand(2, 16, 16, 3).astype(np.float32)

# UpsampleCell ignores its weights argument, so an empty list is enough.
up_cell_out = up_cell(s0, s1, [])

# Created after the forward pass so it also covers any variables that were
# added to the graph while building `up_cell_out`; this makes the deprecated
# tf.initialize_all_variables() call unnecessary.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    res = sess.run(up_cell_out)
print(res.shape)

(2, 32, 32, 12)


# Network

In [28]:
class Network(Model):
    """Encoder/decoder search network assembled from Cell and UpsampleCell blocks.

    The encoder stacks `net_layers` cells, alternating normal and reduction
    cells. The decoder stacks `net_layers - 1` cells, alternating UpsampleCell
    and normal cells. Two architecture-parameter matrices (`alphas_normal` and
    `alphas_reduce`) are shared by all cells of the matching kind.
    """

    def __init__(self, C, net_layers, criterion, steps=4, multiplier=4, stem_multiplier=3, num_classes=1):
        """Build the stem, the encoder and decoder cells, and the output head.

        Args:
            C (int): base channel count.
            net_layers (int): number of encoder cells.
            criterion (callable): called as `criterion(one_hot_target, logits)`.
            steps (int): intermediate nodes per cell.
            multiplier (int): number of states each cell concatenates.
            stem_multiplier (int): the stem outputs `C * stem_multiplier` channels.
            num_classes (int): channels of the final 1x1 convolution.
        """
        super(Network, self).__init__()
        self._C = C
        self._criterion = criterion
        self._steps = steps
        self._multiplier = multiplier
        # Kept so that new() can rebuild a network with the same architecture.
        self._stem_multiplier = stem_multiplier
        self.net_layers = net_layers
        self.num_classes = num_classes

        C_curr = C*stem_multiplier

        # stem operation
        self.stem_op = tf.keras.Sequential()
        self.stem_op.add(tf.keras.layers.Conv2D(
            C_curr, kernel_size=3, padding='same', use_bias=False))
        self.stem_op.add(tf.keras.layers.BatchNormalization())

        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = []
        self.skip_ops = []

        reduction_prev = False

        # Encoder: odd positions are reduction cells that double the channel
        # count; even positions are normal cells and each registers a skip op.
        for i in range(self.net_layers):
            if i % 2 == 1:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
                self.skip_ops += [SkipConnection(C_curr)]
            cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr,
                        reduction,
                        reduction_prev,
                        upsample_prev=False)
            reduction_prev = reduction
            self.cells += [cell]

            C_prev_prev, C_prev = C_prev, multiplier*C_curr

        # Decoder: even positions halve the channel count and upsample; odd
        # positions are normal cells that follow an upsample cell.
        for i in range(self.net_layers-1):
            if i % 2 == 0:
                C_curr = C_curr // 2

                cell = UpsampleCell(steps, multiplier,
                                    C_prev_prev, C_prev, C_curr)
            else:
                cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr,
                            reduction=False,
                            reduction_prev=False,
                            upsample_prev=True)
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, multiplier*C_curr

        self.softmaxConv = tf.keras.Sequential(name="softmaxConv")
        self.softmaxConv.add(tf.keras.layers.Conv2D(
            self.num_classes, kernel_size=1, strides=1, padding='same'))
        self.softmaxConv.add(Softmax())

        self._initialize_alphas()

    def _initialize_alphas(self):
        """Create the two architecture-parameter variables.

        Each has one row per cell edge and one column per PRIMITIVES entry,
        initialised to small random values.
        """
        k = sum(1 for i in range(self._steps) for n in range(2+i))
        num_ops = len(PRIMITIVES)
        # Callables, so the random initial value is produced when the variable
        # is created.
        alphas_normal = lambda: 1e-3*tf.random.uniform([k, num_ops])
        alphas_reduce = lambda: 1e-3*tf.random.uniform([k, num_ops])
        self.alphas_normal = tf.Variable(
            alphas_normal, name='alphas_normal')
        self.alphas_reduce = tf.Variable(
            alphas_reduce, name='alphas_reduce')
        self._arch_parameters = [
            self.alphas_normal,
            self.alphas_reduce,
        ]

    def arch_parameters(self):
        """Return the list [alphas_normal, alphas_reduce]."""
        return self._arch_parameters

    def new(self):
        """Build a fresh Network with the same hyper-parameters and copy the alphas.

        Returns:
            Network: constructed with this network's C, net_layers, criterion,
            steps, multiplier, stem_multiplier and num_classes.
        """
        # Forward every constructor argument; omitting steps / multiplier /
        # stem_multiplier would silently fall back to the defaults and give a
        # differently shaped network.
        model_new = Network(self._C, self.net_layers, self._criterion,
                            steps=self._steps,
                            multiplier=self._multiplier,
                            stem_multiplier=self._stem_multiplier,
                            num_classes=self.num_classes)
        for x, y in zip(model_new.arch_parameters(), self.arch_parameters()):
            # NOTE(review): in graph mode assign() only returns an op and the
            # result is discarded here — confirm the copy is actually executed.
            x.assign(y)
        return model_new

    def _loss(self, logits, target):
        """One-hot encode `target` and apply the criterion.

        Args:
            logits: network output passed to the criterion unchanged.
            target: 4-D tensor (b, w, h, c); it is reshaped to (b, w, h), so c
                must be 1.
        """
        b, w, h, c = target.shape
        y = tf.reshape(tf.cast(target, tf.int64), (b, w, h))
        y = tf.one_hot(y, self.num_classes, on_value=1.0, off_value=0.0)
        return self._criterion(y, logits)

    def call(self, inp):
        """Forward pass: stem, all cells with skip connections, then the output head.

        Side effect: `self.arr` is reset and filled with the encoder outputs
        that are saved for the skip connections.
        """
        s0 = s1 = self.stem_op(inp)
        self.arr = []
        pos = -1

        middle = self.net_layers - 1
        for i, cell in enumerate(self.cells):
            if cell.reduction:
                weights = tf.nn.softmax(self.alphas_reduce, axis=-1)
            else:
                weights = tf.nn.softmax(self.alphas_normal, axis=-1)

            s0, s1 = s1, cell(s0, s1, weights)

            # Save the output of every even-indexed encoder cell before the middle.
            if (i < middle and i % 2 == 0):
                self.arr.append(s1)

            # After each odd-indexed decoder cell, merge with the most recent
            # unused saved output (walking self.arr from the end).
            # NOTE(review): skip_ops is indexed from the front (0, 1, ...) while
            # self.arr is indexed from the back — confirm this pairing is intended.
            if (i > middle and i % 2 == 1):
                s1 = self.skip_ops[-pos-1](self.arr[pos], s1)
                pos -= 1
        return self.softmaxConv(s1)

    def genotype(self, alphas_normal, alphas_reduce):
        """Derive a discrete Genotype from two evaluated alpha matrices.

        For every intermediate node the two incoming edges with the largest
        non-'none' weight are kept, each labelled with its best non-'none'
        primitive.

        Args:
            alphas_normal: 2-D array (edges x primitives) supporting slicing
                and `.copy()`, e.g. a numpy array.
            alphas_reduce: same layout, for reduction cells.

        Returns:
            Genotype: normal / reduce gene lists plus the concat node range.
        """
        def _parse(weights):
            gene = []
            n = 2
            start = 0
            for i in range(self._steps):
                # Looked up once per node instead of once per comparison.
                none_idx = PRIMITIVES.index('none')
                end = start + n
                W = weights[start:end].copy()
                # Rank this node's incoming edges by their best non-'none'
                # weight and keep the top two.
                edges = sorted(
                    range(i + 2),
                    key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != none_idx))[:2]
                for j in edges:
                    k_best = None
                    for k in range(len(W[j])):
                        if k != none_idx:
                            if k_best is None or W[j][k] > W[j][k_best]:
                                k_best = k
                    gene.append((PRIMITIVES[k_best], j))
                start = end
                n += 1
            return gene

        gene_normal = _parse(alphas_normal)
        gene_reduce = _parse(alphas_reduce)

        concat = range(2+self._steps-self._multiplier, self._steps+2)
        genotype = Genotype(
            normal=gene_normal, normal_concat=concat,
            reduce=gene_reduce, reduce_concat=concat
        )
        return genotype


## Network Test

In [29]:
# NOTE(review): Network ends in a Softmax layer, yet this criterion is a
# sigmoid cross-entropy applied to those outputs — confirm that is intended.
criterion = tf.losses.sigmoid_cross_entropy
# C=3 base channels, 3 encoder cells, 6 output classes.
network = Network(3, 3, criterion, num_classes=6)

In [30]:
# Read back the initial value of the first architecture parameter
# (alphas_normal, the first entry of the list built in _initialize_alphas).
init = tf.global_variables_initializer()
a = network.arch_parameters()[0]
with tf.Session() as sess:
    sess.run(init)
    out = sess.run(a)

In [31]:
# Random stand-in dataset: 20 RGB 16x16 "images" with values in [0, 256) and a
# matching single-channel mask with values in {0, 1}.
np_ds_train = (
    np.random.randint(0, 256, (20, 16, 16, 3)).astype(np.float32),
    np.random.randint(0, 2, (20, 16, 16, 1)).astype(np.float32),
)
ds_train = (
    tf.data.Dataset
    .from_tensor_slices(np_ds_train)
    .batch(2, drop_remainder=True)
)

# Covers only the variables that exist in the graph at this point.
init = tf.global_variables_initializer()

it = ds_train.make_one_shot_iterator()
image, label = it.get_next()

In [32]:
# Build the forward graph of the network for one batch of images.
net_out = network(image)

In [34]:
with tf.Session() as sess:
    # Fresh initializer covering every variable currently in the graph;
    # replaces the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    res = sess.run(net_out)

In [35]:
def get_model_theta(model):
    """Return the model's trainable weights, excluding architecture parameters.

    Args:
        model: any object exposing `trainable_weights`, an iterable of
            variables that each have a `name` string.

    Returns:
        list: the variables whose name does not contain 'alphas', in their
        original order.
    """
    return [var for var in model.trainable_weights if 'alphas' not in var.name]

In [37]:
# Build the graph for the gradients of the training loss with respect to the
# ordinary weights (every trainable variable except the alphas).
model = network
arch_var = model.arch_parameters()
w_var = get_model_theta(model)
# Calling the model again adds another forward pass to the graph.
logits = model(image)
train_loss = model._loss(logits, label)
train_grads = tf.gradients(train_loss, w_var)

In [38]:
with tf.Session() as sess:
    # Fresh initializer covering every variable currently in the graph;
    # replaces the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    out1 = sess.run(train_grads)

In [None]:
# Displays the evaluated gradients, one entry per variable in w_var.
out1

# Testing Here!

In [109]:
import numpy as np

In [None]:
# Scratch check: chain ReLUConvBN and FactorizedReduce on a random 6-channel batch.
s0 = np.random.rand(2, 16, 16, 6).astype(np.float32)
conv = tf.keras.Sequential()
conv.add(ReLUConvBN(6, 6, 1, 1, 'same'))
conv.add(FactorizedReduce(6,6))
conv(s0)

In [None]:
# Scratch: two separate variables created from the same 2x1 initial value.
var = tf.Variable([[0.9], [0.5]])
var2 = tf.Variable([[0.9], [0.5]])

In [None]:
# Scratch: show the name of a freshly constructed Zero op.
Zero(2).name

# Genotype

In [6]:
@tf.function
def _parse(weights):
    """Graph version of the gene parser.

    For every intermediate node, keeps the two incoming edges with the largest
    non-'none' weight and labels each with its best non-'none' primitive.

    Reads the module-level `steps` for the number of intermediate nodes.

    Args:
        weights: 2-D tensor with one row per edge and one column per
            PRIMITIVES entry. Its shape must be statically known because the
            mask is built with numpy from the slice's shape.

    Returns:
        list of (primitive_name, edge_index) tensor pairs, two per node.
    """
    with tf.variable_scope("parse"):
        primitives = tf.convert_to_tensor(PRIMITIVES)
        none_idx = PRIMITIVES.index('none')
        gene = []
        n = 2
        start = 0
        for i in range(steps):
            end = start + n
            W = tf.identity(weights[start:end], name="alphas_for_op_strided")
            # Push the 'none' column to -inf so it can never be selected,
            # whatever the sign of the weights. Multiplying by -1 instead
            # would promote a negative 'none' weight.
            penalty = np.zeros(W.shape.as_list())
            penalty[:, none_idx] = -np.inf
            W = W + tf.convert_to_tensor(penalty, dtype=W.dtype)
            # Rank the incoming edges by their best remaining weight and keep
            # the top two.
            best_per_edge = tf.reduce_max(W, axis=-1, name='best_weight_per_edge')
            edges = tf.argsort(best_per_edge, axis=-1, direction='DESCENDING', name='edges')[:2]
            # Primitive ranking per edge, computed once for this node.
            ranked_ops = tf.argsort(W, axis=-1, direction='DESCENDING', name='k_best')

            for idx in range(edges.shape[0]):
                j = edges[idx]
                j = tf.identity(j, name="selected_op")
                k_best = ranked_ops[j][0]
                sel_block = primitives[k_best]
                sel_block = tf.identity(sel_block, name="selected_block")
                gene.append((sel_block, j))
            start = end
            n += 1
        return gene

In [7]:
# Number of edges in a cell with 4 intermediate nodes: 2 + 3 + 4 + 5 = 14.
k = sum(1 for i in range(4) for n in range(2+i))
num_ops = len(PRIMITIVES)
# Softmax over the op axis of a variable holding small random values; note
# that `weights` is therefore already normalised.
weights = tf.nn.softmax(tf.Variable(1e-3*tf.random.uniform([k, num_ops]), name='weights'), axis=-1)

In [12]:
# Number of intermediate nodes per cell; read as a global by _parse.
steps = 4


def calc_gene():
    """Build the gene ops for the module-level `weights`."""
    return _parse(weights)


def run_gene(session):
    """Build the gene ops and evaluate them in `session`."""
    return session.run(calc_gene())


def make_genotype():
    """Build a Genotype whose gene entries are still unevaluated tensors.

    The same `weights` feed both the normal and the reduce gene. `weights` is
    already a softmax output; the extra softmax keeps the ranking, so the
    selected edges and primitives are unaffected.
    """
    gene_normal = _parse(tf.nn.softmax(weights, axis=-1))
    gene_reduce = _parse(tf.nn.softmax(weights, axis=-1))

    # NOTE(review): Network.genotype uses 2 + steps - multiplier as the lower
    # bound; the literal 2 here corresponds to multiplier == 2 — confirm.
    concat = range(2+steps-2, steps+2)
    return Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
    )


def ret_genotype(session):
    """Evaluate the gene list for `weights` in `session` and return it.

    Returns the evaluated result of run_gene, not a Genotype. No Genotype is
    built here, because it would only add unused ops to the graph.
    """
    return run_gene(session)

In [13]:
# Shows the unevaluated gene: (primitive name, edge index) tensor pairs.
calc_gene()

[(<tf.Tensor 'PartitionedCall:0' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:1' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:2' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:3' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:4' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:5' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:6' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:7' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:8' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:9' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:10' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:11' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:12' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:13' shape=() dtype=int32>),
 (<tf.Tensor 'PartitionedCall:14' shape=() dtype=string>,
  <tf.Tensor 'PartitionedCall:15' shape=() dtype=int32>)]

In [21]:
# The `weights` variable already exists at this point, so this initializer
# covers it.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Evaluates the gene tensors into concrete (primitive name, edge index) pairs.
    out = ret_genotype(sess)

In [23]:
# Evaluated gene: two (primitive name, edge index) pairs per intermediate node.
out

[('avg_pool_3x3', 1),
 ('max_pool_3x3', 0),
 ('max_pool_3x3', 1),
 ('avg_pool_3x3', 0),
 ('max_pool_3x3', 0),
 ('max_pool_3x3', 1),
 ('max_pool_3x3', 4),
 ('max_pool_3x3', 1)]

In [24]:
# List the global variables registered under the 'parse' scope.
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='parse')

[]

In [22]:
# List the names of all collections present in the default graph.
tf.get_default_graph().get_all_collection_keys()

['local_variables', 'variables', 'trainable_variables']