In [1]:
import tensorflow as tf

import numpy as np
from tensorflow.keras.layers import Input, ZeroPadding2D, Dense, Dropout, Activation, Convolution2D
from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D, BatchNormalization

from tensorflow.keras import Model

from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras import initializers
import tensorflow.keras.backend as K

print(tf.__version__)

2.0.0-alpha0


In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

In [3]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


In [13]:
class Scale(Layer):
    '''Custom Layer for DenseNet used for BatchNormalization.
    
    Learns a set of weights and biases used for scaling the input data.
    the output consists simply in an element-wise multiplication of the input
    and a sum of a set of constants:
        out = in * gamma + beta,
    where 'gamma' and 'beta' are the weights and biases larned.
    # Arguments
        axis: integer, axis along which to normalize in mode 0. For instance,
            if your input tensor has shape (samples, channels, rows, cols),
            set axis to 1 to normalize per feature map (channels axis).
        momentum: momentum in the computation of the
            exponential average of the mean and standard deviation
            of the data, for feature-wise normalization.
        weights: Initialization weights.
            List of 2 Numpy arrays, with shapes:
            `[(input_shape,), (input_shape,)]`
        beta_init: name of initialization function for shift parameter
            (see [initializations](../initializations.md)), or alternatively,
            Theano/TensorFlow function to use for weights initialization.
            This parameter is only relevant if you don't pass a `weights` argument.
        gamma_init: name of initialization function for scale parameter (see
            [initializations](../initializations.md)), or alternatively,
            Theano/TensorFlow function to use for weights initialization.
            This parameter is only relevant if you don't pass a `weights` argument.
    '''
    def __init__(self, weights=None, axis=-1, momentum = 0.9, beta_init='zero', gamma_init='one', **kwargs):
        self.momentum = momentum
        self.axis = axis
        self.beta_init = initializers.get(beta_init)
        self.gamma_init = initializers.get(gamma_init)
        self.initial_weights = weights
        super(Scale, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        shape = (int(input_shape[self.axis]),)

        # Tensorflow >= 1.0.0 compatibility
        self.gamma = K.variable(self.gamma_init(shape), name='{}_gamma'.format(self.name))
        self.beta = K.variable(self.beta_init(shape), name='{}_beta'.format(self.name))
        #self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name))
        #self.beta = self.beta_init(shape, name='{}_beta'.format(self.name))
        self.trainable_weights = [self.gamma, self.beta]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def call(self, x, mask=None):
        input_shape = self.input_spec[0].shape
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        out = K.reshape(self.gamma, broadcast_shape) * x + K.reshape(self.beta, broadcast_shape)
        return out

    def get_config(self):
        config = {"momentum": self.momentum, "axis": self.axis}
        base_config = super(Scale, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [15]:
class DenseNet(Model):
    def __init__(self, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.0, 
                 dropout_rate=0.0, weight_decay=1e-4, classes=1000):
        super(DenseNet, self).__init__()

        self.eps = 1.1e-5
                
        # compute compression factor
        compression = 1.0 - reduction
        
        self.concat_axis = 3
        img_input = Input(shape=(224, 224, 3), name='data')

        # From architecture for ImageNet (Table 1 in the paper)
        nb_filter = 64
        nb_layers = [6,12,24,16] # For DenseNet-121

        self.initial_layers = self.initial_block(nb_filter)
        
        self.dense_blocks = []
        self.transition_blocks = []

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            stage = block_idx+2
            block, nb_filter = self.dense_block(stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)
            self.dense_blocks += block
            
            # Add transition_block
            self.transition_blocks += self.transition_block(stage, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay)
            nb_filter = int(nb_filter * compression)

        final_stage = stage + 1
        block, nb_filter = self.dense_block(final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)
        self.dense_blocks += block
        
        self.final_layers = self.final_block(nb_filter, classes)       

    def initial_block(self, nb_filter):
        block = []
        block.append(ZeroPadding2D((3, 3), name='conv1_zeropadding'))
        block.append(Convolution2D(nb_filter, 7, 2, name='conv1', use_bias=False))
        block.append(BatchNormalization(epsilon=self.eps, axis=self.concat_axis, name='conv1_bn'))
        block.append(Scale(axis=self.concat_axis, name='conv1_scale'))
        block.append(Activation('relu', name='relu1'))
        block.append(ZeroPadding2D((1, 1), name='pool1_zeropadding'))
        block.append(MaxPooling2D((3, 3), strides=(2, 2), name='pool1'))
        return block
    
    def final_block(self, nb_filter, classes):
        block = []
        block.append(BatchNormalization(epsilon=self.eps, axis=self.concat_axis, name='conv_final_blk_bn'))
        block.append(Scale(axis=self.concat_axis, name='conv_final_blk_scale'))
        block.append(Activation('relu', name='relu_final_blk'))
        block.append(GlobalAveragePooling2D(name='pool_final'))
        block.append(Dense(classes, name='fc6'))
        block.append(Activation('softmax', name='prob'))
        return block
    
    def conv_block(self, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
        conv_name_base = 'conv' + str(stage) + '_' + str(branch)
        relu_name_base = 'relu' + str(stage) + '_' + str(branch)

        # 1x1 Convolution (Bottleneck layer)
        inter_channel = nb_filter * 4  
        block = []
        block.append(BatchNormalization(epsilon=self.eps, axis=self.concat_axis, name=conv_name_base+'_x1_bn'))
        block.append(Scale(axis=self.concat_axis, name=conv_name_base+'_x1_scale'))
        block.append(Activation('relu', name=relu_name_base+'_x1'))
        block.append(Convolution2D(inter_channel, 1, 1, name=conv_name_base+'_x1', use_bias=False))

        if dropout_rate:
            block.append(Dropout(dropout_rate))

        # 3x3 Convolution
        block.append(BatchNormalization(epsilon=self.eps, axis=self.concat_axis, name=conv_name_base+'_x2_bn'))
        block.append(Scale(axis=self.concat_axis, name=conv_name_base+'_x2_scale'))
        block.append(Activation('relu', name=relu_name_base+'_x2'))
        block.append(ZeroPadding2D((1, 1), name=conv_name_base+'_x2_zeropadding'))
        block.append(Convolution2D(nb_filter, 3, 1, name=conv_name_base+'_x2', use_bias=False))

        if dropout_rate:
            block.append(Dropout(dropout_rate))
        return block
                                        

    def dense_block(stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, 
                    grow_nb_filters=True):
        block = []
        for i in range(nb_layers):
            branch = i+1
            block.append(self.conv_block(concat_feat, stage, branch, growth_rate, dropout_rate, weight_decay))

            if grow_nb_filters:
                nb_filter += growth_rate

        return block, nb_filter
                                        
    def transition_block(stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4):
        conv_name_base = 'conv' + str(stage) + '_blk'
        relu_name_base = 'relu' + str(stage) + '_blk'
        pool_name_base = 'pool' + str(stage) 

        block = []
        block.append(BatchNormalization(epsilon=self.eps, axis=self.concat_axis, name=conv_name_base+'_bn'))
        block.append(Scale(axis=self.concat_axis, name=conv_name_base+'_scale'))
        block.append(Activation('relu', name=relu_name_base))
        block.append(Convolution2D(int(nb_filter * compression), 1, 1, name=conv_name_base, use_bias=False))

        if dropout_rate:
            block.append(Dropout(dropout_rate))

        block.append(AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base))

        return block

    def call(self, x):
        for layer in self.initial_layers:
            x = layer(x)
        for i in range(self.transition_blocks.len()):
            concat_feat = x
            for conv_layer in self.dense_blocks[i]:                                    
                x = conv_layer(x)
                x = tf.concat([concat_feat, x], self.concat_axis)
                concat_feat = x
            x = self.transition_blocks[i]
        concat_feat = x
        for conv_layer in self.dense_blocks[i]:                                    
            x = conv_layer(x)
            x = tf.concat([concat_feat, x], self.concat_axis)
            concat_feat = x
        for layer in self.final_layers:
            x = layer(x)                                      
        return x

model = DenseNet(classes=28)
# model.load_weights(weights_path) 

TypeError: dense_block() got multiple values for argument 'dropout_rate'

In [6]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

optimizer = tf.keras.optimizers.Adam()


In [7]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')


In [8]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    predictions = model(images)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)


In [9]:
@tf.function
def test_step(images, labels):
  predictions = model(images)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)


In [None]:
EPOCHS = 5

for epoch in range(EPOCHS):
  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
  print (template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))


KeyboardInterrupt: 

E0516 17:59:31.057700 140502922868544 alias.py:221] Invalid alias: The name clear can't be aliased because it is another magic command.
E0516 17:59:31.058384 140502922868544 alias.py:221] Invalid alias: The name more can't be aliased because it is another magic command.
E0516 17:59:31.058891 140502922868544 alias.py:221] Invalid alias: The name less can't be aliased because it is another magic command.
E0516 17:59:31.059455 140502922868544 alias.py:221] Invalid alias: The name man can't be aliased because it is another magic command.
