In [0]:
# https://keras.io/
# !pip install -q keras
# import keras

In [0]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, Activation,GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import numpy as np
from tensorflow.keras.callbacks import LearningRateScheduler,ModelCheckpoint,ReduceLROnPlateau


In [0]:
# # This part prevents TensorFlow from allocating all the available GPU memory
# # backend
# import tensorflow as tf
# from keras import backend as k

# # Don't pre-allocate memory; allocate as-needed
# config = tf.ConfigProto()
# config.gpu_options.allow_growth = True

# # Create a session with the above options specified.
# k.tensorflow_backend.set_session(tf.Session(config=config))

In [0]:
# Hyperparameters

# --- training ---
batch_size = 64
epochs = 250
num_classes = 10       # width of the one-hot labels and of the final Dense layer

# --- network shape ---
l = 16                 # iterations of the layer loop inside add_denseblock
num_filter = 32        # filters of the first 3x3 convolution
growth_rate = 12       # filters added by every dense layer (bottleneck uses 4x this)
compression = 0.5      # factor applied to the filter count in add_transition
dilate_rate = 1        # dilation_rate passed to the 3x3 convolutions

# --- regularisation ---
dropout_rate = 0.2     # dropout applied after convs when > 0
weight_decay = 1e-4    # l2 factor for conv kernels, BatchNorm gamma/beta and the classifier


In [0]:
# Load CIFAR10 Data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Axes 1..3 of the image array give height, width and channel count.
img_height, img_width, channel = x_train.shape[1:4]

# Work in float32 so the standardisation below is not done on the loaded dtype.
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

# Convert the class labels to one-hot encoding of width num_classes.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Standardise each of the three channels (last axis) in place.
# The mean/std come from the training images only and are reused for the test images.
for i in range(3):
    mean = x_train[..., i].mean()
    std = x_train[..., i].std()
    x_train[..., i] = (x_train[..., i] - mean) / std
    x_test[..., i] = (x_test[..., i] - mean) / std

In [0]:
# Dense Block
def add_denseblock(input, num_filter = 12, dropout_rate = 0.2):
    """Stack `l` bottleneck layers, concatenating each layer's output onto its input.

    Every iteration applies BN -> ReLU -> 1x1 conv (4 * growth_rate filters)
    -> BN -> ReLU -> 3x3 conv (growth_rate filters), optionally followed by
    dropout, and concatenates the result with the running tensor on the last axis.

    Reads the module-level settings `l`, `growth_rate`, `weight_decay` and
    `dilate_rate`.

    Args:
        input: tensor the block is applied to.
        num_filter: running filter count supplied by the caller; it is only
            incremented here, never used to size a layer.
        dropout_rate: dropout rate after the 3x3 conv; dropout is skipped when
            the value is not greater than 0.

    Returns:
        (tensor, num_filter): the concatenated tensor and the incoming
        `num_filter` increased by `growth_rate` once per layer.
    """
    features = input

    for _ in range(l):
        # Bottleneck: BN -> ReLU -> 1x1 conv.
        bottleneck = BatchNormalization(gamma_regularizer=l2(weight_decay),
                                        beta_regularizer=l2(weight_decay))(features)
        bottleneck = Activation('relu')(bottleneck)
        bottleneck = Conv2D(int(growth_rate*4), (1,1), use_bias=False, padding='same',
                            kernel_initializer='he_uniform',
                            kernel_regularizer=l2(weight_decay))(bottleneck)

        # Composite function: BN -> ReLU -> 3x3 conv producing growth_rate maps.
        new_maps = BatchNormalization(gamma_regularizer=l2(weight_decay),
                                      beta_regularizer=l2(weight_decay))(bottleneck)
        new_maps = Activation('relu')(new_maps)
        new_maps = Conv2D(int(growth_rate), (3,3), use_bias=False, padding='same',
                          dilation_rate=dilate_rate,
                          kernel_initializer='he_uniform',
                          kernel_regularizer=l2(weight_decay))(new_maps)

        if dropout_rate > 0:
            new_maps = Dropout(rate=dropout_rate)(new_maps)

        # Dense connectivity: the next layer sees everything produced so far.
        features = Concatenate(axis=-1)([features, new_maps])
        num_filter += growth_rate

    return features, num_filter

In [0]:
def add_transition(input, num_filter = 12, dropout_rate = 0.2):
    """Transition between dense blocks: BN -> ReLU -> 1x1 conv -> (dropout) -> 2x2 avg pool.

    The 1x1 convolution has int(num_filter * compression) filters, where
    `compression` and `weight_decay` are module-level settings.

    Args:
        input: tensor to transform.
        num_filter: filter count that is scaled by `compression` to size the conv.
        dropout_rate: dropout rate after the conv; skipped when not greater than 0.

    Returns:
        The tensor after 2x2 average pooling with stride 2.
    """
    normed = BatchNormalization(gamma_regularizer=l2(weight_decay),
                                beta_regularizer=l2(weight_decay))(input)
    activated = Activation('relu')(normed)

    squeezed = Conv2D(int(num_filter*compression), (1,1), use_bias=False, padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(weight_decay))(activated)
    if dropout_rate > 0:
        squeezed = Dropout(rate=dropout_rate)(squeezed)

    return AveragePooling2D(pool_size=(2,2), strides=(2,2))(squeezed)

In [0]:
def output_layer(input):
    """Classifier head: BN -> ReLU -> global average pooling -> softmax Dense.

    The Dense layer has `num_classes` units and l2-regularises both kernel and
    bias with the module-level `weight_decay`.

    Args:
        input: tensor produced by the last dense block.

    Returns:
        The softmax output tensor.
    """
    normed = BatchNormalization(gamma_regularizer=l2(weight_decay),
                                beta_regularizer=l2(weight_decay))(input)
    activated = Activation('relu')(normed)

    # Global average pooling collapses the spatial axes, so no Flatten is needed.
    pooled = GlobalAveragePooling2D()(activated)

    classifier = Dense(num_classes, activation='softmax',
                       kernel_regularizer=l2(weight_decay),
                       bias_regularizer=l2(weight_decay))
    return classifier(pooled)

In [0]:

# Assemble the graph: stem conv -> (dense block -> transition) x 2 -> dense block -> head.
input = Input(shape=(img_height, img_width, channel))
First_Conv2D = Conv2D(num_filter, (3,3), use_bias=False, padding='same', dilation_rate=dilate_rate,
                      kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay))(input)

# add_denseblock returns the tensor together with the updated running filter count.
First_Block, num_filters = add_denseblock(First_Conv2D, num_filter, dropout_rate)
First_Transition = add_transition(First_Block, num_filters, dropout_rate)
# The transition's 1x1 conv emits int(num_filters * compression) channels, so the
# running count has to shrink with it. Without this the next block is seeded with
# the pre-compression count and the following transition is sized too wide
# (208 instead of 152 filters with the default settings).
# NOTE: this changes layer widths, so weights saved from the uncorrected graph
# will not load into this one.
num_filters = int(num_filters * compression)

Second_Block, num_filters = add_denseblock(First_Transition, num_filters, dropout_rate)
Second_Transition = add_transition(Second_Block, num_filters, dropout_rate)
num_filters = int(num_filters * compression)

Last_Block, num_filters = add_denseblock(Second_Transition, num_filters, dropout_rate)
output = output_layer(Last_Block)


In [10]:
# Wrap the graph between the `input` tensor and the softmax `output` tensor in a
# Keras Model, then print its layer-by-layer summary.
model = Model(inputs=[input], outputs=[output])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 32)   0           batch_normalization[0][0]        
__________________________________________________________________________________________________
conv2d_1 (

In [0]:
# Augmentation applied on the fly to training batches.
datagen = ImageDataGenerator(
    # Geometric augmentations that are switched on.
    rotation_range=15,          # random rotation range, in degrees
    width_shift_range=0.16,     # random horizontal shift, as a fraction of total width
    height_shift_range=0.16,    # random vertical shift, as a fraction of total height
    horizontal_flip=True,       # random left/right flips
    vertical_flip=False,        # no up/down flips
    # Generator-side normalisation is disabled; the arrays were already
    # standardised per channel when the data was loaded.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)
       

In [0]:
def step_decay(epoch):
    """Piecewise-constant learning-rate schedule.

    Args:
        epoch: epoch index the rate is requested for.

    Returns:
        0.1 for epochs below 125, a tenth of that for epochs 125..186 and a
        hundredth of it from epoch 187 onwards, always as a Python float.
    """
    base_lr = 0.1
    if epoch >= 187:
        return float(base_lr / 100)
    if epoch >= 125:
        return float(base_lr / 10)
    return float(base_lr)
# Callback that hands step_decay to Keras as the learning-rate schedule.
lrschedular = LearningRateScheduler(step_decay)

In [0]:
# Loss and optimizer: categorical cross-entropy minimised with Nesterov-momentum
# SGD (initial rate 0.1, momentum 0.9); accuracy is tracked as a metric.
model.compile(
    optimizer=SGD(lr=0.1, momentum=0.9, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Convert the compiled Keras model into a TPU model. The TPU endpoint is built
# from the COLAB_TPU_ADDR environment variable, so this cell raises KeyError
# when that variable is not set.
# NOTE(review): tf.contrib is a TensorFlow 1.x namespace — confirm the runtime
# version before re-running this cell.
tpu_model = tf.contrib.tpu.keras_to_tpu_model(model,
                                              strategy = tf.contrib.tpu.TPUDistributionStrategy(
                                                  tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://'+os.environ['COLAB_TPU_ADDR'])
                                              )
                                             )

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.89.87.186:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 8983147937132826590)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 11067988314755813392)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 3553232749503011065)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 8860226884709928202)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 9600603507261866485)
INFO:tensorflow:*** Available Device: _DeviceA

In [15]:
# tpu_model.fit(x_train, y_train,
#                     batch_size=batch_size,
#                     epochs=50,
#                     verbose=1,
#                     validation_data=(x_test, y_test))

# Train on augmented batches with the step-decay learning-rate schedule.
# Batch size, epoch count and steps per epoch come from the hyperparameter cell
# and the training array instead of being repeated as literals, so editing the
# hyperparameters actually changes the run. With batch_size = 64, epochs = 250
# and 50,000 training images this is the same 64 / 782 / 250 as before.
tpu_model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    # Ceiling division: one extra step for the final partial batch.
    steps_per_epoch=(len(x_train) + batch_size - 1) // batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[lrschedular],
)

Epoch 1/250
INFO:tensorflow:New input shapes; (re-)compiling: mode=train (# of cores 8), [TensorSpec(shape=(8,), dtype=tf.int32, name='core_id0'), TensorSpec(shape=(8, 32, 32, 3), dtype=tf.float32, name='input_1_10'), TensorSpec(shape=(8, 10), dtype=tf.float32, name='dense_target_30')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Cloning SGD {'lr': 0.10000000149011612, 'momentum': 0.8999999761581421, 'decay': 0.0, 'nesterov': True}
INFO:tensorflow:Remapping placeholder for input_1
INFO:tensorflow:KerasCrossShard: <tensorflow.python.keras.optimizers.SGD object at 0x7fa9406e5ef0> []
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 88.06197094917297 secs
INFO:tensorflow:Setting weights on TPU model.
INFO:tensorflow:CPU -> TPU lr: 0.10000000149011612 {0.1}
INFO:tensorflow:CPU -> TPU momentum: 0.8999999761581421 {0.9}
INFO:tensorflow:CPU -> TPU decay: 0.0 {0.0}
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping pla

<tensorflow.python.keras.callbacks.History at 0x7fa95430f1d0>

In [16]:
# Evaluate on the held-out test set; `score` holds the loss followed by the
# accuracy metric requested at compile time.
score = tpu_model.evaluate(x_test, y_test, verbose=1)
print(score)
print('Test loss: %.3f ' % score[0])
# Accuracy is reported as a percentage.
print('Test accuracy: %.3f ' % (100 * score[1]))

INFO:tensorflow:New input shapes; (re-)compiling: mode=eval (# of cores 8), [TensorSpec(shape=(4,), dtype=tf.int32, name='core_id_10'), TensorSpec(shape=(4, 32, 32, 3), dtype=tf.float32, name='input_1_10'), TensorSpec(shape=(4, 10), dtype=tf.float32, name='dense_target_30')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Cloning SGD {'lr': 0.10000000149011612, 'momentum': 0.8999999761581421, 'decay': 0.0, 'nesterov': True}
INFO:tensorflow:Remapping placeholder for input_1
INFO:tensorflow:KerasCrossShard: <tensorflow.python.keras.optimizers.SGD object at 0x7fa93f6a8780> []
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 69.7616708278656 secs
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for input_1
INFO:tensorflow:KerasCrossShard: <tensorflow.python.keras.optimizers.SGD object at 0x7fa93f6a8780> []
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 43.13383769989

In [17]:
# Persist the trained network to the working directory:
#   - save_weights: weights only, to the .h5 file
#   - save: the model itself, to the .hdf5 file
# The recorded output shows TPU weights being copied back to the CPU for each call.
tpu_model.save_weights("DNST_weights_Shravan_B9_change_mean_std.h5")
tpu_model.save("DNST_model_Shravan_B9_change_mean_std.hdf5")
print("Saved model to disk")

INFO:tensorflow:Copying TPU weights to the CPU
INFO:tensorflow:TPU -> CPU lr: 0.0010000000474974513
INFO:tensorflow:TPU -> CPU momentum: 0.8999999761581421
INFO:tensorflow:TPU -> CPU decay: 0.0
INFO:tensorflow:TPU -> CPU nesterov: True
INFO:tensorflow:Copying TPU weights to the CPU
INFO:tensorflow:TPU -> CPU lr: 0.0010000000474974513
INFO:tensorflow:TPU -> CPU momentum: 0.8999999761581421
INFO:tensorflow:TPU -> CPU decay: 0.0
INFO:tensorflow:TPU -> CPU nesterov: True
Saved model to disk


# Links to the trained weights and model
Weights:  https://drive.google.com/open?id=14lLLPW7OZtJ0flWSkMkoghfqxfzKxjGH 
Model:  https://drive.google.com/open?id=1RC4TCM72gSdoVNfjA63a_hcUG8IBpSZB


# References
1. Original Densenet Paper: https://arxiv.org/pdf/1608.06993
2. https://medium.com/intuitionmachine/notes-on-the-implementation-densenet-in-tensorflow-beeda9dd1504
3. https://forums.fast.ai/t/training-a-model-from-scratch-cifar-10/7897
4. https://towardsdatascience.com/densenet-2810936aeebb
5. https://towardsdatascience.com/normalized-direction-preserving-adam-switching-from-adam-to-sgd-and-nesterov-momentum-adam-with-460be5ddf686


# Densenet implementations
1. Original Densenet Implementation: https://github.com/liuzhuang13/DenseNet
2. Fast.ai: http://files.fast.ai/part2/lesson13/densenet-keras.ipynb
3. Github Users
	a. https://github.com/titu1994/DenseNet
	b. https://github.com/flyyufelix/DenseNet-Keras 

# LR Callbacks
1. Cyclic Learning Rate: https://github.com/bckenstler/CLR
2. SGDR: https://gist.github.com/t2kasa/490610116ddb0f3b664458d0e086e643
3. SWATS: https://arxiv.org/pdf/1712.07628 (Implementation not found)
	https://github.com/kweonwooj/papers/issues/76
	https://www.groundai.com/project/improving-generalization-performance-by-switching-from-adam-to-sgd/
	