### Implementation of DenseNet in Keras

In [None]:
import keras
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.datasets import mnist
#from tensorflow.examples.tutorials.mnist import input_data

###### Load the MNIST data used to test the network

In [None]:
import numpy as np

# Number of label classes. It is assigned here because the one-hot encoding
# below needs it, and the model cell only assigns it later in the notebook;
# without this line a fresh-kernel run fails with NameError.
n_classes = 10

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Reshape to (samples, 1, 28, 28) to match the input_shape=(1, 28, 28) given to
# the first convolution, cast to float32 and divide the pixel values by 255.
# NOTE(review): this layout presumes Keras' channels-first image ordering - confirm.
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32') / 255

# One-hot encode the integer labels for use with categorical_crossentropy.
Y_train = np_utils.to_categorical(Y_train, n_classes)
Y_test = np_utils.to_categorical(Y_test, n_classes)

In [None]:
# A notebook cell only displays its final expression, so show both shapes as one tuple.
Y_train.shape, Y_test.shape

### DenseNet 
##### With default parameters for Adam optimizer

In [None]:
def bn_relu_conv(model, num_filter, drop):
    """Append a BatchNorm -> ReLU -> 3x3 convolution -> Dropout stage to ``model``.

    Args:
        model: Model object exposing ``add``; it is extended in place.
        num_filter: Number of filters given to the convolution.
        drop: Rate handed to the ``Dropout`` layer.

    Returns:
        The same ``model`` object that was passed in.
    """
    stage = (
        BatchNormalization(),
        Activation('relu'),
        Convolution2D(num_filter, 3, 3),
        Dropout(drop),
    )
    for layer in stage:
        model.add(layer)
    return model

def add_layer(model, num_filter, drop):
    """Extend ``model`` with one BN-ReLU-Conv-Dropout stage.

    Thin wrapper that forwards all arguments to ``bn_relu_conv``.

    Returns:
        The ``model`` that was passed in, now with the extra stage appended.
    """
    return bn_relu_conv(model, num_filter, drop)

def transition(model, num_filter, drop):
    """Append a transition stage to ``model``: BN-ReLU-Conv-Dropout, then 2x2 max pooling.

    Args:
        model: Model object exposing ``add``; it is extended in place.
        num_filter: Number of filters for the convolution in the transition.
        drop: Dropout rate forwarded to ``bn_relu_conv``.

    Returns:
        The same ``model`` object that was passed in.
    """
    bn_relu_conv(model, num_filter, drop)
    # The pooling layer has to be added to the model; constructing it without
    # calling model.add() leaves the network unchanged.
    model.add(MaxPooling2D(pool_size=(2, 2)))
    return model

# Hyper-parameters for the network built below.
start_channels = 16  # filter count of the first convolution
growth_rate = 12     # added to the filter count after every layer of the block
drop = 0.2           # dropout rate passed to every BN-ReLU-Conv stage
n_classes = 10       # width of the final Dense/softmax output

# Stem: one plain 3x3 convolution applied to the (1, 28, 28) input.
model = Sequential()
stem = Convolution2D(start_channels, 3, 3, border_mode='valid', input_shape=(1, 28, 28))
model.add(stem)

# Block of 10 BN-ReLU-Conv-Dropout stages. The filter count starts at
# start_channels and rises by growth_rate (12) from one stage to the next.
# NOTE(review): the stages are stacked in a Sequential model, so earlier feature
# maps are not concatenated into later layers as in the DenseNet paper.
block_depth = 10
for i in range(block_depth):
    add_layer(model, start_channels + i * growth_rate, drop)
n_channels = start_channels + block_depth * growth_rate

# Close the block with a transition stage at the final filter count.
transition(model, n_channels, drop)

# Only a single dense block is used so far (the paper used 3 dense blocks with
# transitions, for 40 or 100 layers in total).
# Classification head: BatchNorm -> ReLU -> Flatten -> Dense(n_classes) -> softmax.
head_layers = (
    BatchNormalization(),
    Activation('relu'),
    Flatten(),
    Dense(n_classes),
    Activation('softmax'),
)
for head_layer in head_layers:
    model.add(head_layer)
    
# The optimizer is selected by the string 'adam', i.e. without custom settings.
# An explicit configuration is kept for reference only and is not used:
#   Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Train for 10 epochs in shuffled mini-batches of 64; the test split is also
# passed as validation data, so validation metrics are test-set metrics.
model.fit(
    X_train,
    Y_train,
    batch_size=64,
    nb_epoch=10,
    shuffle=True,
    validation_data=(X_test, Y_test),
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:

#Output for the code above (with Adam optimizer); the log ends partway through epoch 6
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 1218s - loss: 0.6435 - acc: 0.7790 - val_loss: 0.7873 - val_acc: 0.7249
Epoch 2/10
60000/60000 [==============================] - 1231s - loss: 0.2497 - acc: 0.9213 - val_loss: 0.4668 - val_acc: 0.8565
Epoch 3/10
60000/60000 [==============================] - 1226s - loss: 0.1875 - acc: 0.9418 - val_loss: 0.3019 - val_acc: 0.9112
Epoch 4/10
60000/60000 [==============================] - 1249s - loss: 0.1585 - acc: 0.9497 - val_loss: 0.2843 - val_acc: 0.9147
Epoch 5/10
60000/60000 [==============================] - 1278s - loss: 0.1377 - acc: 0.9566 - val_loss: 0.3219 - val_acc: 0.9000
Epoch 6/10
 4560/60000 [=>............................] - ETA: 1098s - loss: 0.1140 - acc: 0.9638

#### Same DenseNet as above, except with the Adadelta optimizer and no real tuning of parameters (appears to perform better than default Adam)

In [None]:
def bn_relu_conv(model, num_filter, drop):
    """Add BatchNorm, ReLU, a 3x3 convolution and Dropout to ``model``, in that order.

    Args:
        model: Model object exposing ``add``; it is extended in place.
        num_filter: Number of filters given to the convolution.
        drop: Rate handed to the ``Dropout`` layer.

    Returns:
        The same ``model`` object that was passed in.
    """
    new_layers = [
        BatchNormalization(),
        Activation('relu'),
        Convolution2D(num_filter, 3, 3),
        Dropout(drop),
    ]
    for new_layer in new_layers:
        model.add(new_layer)
    return model

def add_layer(model, num_filter, drop):
    """Append a single BN-ReLU-Conv-Dropout stage to ``model`` via ``bn_relu_conv``.

    Returns:
        The ``model`` that was passed in, with the new stage appended.
    """
    return bn_relu_conv(model, num_filter, drop)

def transition(model, num_filter, drop):
    """Append a transition stage to ``model``: BN-ReLU-Conv-Dropout, then 2x2 max pooling.

    Args:
        model: Model object exposing ``add``; it is extended in place.
        num_filter: Number of filters for the convolution in the transition.
        drop: Dropout rate forwarded to ``bn_relu_conv``.

    Returns:
        The same ``model`` object that was passed in.
    """
    bn_relu_conv(model, num_filter, drop)
    # Constructing the pooling layer is not enough: it only becomes part of the
    # network once it is passed to model.add().
    model.add(MaxPooling2D(pool_size=(2, 2)))
    return model

# Hyper-parameters for the Adadelta run (same values as the Adam run).
start_channels = 16  # filter count of the first convolution
growth_rate = 12     # added to the filter count after every layer of the block
drop = 0.2           # dropout rate passed to every BN-ReLU-Conv stage
n_classes = 10       # width of the final Dense/softmax output

# Fresh model: a plain 3x3 convolution on the (1, 28, 28) input comes first.
model = Sequential()
first_conv = Convolution2D(start_channels, 3, 3, border_mode='valid', input_shape=(1, 28, 28))
model.add(first_conv)

# Ten BN-ReLU-Conv-Dropout stages whose filter count rises by growth_rate each
# time, followed by one transition stage at the final filter count.
n_layers = 10
for i in range(n_layers):
    add_layer(model, start_channels + i * growth_rate, drop)
n_channels = start_channels + n_layers * growth_rate
transition(model, n_channels, drop)

    
# Classification head: BatchNorm -> ReLU -> Flatten -> Dense(n_classes) -> softmax.
for head_layer in (
    BatchNormalization(),
    Activation('relu'),
    Flatten(),
    Dense(n_classes),
    Activation('softmax'),
):
    model.add(head_layer)
    
# Adadelta is selected by name, i.e. without custom settings.
# The explicit Adam configuration below is kept for reference only and is not used:
#   adam_opt = Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)


# Train for 10 epochs in shuffled mini-batches of 64; the test split is also
# passed as validation data, so validation metrics are test-set metrics.
model.fit(
    X_train,
    Y_train,
    batch_size=64,
    nb_epoch=10,
    shuffle=True,
    validation_data=(X_test, Y_test),
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(X_test, Y_test, verbose=0)
final_loss, final_accuracy = score[0], score[1]
print('Test score:', final_loss)
print('Test accuracy:', final_accuracy)

In [None]:
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 1422s - loss: 0.4466 - acc: 0.8525 - val_loss: 0.0918 - val_acc: 0.9690
Epoch 2/10
60000/60000 [==============================] - 1525s - loss: 0.1038 - acc: 0.9681 - val_loss: 0.0505 - val_acc: 0.9846
Epoch 3/10
60000/60000 [==============================] - 1454s - loss: 0.0753 - acc: 0.9766 - val_loss: 0.0702 - val_acc: 0.9786
Epoch 4/10
60000/60000 [==============================] - 1450s - loss: 0.0611 - acc: 0.9817 - val_loss: 0.0355 - val_acc: 0.9884
Epoch 5/10
60000/60000 [==============================] - 1464s - loss: 0.0527 - acc: 0.9834 - val_loss: 0.0663 - val_acc: 0.9799
Epoch 6/10
60000/60000 [==============================] - 1418s - loss: 0.0444 - acc: 0.9865 - val_loss: 0.0281 - val_acc: 0.9912
Epoch 7/10
60000/60000 [==============================] - 1455s - loss: 0.0437 - acc: 0.9872 - val_loss: 0.0409 - val_acc: 0.9868
Epoch 8/10
60000/60000 [==============================] - 1385s - loss: 0.0387 - acc: 0.9885 - val_loss: 0.0252 - val_acc: 0.9925
Epoch 9/10
60000/60000 [==============================] - 1476s - loss: 0.0356 - acc: 0.9888 - val_loss: 0.0323 - val_acc: 0.9904
Epoch 10/10
60000/60000 [==============================] - 1503s - loss: 0.0343 - acc: 0.9896 - val_loss: 0.0273 - val_acc: 0.9917