# DenseNet implementation in Keras
* paper: [Densely Connected Convolutional Networks](http://arxiv.org/abs/1608.06993)
* dataset: CIFAR-10
* environment: tensorflow==1.3, keras==2.0.8

## Common_functions

### conv_factory
* Apply BatchNorm, ReLU, 3x3 Conv2D and optional dropout
    * param x: input Keras tensor
    * param concat_axis: int -- index of the concatenation (channel) axis
    * param nb_filter: int -- number of filters
    * param dropout_rate: float -- dropout rate (no dropout when None)
    * param weight_decay: float -- weight decay factor
    * returns: Keras tensor with BatchNorm, ReLU and Conv2D (and optional dropout) applied
    * rtype: Keras tensor

In [None]:
def conv_factory(x, concat_axis, nb_filter, dropout_rate=None, weight_decay=1E-4):
    """Append a BatchNorm -> ReLU -> 3x3 Conv2D stage, plus optional dropout, to ``x``.

    :param x: input Keras tensor
    :param concat_axis: int -- axis handed to BatchNormalization
    :param nb_filter: int -- number of filters of the 3x3 convolution
    :param dropout_rate: dropout rate; the Dropout layer is skipped when this
        is falsy (``None`` or ``0``)
    :param weight_decay: L2 factor applied to the BatchNorm gamma/beta and to
        the convolution kernel
    :returns: the tensor produced by the last layer that was applied
    """
    normalize = BatchNormalization(
        axis=concat_axis,
        gamma_regularizer=l2(weight_decay),
        beta_regularizer=l2(weight_decay),
    )
    out = normalize(x)
    out = Activation('relu')(out)

    convolve = Conv2D(
        nb_filter,
        (3, 3),
        kernel_initializer="he_uniform",
        padding="same",
        use_bias=False,
        kernel_regularizer=l2(weight_decay),
    )
    out = convolve(out)

    # Guard clause: nothing more to add when dropout is disabled.
    if not dropout_rate:
        return out
    return Dropout(dropout_rate)(out)

### transition
* Apply BatchNorm, ReLU, 1x1 Conv2D, optional dropout and AveragePooling2D
    * param x: input Keras tensor
    * param concat_axis: int -- index of the concatenation (channel) axis
    * param nb_filter: int -- number of filters
    * param dropout_rate: float -- dropout rate (no dropout when None)
    * param weight_decay: float -- weight decay factor
    * returns: Keras tensor after applying batch_norm, relu-conv, dropout and average pooling
    * rtype: Keras tensor

In [None]:
def transition(x, concat_axis, nb_filter, dropout_rate=None, weight_decay=1E-4):
    """Append a BatchNorm -> ReLU -> 1x1 Conv2D -> (dropout) -> 2x2 average-pool stage.

    :param x: input Keras tensor
    :param concat_axis: int -- axis handed to BatchNormalization
    :param nb_filter: int -- number of filters of the 1x1 convolution
    :param dropout_rate: dropout rate; the Dropout layer is skipped when this
        is falsy (``None`` or ``0``)
    :param weight_decay: L2 factor applied to the BatchNorm gamma/beta and to
        the convolution kernel
    :returns: the tensor produced by the AveragePooling2D layer
    """
    normalize = BatchNormalization(axis=concat_axis,
                                   gamma_regularizer=l2(weight_decay),
                                   beta_regularizer=l2(weight_decay))
    out = normalize(x)
    out = Activation('relu')(out)

    pointwise_conv = Conv2D(nb_filter, (1, 1),
                            kernel_initializer="he_uniform",
                            padding="same",
                            use_bias=False,
                            kernel_regularizer=l2(weight_decay))
    out = pointwise_conv(out)

    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    # 2x2 window with stride 2.
    downsample = AveragePooling2D((2, 2), strides=(2, 2))
    return downsample(out)

## Create datasets

## Create networks

### 导入相关包

In [None]:
from keras.models import Model
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import AveragePooling2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers import Input, Concatenate
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
import keras.backend as K

### Create denseblock
* Build a dense block where the output of each conv_factory is fed to all subsequent ones
    * param x: input Keras tensor
    * param concat_axis: int -- index of the concatenation (channel) axis
    * param nb_layers: int -- the number of conv_factory layers to append
    * param nb_filter: int -- number of filters entering the block
    * param growth_rate: int -- number of filters added by each conv_factory
    * param dropout_rate: float -- dropout rate
    * param weight_decay: float -- weight decay factor
    * returns: (tensor with nb_layers of conv_factory appended, updated nb_filter)
    * rtype: tuple (Keras tensor, int)

In [None]:
def denseblock(x, concat_axis, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1E-4):
    """Stack ``nb_layers`` conv_factory stages whose outputs are all concatenated.

    After every stage the block input and the outputs of all stages built so
    far are concatenated along ``concat_axis``; that concatenation is the
    input of the next stage.

    :param x: input Keras tensor
    :param concat_axis: int -- concatenation axis (also forwarded to conv_factory)
    :param nb_layers: int -- number of conv_factory stages to append
    :param nb_filter: running filter count on entry to the block
    :param growth_rate: number of filters of each conv_factory stage
    :param dropout_rate: forwarded to conv_factory
    :param weight_decay: forwarded to conv_factory
    :returns: tuple ``(tensor, nb_filter)`` where ``nb_filter`` has been
        increased by ``growth_rate`` once per stage
    """
    stacked = x
    feature_maps = [stacked]
    filter_count = nb_filter
    for _ in range(nb_layers):
        fresh_maps = conv_factory(stacked, concat_axis, growth_rate, dropout_rate, weight_decay)
        feature_maps.append(fresh_maps)
        # Re-concatenate everything collected so far for the next stage.
        stacked = Concatenate(axis=concat_axis)(feature_maps)
        filter_count += growth_rate
    return stacked, filter_count

### denseblock_altern
* Build a dense block where the output of each conv_factory is fed to subsequent ones (alternative to the version above)
    * param x: input Keras tensor
    * param concat_axis: int -- index of the concatenation (channel) axis
    * param nb_layers: int -- the number of conv_factory layers to append
    * param nb_filter: int -- number of filters entering the block
    * param growth_rate: int -- number of filters added by each conv_factory
    * param dropout_rate: float -- dropout rate
    * param weight_decay: float -- weight decay factor
    * returns: (tensor with nb_layers of conv_factory appended, updated nb_filter)
    * rtype: tuple (Keras tensor, int)

In [None]:
def denseblock_altern(x, concat_axis, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1E-4):
    """Alternative dense block: concatenate each new stage onto the running tensor.

    Each iteration runs conv_factory on the current tensor and concatenates
    the result (placed first) with that tensor along ``concat_axis``.

    :param x: input Keras tensor
    :param concat_axis: int -- concatenation axis (also forwarded to conv_factory)
    :param nb_layers: int -- number of conv_factory stages to append
    :param nb_filter: running filter count on entry to the block
    :param growth_rate: number of filters of each conv_factory stage
    :param dropout_rate: forwarded to conv_factory
    :param weight_decay: forwarded to conv_factory
    :returns: tuple ``(tensor, nb_filter)`` where ``nb_filter`` has been
        increased by ``growth_rate`` once per stage
    """
    running = x
    filter_count = nb_filter
    for _ in range(nb_layers):
        fresh_maps = conv_factory(running, concat_axis, growth_rate, dropout_rate, weight_decay)
        joiner = Concatenate(axis=concat_axis)
        running = joiner([fresh_maps, running])
        filter_count += growth_rate
    return running, filter_count

### DenseNet
*  Build the DenseNet model
    * param nb_classes: int -- number of classes
    * param img_dim: tuple -- shape of one input image, e.g. (rows, columns, channels) for the channels-last ordering used here
    * param depth: int -- how many layers (must be of the form 3N + 4)
    * param nb_dense_block: int -- number of dense blocks
    * param growth_rate: int -- number of filters added by each conv_factory
    * param nb_filter: int -- number of filters of the initial convolution
    * param dropout_rate: float -- dropout rate
    * param weight_decay: float -- weight decay
    * returns: the complete DenseNet model
    * rtype: keras Model

In [None]:
def DenseNet(nb_classes, img_dim, depth, nb_dense_block, growth_rate, nb_filter, dropout_rate=None, weight_decay=1E-4):
    """Build the DenseNet classification model.

    The network is an initial 3x3 convolution, then ``nb_dense_block`` dense
    blocks separated by transition stages (the last block has no transition),
    followed by BatchNorm, ReLU, global average pooling and a softmax Dense
    layer.

    :param nb_classes: int -- number of output classes
    :param img_dim: tuple -- shape of one input image, passed to ``Input(shape=...)``
    :param depth: int -- network depth; must satisfy ``(depth - 4) % 3 == 0``
    :param nb_dense_block: int -- number of dense blocks
    :param growth_rate: int -- number of filters added by each conv_factory
    :param nb_filter: int -- number of filters of the initial convolution
    :param dropout_rate: dropout rate forwarded to the blocks (None disables it)
    :param weight_decay: L2 regularisation factor
    :returns: a Keras ``Model`` named "DenseNet"
    :raises AssertionError: if ``depth`` is not of the form 3N + 4
    """
    # Feature maps are concatenated along the channel axis, whose index
    # depends on the backend image data format.
    if K.image_data_format() == "channels_first":
        concat_axis = 1
    else:
        concat_axis = -1

    model_input = Input(shape=img_dim)
    assert (depth - 4) % 3 == 0, "Depth must be 3 N + 4"

    # layers in each dense block
    nb_layers = int((depth - 4) / 3)

    # Initial convolution
    x = Conv2D(nb_filter, (3, 3),
               kernel_initializer="he_uniform",
               padding="same",
               name="initial_conv2D",
               use_bias=False,
               kernel_regularizer=l2(weight_decay))(model_input)

    # Add dense blocks, each followed by a transition
    for block_idx in range(nb_dense_block - 1):
        x, nb_filter = denseblock(x, concat_axis, nb_layers, nb_filter, growth_rate,
                                  dropout_rate=dropout_rate, weight_decay=weight_decay)
        # transition() takes concat_axis as its second positional argument
        x = transition(x, concat_axis, nb_filter,
                       dropout_rate=dropout_rate, weight_decay=weight_decay)

    # The last denseblock does not have a transition
    x, nb_filter = denseblock(x, concat_axis, nb_layers, nb_filter, growth_rate,
                              dropout_rate=dropout_rate, weight_decay=weight_decay)

    x = BatchNormalization(axis=concat_axis,
                           gamma_regularizer=l2(weight_decay),
                           beta_regularizer=l2(weight_decay))(x)
    x = Activation('relu')(x)
    x = GlobalAveragePooling2D(data_format=K.image_data_format())(x)
    x = Dense(nb_classes,
              activation='softmax',
              kernel_regularizer=l2(weight_decay),
              bias_regularizer=l2(weight_decay))(x)

    densenet = Model(inputs=[model_input], outputs=[x], name="DenseNet")
    return densenet

## train and test the networks

* CIFAR10 experiments
    * param batch_size: int -- batch size
    * param nb_epoch: int -- number of training epochs
    * param depth: int -- network depth
    * param nb_dense_block: int -- number of dense blocks
    * param nb_filter: int -- initial number of conv filter
    * param growth_rate: int -- number of new filters added by conv layers
    * param dropout_rate: float -- dropout rate
    * param learning_rate: float -- learning rate
    * param weight_decay: float -- weight decay
    * param plot_architecture: bool -- whether to plot network architecture

In [None]:
import os
import time
import json
import densenet
import numpy as np
import keras.backend as K
from keras.datasets import cifar10
from keras.optimizers import Adam
from keras.utils import np_utils

def run_cifar10(batch_size, nb_epoch, depth, nb_dense_block, nb_filter, growth_rate, dropout_rate, learning_rate,
                weight_decay, plot_architecture):
    """Train a DenseNet on CIFAR-10 and write a JSON log after every epoch.

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filters
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- initial Adam learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot the network architecture
        to ``./figures/densenet_archi.png``
    :returns: None; results are written to ``./log/experiment_log_cifar10.json``
    """

    ###################
    # Data processing #
    ###################
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]
    # The channel count is read from the last axis (channels-last layout).
    n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Per-channel standardisation; the mean/std are computed over the
    # train and test images stacked together.
    X = np.vstack((X_train, X_test))
    for i in range(n_channels):
        mean = np.mean(X[:, :, :, i])
        std = np.std(X[:, :, :, i])
        X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
        X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    ###################
    # Construct model #
    ###################

    # NOTE(review): this uses the imported `densenet` module, while the
    # notebook also defines DenseNet in a cell above -- confirm which one
    # is intended.
    model = densenet.DenseNet(nb_classes, img_dim, depth, nb_dense_block, growth_rate, nb_filter,
                              dropout_rate=dropout_rate, weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])
    if plot_architecture:
        # Keras 2 exposes the plotting helper as vis_utils.plot_model.
        from keras.utils.vis_utils import plot_model
        figure_file = './figures/densenet_archi.png'
        figure_dir = os.path.dirname(figure_file)
        if not os.path.isdir(figure_dir):
            os.makedirs(figure_dir)
        plot_model(model, to_file=figure_file, show_shapes=True)

    ####################
    # Network training #
    ####################
    print("Training")
    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []

    # Create the log directory up front so a missing directory fails before
    # training starts rather than after the first epoch.
    json_file = './log/experiment_log_cifar10.json'
    log_dir = os.path.dirname(json_file)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    n_samples = X_train.shape[0]
    # Integer number of batches; at least one so array_split never receives 0
    # sections when batch_size exceeds the number of samples.
    num_splits = max(1, n_samples // batch_size)

    for e in range(nb_epoch):

        # Step schedule: lr / 10 at 50% of the epochs, lr / 100 at 75%.
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        arr_splits = np.array_split(np.arange(n_samples), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_idx in arr_splits:
            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])

        test_logloss, test_acc = model.evaluate(X_test, Y_test, verbose=0, batch_size=64)
        # .tolist() converts the numpy array into JSON-serialisable floats
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        # The full log is rewritten after every epoch.
        d_log = {
            "batch_size": batch_size,
            "nb_epoch": nb_epoch,
            "optimizer": opt.get_config(),
            "train_loss": list_train_loss,
            "test_loss": list_test_loss,
            "learning_rate": list_learning_rate,
        }
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

### 参数设置

In [None]:
# Hyper-parameters of the CIFAR-10 experiment.
# Each assignment must be a plain scalar: a trailing comma after the value
# would turn it into a 1-tuple and break the arithmetic inside the model code.
batch_size = 64
nb_epoch = 30
depth = 7
nb_dense_block = 1
nb_filter = 16
growth_rate = 12
dropout_rate = 0.2
learning_rate = 1E-3
weight_decay = 1E-4
plot_architecture = False
run_cifar10(batch_size, nb_epoch, depth, nb_dense_block, nb_filter, growth_rate,
            dropout_rate, learning_rate, weight_decay, plot_architecture)

# use the networks

# 可视化结果

In [None]:
import matplotlib.pylab as plt
import json
import numpy as np
def plot_cifar10(save=True):
    """Plot accuracy and loss curves from ``./log/experiment_log_cifar10.json``.

    Accuracy (in percent) is drawn on the left axis with solid lines and loss
    on a twin right axis with dashed lines; train curves are "tomato" and
    test curves "steelblue".

    :param save: when truthy, the figure is also written to
        ``./figures/plot_cifar10.svg`` before being shown
    """
    with open("./log/experiment_log_cifar10.json", "r") as log_file:
        history = json.load(log_file)

    # Each log entry is indexed as [loss, accuracy] per epoch.
    train_metrics = np.array(history["train_loss"])
    train_accuracy = 100 * train_metrics[:, 1]
    test_metrics = np.array(history["test_loss"])
    test_accuracy = 100 * test_metrics[:, 1]

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.set_ylabel('Accuracy')
    accuracy_curves = (
        (train_accuracy, "tomato", 'train_acc'),
        (test_accuracy, "steelblue", 'test_acc'),
    )
    for values, colour, tag in accuracy_curves:
        ax1.plot(values, color=colour, linewidth=2, label=tag)
    ax1.legend(loc=0)

    ax2 = ax1.twinx()
    ax2.set_ylabel('Loss')
    loss_curves = (
        (train_metrics[:, 0], "tomato", 'train_loss'),
        (test_metrics[:, 0], "steelblue", 'test_loss'),
    )
    for values, colour, tag in loss_curves:
        ax2.plot(values, '--', color=colour, linewidth=2, label=tag)
    ax2.legend(loc=1)
    ax1.grid(True)

    if save:
        fig.savefig('./figures/plot_cifar10.svg')

    plt.show()
    plt.clf()
    plt.close()