## Side Output Fusion Convolutional Neural Network

The deep learning architecture, called the Side Output Fusion Network, classifies a feature map obtained by combining low-level, mid-level and high-level information taken from the convolution layers. Fusing features from the low-level and high-level layers has a positive effect on the results.

<img src="./data/Figure.png" width="750" height="750">

In [12]:
# Importing

from matplotlib import pyplot as plt
from keras import models
from keras.models import Model, Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, Input, Convolution2D, MaxPooling2D 
from keras.layers.normalization import BatchNormalization
import numpy as np
from keras.utils import np_utils
import keras
from keras.datasets import cifar10
from keras.layers.merge import concatenate

# The GPU is disabled here because of a GPU-related problem on my computer.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"    
import tensorflow as tf

# Parameter settings
num_classes = 10   # number of target classes
batch_size = 128   # batch size handed to model.fit
epochs = 3         # number of training epochs

# Data distribution: training/testing data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

img_rows, img_cols = 32, 32

# The backend module is reached through the already-imported `keras`
# package; the bare name `K` is never imported in this file and would
# raise a NameError here.
if keras.backend.image_data_format() == 'channels_first':
    input_shape = (3, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 3)

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# NOTE(review): the images are only cast to float32, not rescaled --
# confirm that feeding unscaled pixel values to the network is intended.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Creating Model


def _conv_block(tensor, filters, block_name):
    """Run one convolutional block on `tensor`.

    Two 3x3 same-padded ReLU convolutions with `filters` channels, each
    followed by batch normalization, then a 2x2 max pooling with stride 2.
    Layers are named `<block_name>_conv1`, `<block_name>_conv2` and
    `<block_name>_pool`.
    """
    for conv_idx in (1, 2):
        tensor = Conv2D(filters, (3, 3), activation='relu', padding='same',
                        name='{}_conv{}'.format(block_name, conv_idx))(tensor)
        tensor = BatchNormalization()(tensor)
    return MaxPooling2D((2, 2), strides=(2, 2),
                        name='{}_pool'.format(block_name))(tensor)


def _classifier_head(features, units, prefix):
    """Attach a fully connected classifier to the flat `features` tensor.

    Two Dense(`units`, relu) -> BatchNormalization -> Dropout(0.5) stages
    followed by a softmax layer over `num_classes`. Layers are named
    `<prefix>_fc1`, `<prefix>_fc2` and `<prefix>_predictions`.
    """
    hidden = features
    for fc_idx in (1, 2):
        hidden = Dense(units, activation='relu',
                       name='{}_fc{}'.format(prefix, fc_idx))(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.5)(hidden)
    return Dense(num_classes, activation='softmax',
                 name='{}_predictions'.format(prefix))(hidden)


inputs = Input(shape=input_shape)

# --- blocks 1 and 2 ---
x = _conv_block(inputs, 64, 'block1')
x = _conv_block(x, 128, 'block2')

# --- Side 1 branch: classifier on the flattened block 2 features ---
sideoutput1 = Flatten(name='side1_flatten')(x)
s_1_pred = _classifier_head(sideoutput1, 256, 'side1')

# --- block 3 ---
x = _conv_block(x, 256, 'block3')

# --- Side 2 branch: classifier on the flattened block 3 features ---
sideoutput2 = Flatten(name='c2_flatten')(x)
s_2_pred = _classifier_head(sideoutput2, 512, 'side2')

# --- block 4 ---
x = _conv_block(x, 512, 'block4')

# --- Side 3 (last) branch: classifier on the flattened block 4 features ---
x = Flatten(name='flatten')(x)
sideoutput3 = x
side3_pred = _classifier_head(sideoutput3, 1024, 'side3')

# Fusion network: classifier on the concatenation of the three flattened
# side feature vectors.
fusionfeatures = concatenate([sideoutput1, sideoutput2, sideoutput3])
fusion_pred = _classifier_head(fusionfeatures, 1024, 'fusion')

# Build a single model with four outputs (the three side classifiers and
# the fusion classifier) so that all four are trained at once without
# additional adjustment.
# Keras 2 names these arguments `inputs`/`outputs`; the singular Keras 1
# spellings `input`/`output` are deprecated.
model = Model(inputs=inputs,
              outputs=[s_1_pred, s_2_pred, side3_pred, fusion_pred],
              name='our_model')
model.summary()

# Training parameters
# The optimizers module is reached through the already-imported `keras`
# package; the bare name `optimizers` is never imported in this file.
# NOTE(review): `sgd` is constructed but not passed to compile() below,
# which trains with Adam -- confirm which optimizer is intended.
sgd = keras.optimizers.SGD(lr=0.003, momentum=0.9, nesterov=True)

# Each output is a softmax layer trained against one-hot targets, so
# categorical cross-entropy is the matching loss (one entry per output).
# loss_weights follow the model's output order: side 1, side 2, side 3,
# fusion.
model.compile(optimizer='adam',
              loss=['categorical_crossentropy',
                    'categorical_crossentropy',
                    'categorical_crossentropy',
                    'categorical_crossentropy'],
              loss_weights=[0.05, 0.1, 1.0, 1.0],
              metrics=['accuracy'])

# Training stage: the same one-hot labels supervise each of the four outputs.
model.fit(x_train, [y_train, y_train, y_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_split=0.15)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 10)




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 32, 32, 64)   1792        input_12[0][0]                   
__________________________________________________________________________________________________
batch_normalization_177 (BatchN (None, 32, 32, 64)   256         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 32, 32, 64)   36928       batch_normalization_177[0][0]    
__________________________________________________________________________________________________
batch_norm

Train on 42500 samples, validate on 7500 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1e76c8f5208>