In [1]:
import keras

Using TensorFlow backend.


In [0]:
from keras.models import Model
from keras.layers import Flatten, Dense, Dropout, Conv2D, MaxPool2D, BatchNormalization, Activation, Input, AveragePooling2D
from keras import regularizers
from keras.layers import concatenate

In [0]:
def conv2d_with_bn(prev_layer, filters, kernel_size, name, weight_decay=.0, strides=(1, 1), bn=True):
    """Apply a 'same'-padded convolution, optional batch normalization, then ReLU.

    Args:
        prev_layer: Keras tensor fed into the convolution.
        filters: number of output filters of the convolution.
        kernel_size: kernel size forwarded to ``Conv2D``.
        name: prefix for the layer names; "-conv", "-bn" and "-relu" are appended.
        weight_decay: L2 regularization factor applied to the convolution kernel.
        strides: strides forwarded to ``Conv2D``.
        bn: when True, a ``BatchNormalization`` layer is inserted between the
            convolution and the activation.

    Returns:
        The output tensor of the ReLU activation.
    """
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides,
               padding='same',
               kernel_regularizer=regularizers.l2(weight_decay),
               name=name + "-conv",
               # The conv bias is only redundant when BatchNormalization follows
               # (it learns its own offset). Without BN the bias must be kept,
               # otherwise the layer has no offset term at all.
               use_bias=not bn)(prev_layer)
    if bn:
        # axis=3 normalizes over the last axis of a 4D tensor
        # (assumes channels_last data format - TODO confirm backend config).
        x = BatchNormalization(scale=False, axis=3, name=name + "-bn")(x)
    x = Activation('relu', name=name + "-relu")(x)
    return x

In [0]:
def inception_block(prev_layer, num_filters, name, weight_decay=.0, bn=True, use_whistle=False, numclasses=-1):
    """Build one four-branch inception module and concatenate the branches.

    Branches (all fed from ``prev_layer``):
        0. 1x1 convolution.
        1. 1x1 convolution followed by a 3x3 convolution.
        2. 1x1 convolution followed by a 5x5 convolution.
        3. 3x3 max pooling (stride 1, 'same') followed by a 1x1 convolution.

    Args:
        prev_layer: Keras tensor fed into every branch.
        num_filters: filter counts laid out as
            ``(br0, (br1_1x1, br1_3x3), (br2_1x1, br2_5x5), br3_1x1)``.
        name: name of the concatenation layer and prefix for all branch layers.
        weight_decay: L2 factor forwarded to every convolution.
        bn: forwarded to every convolution to toggle batch normalization.
        use_whistle: when True, also build an auxiliary classifier.
        numclasses: number of classes for the auxiliary classifier; only used
            when ``use_whistle`` is True.

    Returns:
        The concatenated tensor, or ``(concatenated, auxiliary_output)`` when
        ``use_whistle`` is True.
    """
    branch0 = conv2d_with_bn(prev_layer=prev_layer, filters=num_filters[0], kernel_size=(1, 1),
                             weight_decay=weight_decay, name=name + '-br0-1x1', bn=bn)

    branch1 = conv2d_with_bn(prev_layer=prev_layer, filters=num_filters[1][0], kernel_size=(1, 1),
                             weight_decay=weight_decay, name=name + '-br1-1x1', bn=bn)
    branch1 = conv2d_with_bn(prev_layer=branch1, filters=num_filters[1][1], kernel_size=(3, 3),
                             weight_decay=weight_decay, name=name + '-br1-3x3', bn=bn)

    branch2 = conv2d_with_bn(prev_layer=prev_layer, filters=num_filters[2][0], kernel_size=(1, 1),
                             weight_decay=weight_decay, name=name + '-br2-1x1', bn=bn)
    branch2 = conv2d_with_bn(prev_layer=branch2, filters=num_filters[2][1], kernel_size=(5, 5),
                             weight_decay=weight_decay, name=name + '-br2-5x5', bn=bn)

    branch3 = MaxPool2D(pool_size=(3, 3), strides=(1, 1), padding='same', name=name + '-br3-pool')(prev_layer)
    # bn must be forwarded here as well; otherwise this branch is always
    # batch-normalized, even when the caller disabled it for the block.
    branch3 = conv2d_with_bn(prev_layer=branch3, filters=num_filters[3], kernel_size=(1, 1),
                             weight_decay=weight_decay, name=name + '-br3-1x1', bn=bn)

    # axis=3 joins the branches along the last axis of a 4D tensor.
    x = concatenate([branch0, branch1, branch2, branch3], axis=3, name=name)

    if use_whistle:
        # The auxiliary classifier reads the block's INPUT (prev_layer),
        # not the concatenated output.
        out = aux_whistle(prev_layer, numclasses=numclasses, name=name + '-whistle')
        return x, out

    return x

In [0]:
def aux_whistle(prev_layer, numclasses, name, weight_decay=0.3):
    """Build an auxiliary softmax classifier on top of an intermediate tensor.

    Pipeline: 5x5 average pooling (stride 3) -> 1x1 convolution with 128
    filters (via ``conv2d_with_bn``) -> flatten -> Dense(1024, relu) ->
    Dropout(0.3) -> Dense(numclasses, softmax).

    Args:
        prev_layer: Keras tensor the auxiliary head is attached to.
        numclasses: number of units of the final softmax layer.
        name: prefix for the names of the layers created here.
        weight_decay: L2 factor for the 1x1 convolution. The default of 0.3 is
            the value that was previously hard-coded.
            NOTE(review): 0.3 is large for an L2 factor - confirm it is intended.

    Returns:
        The softmax output tensor of the auxiliary classifier.
    """
    aux_clf = AveragePooling2D(pool_size=(5, 5), strides=(3, 3), name=name + '-averagePool')(prev_layer)
    aux_clf = conv2d_with_bn(aux_clf, filters=128, kernel_size=(1, 1), name=name + '-1x1conv',
                             weight_decay=weight_decay)
    aux_clf = Flatten(name=name + '-flatten')(aux_clf)
    aux_clf = Dense(1024, activation='relu')(aux_clf)
    aux_clf = Dropout(0.3, name=name + '-dropout')(aux_clf)
    # Use the numclasses argument rather than a notebook-level global, so the
    # head always matches what the caller asked for.
    aux_clf = Dense(numclasses, activation='softmax', name=name + '-predictions')(aux_clf)
    return aux_clf

In [0]:
def inceptionNet(input_shape, numclasses, weight_decay = 0.0, bn = True):
    """Assemble the three-output inception network used in this notebook.

    Layout: two stem convolutions ('2a', '2b'), inception blocks 3a-3b,
    max pool, blocks 4a-4e, max pool, blocks 5a-5b, 8x8 average pool,
    flatten and a softmax layer. Blocks 4b and 4e are built with
    ``use_whistle=True`` and contribute one auxiliary output each.

    Args:
        input_shape: shape forwarded to ``Input``.
        numclasses: size of the main softmax layer; also forwarded to the two
            blocks that build auxiliary classifiers.
        weight_decay: forwarded to every stem convolution and inception block.
        bn: forwarded to every stem convolution and inception block.

    Returns:
        A ``Model`` named 'inception_v1' whose outputs are
        ``[main, auxiliary_from_4b, auxiliary_from_4e]``.
    """
    shared = dict(weight_decay=weight_decay, bn=bn)

    def plain_blocks(tensor, specs):
        # Chain inception blocks that return a single tensor.
        for tag, filters in specs:
            tensor = inception_block(tensor, filters, name=tag, **shared)
        return tensor

    def tapped_block(tensor, tag, filters):
        # Inception block that also returns an auxiliary classifier output.
        return inception_block(tensor, filters, name=tag,
                               use_whistle=True, numclasses=numclasses, **shared)

    inp = Input(shape=input_shape)

    # Stem
    net = conv2d_with_bn(inp, filters=64, kernel_size=(1, 1), name='2a', **shared)
    net = conv2d_with_bn(net, filters=192, kernel_size=(3, 3), name='2b', **shared)

    # Stage 3
    net = plain_blocks(net, [
        ('inception3a', (64, (96, 128), (16, 32), 32)),
        ('inception3b', (128, (128, 192), (32, 96), 64)),
    ])
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same', name='3pool')(net)

    # Stage 4, with auxiliary outputs taken at 4b and 4e
    net = plain_blocks(net, [
        ('inception4a', (192, (96, 208), (16, 48), 64)),
    ])
    net, aux_first = tapped_block(net, 'inception4b', (160, (112, 224), (24, 64), 64))
    net = plain_blocks(net, [
        ('inception4c', (128, (128, 256), (24, 64), 64)),
        ('inception4d', (112, (144, 288), (32, 64), 64)),
    ])
    net, aux_second = tapped_block(net, 'inception4e', (256, (160, 320), (32, 128), 128))
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same', name='4pool')(net)

    # Stage 5
    net = plain_blocks(net, [
        ('inception5a', (256, (160, 320), (32, 128), 128)),
        ('inception5b', (384, (192, 384), (48, 128), 128)),
    ])

    # Classifier head. The 8x8 window is fixed; presumably sized for the
    # 32x32 inputs used in this notebook (two stride-2 pools) - confirm before
    # feeding other input sizes. No dropout is applied before the softmax.
    net = AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding='valid', name='avg8x8')(net)
    net = Flatten(name='flatten')(net)
    main_out = Dense(numclasses, activation='softmax', name='predictions')(net)

    return Model(inp, [main_out, aux_first, aux_second], name='inception_v1')

In [0]:
from keras.datasets import cifar100
import numpy as np
from matplotlib import pyplot as plt
from keras.utils import to_categorical

In [9]:
# Load the CIFAR-100 train and test splits as (images, labels) pairs.
(x_train, y_train),(x_test, y_test) = cifar100.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


In [0]:
# Training configuration.
# The class count is derived from the span of label values in the training set.
num_classes = y_train.max() - y_train.min() + 1
batch_size, epochs = 128, 50
lr, weight_decay = 1e-1, 5e-4

In [0]:
# Convert images to float32 and divide by 255; one-hot encode the labels.
# NOTE: this cell overwrites its own inputs, so it is not idempotent -
# re-running it divides the images by 255 again and re-encodes the labels.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
y_train, y_test = (
    to_categorical(y_train, num_classes=num_classes),
    to_categorical(y_test, num_classes=num_classes),
)

In [0]:
# Build the network for 32x32 RGB inputs; bn is left at its default (True).
inceptionv1 = inceptionNet(input_shape=(32,32,3), numclasses=num_classes, weight_decay=weight_decay)

In [0]:
# SGD with momentum 0.9 (no Nesterov), using the learning rate set in the config cell.
opt = keras.optimizers.SGD(lr=lr, momentum=0.9, nesterov=False)
# Alternative optimizer, currently disabled:
#opt = keras.optimizers.Adam(lr=lr)

In [24]:
# One cross-entropy loss per model output: the first output gets weight 1.0
# and each of the two remaining (auxiliary) outputs gets weight 0.3.
inceptionv1.compile(
    loss='categorical_crossentropy',
    loss_weights=[1.0] + [0.3] * 2,
    optimizer=opt,
    metrics=['accuracy'],
)
inceptionv1.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
2a-conv (Conv2D)                (None, 32, 32, 64)   192         input_1[0][0]                    
__________________________________________________________________________________________________
2a-bn (BatchNormalization)      (None, 32, 32, 64)   192         2a-conv[0][0]                    
__________________________________________________________________________________________________
2a-relu (Activation)            (None, 32, 32, 64)   0           2a-bn[0][0]                      
__________________________________________________________________________________________________
2b-conv (C

In [0]:
# All three model outputs are trained (and validated) against the same labels,
# so the label array is repeated once per output.
history = inceptionv1.fit(
    x_train,
    [y_train] * 3,
    validation_data=(x_test, [y_test] * 3),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50