In [0]:
import keras
from keras.layers import Input, merge
from keras.layers import Convolution2D , concatenate ,Conv2D,Dense
from keras.layers import Activation,Dropout, GlobalAveragePooling2D,BatchNormalization,SeparableConv2D
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import  to_categorical
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.contrib.slim as slim
from keras.layers.core import Dense,Dropout,Activation,Flatten,Lambda


In [57]:
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

x_train = x_train/ 255.0

x_test = x_test/255.0

# Add empty color dimension
x_train = np.expand_dims(x_train, -1)  # shape: (num_samples, 28, 28, 1)
x_test = np.expand_dims(x_test, -1) # shape: (num_samples, 1)
y_test = to_categorical(y_test, num_classes=10)
y_train = to_categorical(y_train, num_classes=10)
print(y_train.shape,y_test.shape,x_train.shape,x_test.shape)
print(type(x_train))

Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
(60000, 10) (10000, 10) (60000, 28, 28, 1) (10000, 28, 28, 1)
<class 'numpy.ndarray'>


In [0]:
# squeezenet has bottle neck modules- firmodules
def build_squeezenet(input_shape=(28, 28, 1), n_classes=10):
    
    input_layer = Input(shape = input_shape)
    
    out = Conv2D(96, kernel_size =(3, 3), activation='relu')(input_layer)
    # no mxpooling done since small images 
    out = fire_module(out,squeeze =16 , expansion =64)
    out = fire_module(out,squeeze = 16 , expansion=64)
    
    out = fire_module(out,squeeze= 32,expansion = 128)
    out = fire_module(out,squeeze= 32,expansion = 128)
    
    out = fire_module(out , squeeze = 48 ,expansion=192)
    out = fire_module(out,squeeze = 48 , expansion = 192)
    
    out = fire_module(out,squeeze = 64 , expansion = 256)
    out = fire_module(out,squeeze = 64 , expansion = 256)
    
    out = Dropout(0.2)(out)
    
    out = Conv2D(10,kernel_size=(1,1),padding='valid',activation='relu')(out)
    out = GlobalAveragePooling2D()(out)
    out = Dense(10, activation="softmax")(out)
    
    model = Model(input_layer, out, name='squeezenet')
    return model 
    


In [0]:
def fire_module(input_layer, squeeze =16 , expansion =32):

# a definition for fire module et.al https://github.com/rcmalli/keras-squeezenet/
# inspiration
# squeezing using 1x1 kernel and expansion with 1x1 and 3x3. Concatenate the output to get best result.

    fire_sq = Conv2D(squeeze,1,1,activation='relu')(input_layer)
    fire_exp1 = Conv2D(expansion ,kernel_size=(1,1),activation='relu',padding='valid')(fire_sq)
    fire_exp2 = Conv2D(expansion ,kernel_size=(3,3),activation='relu',padding='same')(fire_sq)
    out = concatenate([fire_exp1,fire_exp2], axis=3)

    return out

In [65]:
model = build_squeezenet(input_shape=(28, 28, 1), n_classes=10)
# model_dw = build_squeezenet_depthwise()

model.compile(
    optimizer=Adam(lr=1e-3), loss='categorical_crossentropy',
    metrics=['accuracy'])
# model.summary()
model.fit(x=x_train, 
            y=y_train, 
            batch_size=128,
            epochs=10, 
            verbose=1,  
            validation_split=0.2,
            validation_data=(x_test,y_test), 
            shuffle=True)

  import sys
  import sys
  import sys
  import sys


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f28b78a4390>

In [0]:
#building squeezenet 
def build_squeezenet_depthwise(input_shape, width_multiplier = 1):
    
    # call depthwise convolution - step 1 
    # call pointwise conolution - step 2 
    

    input_layer = Input((input_shape))
    
    # the spatial convolutional network is 1x3 and then 3x1 . Implementd in the below function 

    x = Convolution2D(int(32), (3, 3), padding='same')(input_layer)
    # each layer is passed through a bath normalization , helps in normalizing .
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    

    x = DepthwiseConvolution2D(x,int(32), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(64), (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConvolution2D(x,int(64), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(128), (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConvolution2D(x,int(128), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(128) ,(1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConvolution2D(x,int(128), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(256), (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConvolution2D(x,int(256), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(256), (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConvolution2D(x,int(256), padding='same')
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Convolution2D(int(512), (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    x = Conv2D(10,kernel_size=(1,1),padding='valid',activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    
   
    x = Dense(10, activation='softmax')(x)
    
    
    model = Model(input_layer, x, name='mobilenet')
    return model 
    
    
    
    
    

def DepthwiseConvolution2D( inputs , filter_size, width=1, padding='same') :
    
    # call depthwise convolution - step 1 
    # call pointwise conolution - step 2 
    
    # the seperable convoltuion makes 3x3 filters 
    #applied first 1x3 and then 3x1. 
    
    x = SeparableConv2D(filter_size, kernel_size=(3,3) , strides = (1, 1),depth_multiplier= 1)(inputs)
#     out = slim.separable_convolution2d(inputs,num_outputs=None,
#                                                   stride=(2,2),
#                                                   padding ="SAME",
#                                                   depth_multiplier=1,
#                                                   kernel_size=[3, 3])
    
    num_pwc_filters = int(filter_size*width)
    
#     batch =slim.batch_norm(out)
    x = BatchNormalization()(x)
    

    
#     point_out=  slim.convolution2d(batch,num_pwc_filters,kernel_size=[1, 1])
    x = Convolution2D(filter_size,kernel_size =(1, 1), strides=(1, 1), padding='same')(x)
   
    x = BatchNormalization()(x)
    print(x.shape)

    return x
    
    

In [59]:
model_dw = build_squeezenet_depthwise(input_shape= (28,28,1))

model_dw.compile(
    optimizer=Adam(lr=1e-3), loss='categorical_crossentropy',
    metrics=['accuracy'])
# model.summary()
model_dw.fit(x=x_train, 
            y=y_train, 
            batch_size=128,
            epochs=10, 
            verbose=1,  
            validation_split=0.2,
            validation_data=(x_test,y_test), 
            shuffle=True)

(?, 26, 26, 32)
(?, 24, 24, 64)
(?, 22, 22, 128)
(?, 20, 20, 128)
(?, 18, 18, 256)
(?, 16, 16, 256)
Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f29d10b1c88>

Summary of the models have been given below. I believe that Mobile net performs better than the squeezenet version given above since, the spatial convolution preserves features better than the Squeeze model and the convoultional redcues complexity.


Squeezenet is preserving accuracy with few parameters.The Squeeze Module decreases the number of input channels significantly.
The Expand Module then increases the number of input channels again.



MobileNet -MobileNet  is a stack of the separable convolution modules which are composed of depthwise conv and conv1x1 (pointwise conv). It uses layers such as depthwise convolution  which significantly reduces the computational cost by omitting convolution in channel domain.I have used the V1 version. 

Given more time I will do hyperparamter tuning and implement on larger size images to check for spatial reductions. Applying in different datasets with more ambigous data. 

It can be seen that although Mobilenet performs better it is still smaller in size compared to Squeezenet. Hence it is a matter of tradeoff, we could inreality tune squeeenet and make it equalized. 

If the intention is make Mobilenet smaller , we could apply L1 norm to eleiminate a few parameters and make it smalers

In [60]:
model_dw.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_120 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
batch_normalization_197 (Bat (None, 28, 28, 32)        128       
_________________________________________________________________
activation_142 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
separable_conv2d_32 (Separab (None, 26, 26, 32)        1344      
_________________________________________________________________
batch_normalization_198 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_121 (Conv2D)          (None, 26, 26, 32)        1056      
__________

In [66]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_25 (InputLayer)           (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_159 (Conv2D)             (None, 26, 26, 96)   960         input_25[0][0]                   
__________________________________________________________________________________________________
conv2d_160 (Conv2D)             (None, 26, 26, 16)   1552        conv2d_159[0][0]                 
__________________________________________________________________________________________________
conv2d_161 (Conv2D)             (None, 26, 26, 64)   1088        conv2d_160[0][0]                 
__________________________________________________________________________________________________
conv2d_162