In [4]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import concatenate

In [10]:
# basic inception block 

def naive_inception_block(in_layer, f1, f3, f5):
    """Build a naive inception block on top of ``in_layer``.

    Four branches are applied to the same input in parallel and their
    outputs are stacked along the channel axis:

    * a 1x1 convolution with ``f1`` filters,
    * a 3x3 convolution with ``f3`` filters,
    * a 5x5 convolution with ``f5`` filters,
    * a 3x3 max pooling with stride 1.

    Every branch uses ``padding='same'`` (and the pooling uses stride 1) so
    the branch outputs can be concatenated.

    Args:
        in_layer: Input tensor. Assumed to be channels-last
            (batch, height, width, channels), as concatenation is done on
            the last axis.
        f1: Number of filters of the 1x1 convolution.
        f3: Number of filters of the 3x3 convolution.
        f5: Number of filters of the 5x5 convolution.

    Returns:
        The concatenation of the four branch outputs along the last axis.
    """
    conv_1 = Conv2D(f1, (1, 1), padding='same')(in_layer)
    conv_3 = Conv2D(f3, (3, 3), padding='same')(in_layer)
    conv_5 = Conv2D(f5, (5, 5), padding='same')(in_layer)
    pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(in_layer)

    # Stack along the channel axis (-1). The branches differ only in their
    # number of channels, so any other axis would fail with a shape mismatch.
    out_layer = concatenate([conv_1, conv_3, conv_5, pool], axis=-1)

    return out_layer
    
## 1x1 convolutions are added to reduce the number of filters before the 3x3 and 5x5 convolutions, which lowers the computational cost.

def inception_block(in_layer, f1, f2_in, f2_out, f3_in, f3_out, f4_out):
    """Build an inception block with 1x1 dimension-reduction convolutions.

    Four branches are applied to ``in_layer`` in parallel and their outputs
    are concatenated along the last (channel) axis:

    1. 1x1 convolution with ``f1`` filters.
    2. 1x1 convolution with ``f2_in`` filters, then a 3x3 convolution with
       ``f2_out`` filters.
    3. 1x1 convolution with ``f3_in`` filters, then a 5x5 convolution with
       ``f3_out`` filters.
    4. 3x3 max pooling (stride 1), then a 1x1 convolution with ``f4_out``
       filters.

    All layers use ``padding='same'`` and unit stride so the branch outputs
    can be concatenated.

    Args:
        in_layer: Input tensor. Assumed to be channels-last, since the
            branches are concatenated on the last axis.
        f1: Filters of the stand-alone 1x1 convolution.
        f2_in: Filters of the 1x1 reduction in front of the 3x3 convolution.
        f2_out: Filters of the 3x3 convolution.
        f3_in: Filters of the 1x1 reduction in front of the 5x5 convolution.
        f3_out: Filters of the 5x5 convolution.
        f4_out: Filters of the 1x1 convolution applied after max pooling.

    Returns:
        Tensor whose last axis has ``f1 + f2_out + f3_out + f4_out`` channels.
    """
    # Branch 1: plain 1x1 convolution.
    conv_1 = Conv2D(f1, (1, 1), padding='same')(in_layer)

    # Branch 2: 1x1 reduction followed by the 3x3 convolution.
    conv_2 = Conv2D(f2_in, (1, 1), padding='same')(in_layer)
    conv_2 = Conv2D(f2_out, (3, 3), padding='same')(conv_2)

    # Branch 3: 1x1 reduction followed by the 5x5 convolution. This branch
    # previously used a 3x3 kernel, which made it a duplicate of branch 2.
    conv_3 = Conv2D(f3_in, (1, 1), padding='same')(in_layer)
    conv_3 = Conv2D(f3_out, (5, 5), padding='same')(conv_3)

    # Branch 4: max pooling, then a 1x1 convolution to set the channel count.
    pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(in_layer)
    pool = Conv2D(f4_out, (1, 1), padding='same')(pool)

    out_layer = concatenate([conv_1, conv_2, conv_3, pool], axis=-1)

    return out_layer

In [13]:
# Build a small demo model made of two stacked inception blocks.
# Input: 256x256 tensors with 3 channels in the last axis.
visible = Input(shape=(256, 256, 3))
# add inception block 1 (branch outputs: 64 + 128 + 32 + 32 = 256 channels)
layer = inception_block(visible, 64, 96, 128, 16, 32, 32)
# add inception block 2, fed by block 1 (branch outputs: 128 + 192 + 96 + 64 = 480 channels)
layer = inception_block(layer, 128, 128, 192, 32, 96, 64)
# create model mapping the input to the output of the second block
model = Model(inputs=visible, outputs=layer)
# summarize model (prints the layer table)
model.summary()
# plot model architecture, including tensor shapes, to 'inception_module.png'
plot_model(model, show_shapes=True, to_file='inception_module.png')

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv2d_31 (Conv2D)              (None, 256, 256, 96) 384         input_6[0][0]                    
__________________________________________________________________________________________________
conv2d_33 (Conv2D)              (None, 256, 256, 16) 64          input_6[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D)  (None, 256, 256, 3)  0           input_6[0][0]                    
____________________________________________________________________________________________

![](/helper/inception.JPG)

![](helper/inception.jpg)

* Concatenation stacks the feature maps of all the parallel branches along the channel axis (axis = -1). For example, the output of concatenate_2 has 480 channels (128 + 192 + 96 + 64).
* The smaller kernels are given more filters, while the 5x5 convolution gets fewer (possibly to limit the compute cost?).
* Feeding all 480 channels directly into the next layer's convolutions would be very compute-intensive, hence the 1x1 convolutions that reduce the channel count first.