In [6]:
import tensorflow as tf
import keras
import numpy as np

In [2]:
# Residual connection: gives the error signal a shortcut around a block, so that the noise each layer adds during backpropagation does not accumulate and drown out the gradient in deep networks
# We add a residual connection so that the input of the layer is added back to the output of the layer (the shapes of the output and the input must be the same)

In [3]:
#x ------> input
#residual = x
#x = block(x) # block in the layer
#x = add([x,residual]) # adding the input and the output of the layer(the output will have the full information of the input)

In [4]:
# However, in convolutional layers the shapes of the input and the output differ if the number of filters is increased or max pooling is introduced
# In that case we pass the residual through a 1x1 Conv2D layer with no activation so that it matches the output shape and the two can be added (padding="same" is required in the block's convolutions so that the spatial sizes match)
# If max pooling is introduced, e.g. with pool size 2, the 1x1 Conv2D layer on the residual uses strides=2 to match the downsampling caused by the max pooling, and then the residual is added


In [7]:
from keras.layers import Input,Conv2D,MaxPool2D,add


In [16]:
# Worked example: a residual connection around a conv block that changes both
# the channel count and the spatial size.
input_layer = Input(shape=(32, 32, 3))

# Stem convolution. Its output feeds the conv block below and is also kept
# as the source of the skip path.
conv_layer1 = Conv2D(filters=32, kernel_size=3, activation="relu")(input_layer)
residual_1 = conv_layer1

# Main path: a 64-filter convolution followed by 2x max pooling.
conv_layer2 = Conv2D(filters=64, kernel_size=3, activation="relu", padding="same")(conv_layer1)
max_pool1 = MaxPool2D(pool_size=2, padding="same")(conv_layer2)

# Skip path: a 1x1 convolution with no activation projects the saved tensor to
# 64 channels (same as conv_layer2), and strides=2 mirrors the downsampling of
# the pool-size-2 max pooling, so both paths end up with the same shape.
residual_2 = Conv2D(filters=64, kernel_size=1, strides=2)(residual_1)

# Merge the pooled block output with the projected input.
add_layer = add([max_pool1, residual_2])


In [8]:
from keras.layers import Rescaling

In [40]:
# Defining a function which creates a residual block with 2 convolutional layers and depending on the pooling parameter, it will have strides or not

# Entry point of the residual-block model: 32x32 inputs with 3 channels,
# with every value multiplied by 1/255 before the first convolution.
input_layer = Input(shape=(32, 32, 3))
rescale_layer = Rescaling(scale=1.0 / 255)(input_layer)

def build_residual_block(previous_layer, filters, pool_size=0, pooling=False):
    """Build a residual block of two 3x3 convolutions around ``previous_layer``.

    The main path is two ``Conv2D(filters, 3, activation="relu", padding="same")``
    layers, optionally followed by max pooling. The skip path is a 1x1
    convolution (no activation) applied to ``previous_layer`` so that its
    channel count -- and, when pooling, its spatial size -- matches the main
    path before the two are added.

    Args:
        previous_layer: Tensor to feed into the block.
        filters: Number of filters for both 3x3 convolutions and the 1x1
            projection on the skip path.
        pool_size: Max-pooling window, also used as the stride of the 1x1
            projection. Only used when ``pooling`` is truthy.
        pooling: When truthy, downsample the main path with max pooling and
            the skip path with a strided 1x1 convolution.

    Returns:
        The tensor produced by adding the main path and the projected skip path.

    Raises:
        ValueError: If ``pooling`` is truthy but ``pool_size`` is zero or empty.
    """
    # Fail early with a clear message instead of handing a zero pool size /
    # stride to the layers below.
    if pooling and not pool_size:
        raise ValueError(
            "pool_size must be a positive integer (or tuple) when pooling is enabled"
        )

    main_path = Conv2D(filters, 3, activation="relu", padding="same")(previous_layer)
    main_path = Conv2D(filters, 3, activation="relu", padding="same")(main_path)

    # Truthiness test (rather than `== True` / `== False`) so that every value
    # of `pooling` takes one of the two branches and a tensor is always returned.
    if pooling:
        main_path = MaxPool2D(pool_size, padding="same")(main_path)
        # The stride mirrors the pooling so both paths shrink by the same factor.
        shortcut = Conv2D(filters, 1, strides=pool_size)(previous_layer)
    else:
        shortcut = Conv2D(filters, 1, padding="same")(previous_layer)

    return add([main_path, shortcut])


In [9]:
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

In [48]:
# Stack two residual blocks: the first downsamples by 2, the second only widens
# the channels (pool_size is irrelevant when pooling is disabled, so it is omitted).
res_block1 = build_residual_block(rescale_layer, filters=64, pool_size=2, pooling=True)
res_block2 = build_residual_block(res_block1, filters=128, pooling=False)

# Apply Global Average Pooling
gap_layer = GlobalAveragePooling2D()(res_block2) # Used as an alternative to flatten() : averages the values of each channel across the spatial dimensions (width and height) of the feature maps, resulting in a single value per channel.

# Add a dense layer
# A single output unit needs a sigmoid: softmax normalizes across units, so over
# one unit it always outputs exactly 1.0 and the model could never learn anything.
output_layer = Dense(1, activation='sigmoid')(gap_layer)

# Define the model
model = Model(inputs=input_layer, outputs=output_layer)

# Display model summary
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 rescaling_4 (Rescaling)        (None, 32, 32, 3)    0           ['input_14[0][0]']               
                                                                                                  
 conv2d_59 (Conv2D)             (None, 32, 32, 64)   1792        ['rescaling_4[0][0]']            
                                                                                                  
 conv2d_60 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_59[0][0]']              
                                                                                            

In [49]:
# Batch normalization : Used for normalization of outputs of the convolutional layers
# If possible, try to apply normalization before applying activation like below

In [10]:
from keras.layers import BatchNormalization, Activation

In [53]:
# One convolution layer
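#conv_layer = Conv2D(32,3,use_bias=False)(x) # x is the input tensor; the bias is dropped because batch normalization re-centres the output anyway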
#conv_layer = BatchNormalization()(conv_layer)
#conv_layer = Activation("relu")(conv_layer)

In [1]:
# Always freeze the batch normalization layers before fine-tuning a model; otherwise they will keep updating their internal mean and variance
# used during normalization, which can interfere with the very small updates applied to the surrounding convolutional layers

In [2]:
# Depthwise separable convolutional layers

In [3]:
# https://www.youtube.com/watch?v=vVaRhZXovbw  : Follow this link to understand how depthwise convolution works
# Leads to less computation by treating each input channel as its own feature group: a depthwise step applies one spatial kernel to each channel independently,
# and then pointwise kernels (1 x 1 x number of channels or feature groups) mix the channels, so that each cell of the output matrix has info about every feature group

In [4]:
# For the input layer, the number of channels or feature groups is 3 for RGB images and 1 for greyscale images
# For deeper layers, the number of channels or feature groups equals the number of filters of the previous layer

In [5]:
# Applying all the necessary architecture patterns and building a simpler Xception-like model from scratch

In [15]:
from keras import Sequential
from keras.layers import RandomFlip,RandomRotation,RandomZoom , SeparableConv2D,Dropout

In [13]:
# Augmentation pipeline applied to the input images: a horizontal flip,
# a rotation with factor 0.1 and a zoom with factor 0.2, each applied at random.
data_augmentation = Sequential(
    layers=[
        RandomFlip(mode="horizontal"),
        RandomRotation(factor=0.1),
        RandomZoom(height_factor=0.2),
    ]
)


In [20]:
input_layer = Input(shape=(180,180,3))

data_aug_layer = data_augmentation(input_layer)

rescale_layer = Rescaling(1./255)(data_aug_layer)

# We start with a regular Conv2D instead of a separable one.
# Reason: separable convolutions assume the feature channels are largely independent,
# but the RGB channels of an image are highly correlated.
# So the first layer is a normal Conv2D layer; after that we use SeparableConv2D.
conv_layer1 = Conv2D(filters=32,kernel_size=5,use_bias=False)(rescale_layer)

# Tensor fed into the next block. It starts as the stem convolution's output and is
# replaced by each block's output, so the blocks are chained one after another.
# (Feeding conv_layer1 into every iteration would leave all blocks but the last
# disconnected from the model output.)
block_input = conv_layer1

# For loop to build the convolutional blocks
for size in [32,64,128,256,512] : # filter count grows with depth to maintain the pyramid structure
    residual_input = block_input

    # Applying batch normalization before applying the activation function
    batch_norm1 = BatchNormalization()(block_input)
    activation1 = Activation("relu")(batch_norm1)
    seperable_conv_layer1 = SeparableConv2D(size,kernel_size=3,padding="same",use_bias=False)(activation1)

    # Applying batch normalization before applying the activation function
    batch_norm2 = BatchNormalization()(seperable_conv_layer1)
    activation2 = Activation("relu")(batch_norm2)
    seperable_conv_layer2 = SeparableConv2D(size,kernel_size=3,padding="same",use_bias=False)(activation2)

    max_pool_layer = MaxPool2D(3,strides=2,padding="same")(seperable_conv_layer2)

    # Project the block's input to the output shape of the max pool layer:
    # `size` channels, and strides=2 to match the pooling's downsampling.
    residual_output = Conv2D(size,1,strides=2,padding="same",use_bias=False)(residual_input)
    add_layer = add([max_pool_layer,residual_output]) # adding the residual and the output tensor of the max pool layer

    # The output of this block is the input of the next one.
    block_input = add_layer

global_avg_layer = GlobalAveragePooling2D()(add_layer) # Used in place of flatten()
dropout_layer = Dropout(0.5)(global_avg_layer)
output_layer = Dense(1,activation="sigmoid")(dropout_layer)

model = Model(inputs=input_layer,outputs=output_layer)



Each convolution block composition:
1. Residual input recorded
2. 1 Separable Conv2D layer, preceded by batch normalization and then the activation function (batch normalization is applied before the activation)
3. 1 Separable Conv2D layer, preceded by batch normalization and then the activation function (batch normalization is applied before the activation)
4. 1 Max Pooling layer
5. Converting the residual input to a tensor which can be added to the output of the max pooling layer, i.e. the output of the convolution block
6. Adding the residual and the output tensor of the max pooling layer

In [21]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections) of `model`.
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 180, 180, 3  0           []                               
                                )]                                                                
                                                                                                  
 sequential (Sequential)        (None, 180, 180, 3)  0           ['input_3[0][0]']                
                                                                                                  
 rescaling_2 (Rescaling)        (None, 180, 180, 3)  0           ['sequential[2][0]']             
                                                                                                  
 conv2d_12 (Conv2D)             (None, 176, 176, 32  2400        ['rescaling_2[0][0]']      

Model description:

Input Layer --> Data Augmentation layer --> Rescaling layer --> Convolution layer --> Separable Conv2D block with 32 filters --> Separable Conv2D block with 64 filters --> Separable Conv2D block with 128 filters --> Separable Conv2D block with 256 filters --> Separable Conv2D block with 512 filters --> Global Average Pooling layer (used in place of a Flatten layer: it reduces each feature map to a single value per channel) --> Dropout layer --> Output layer