# Residual connections and CNN depth

In [0]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model

In [0]:
# Load the MNIST train/test splits and report the shapes of the test split,
# which is the only split the rest of this notebook works with.
mnist_train, mnist_test = mnist.load_data()
train_images, train_labels = mnist_train
test_images, test_labels = mnist_test
print('Validation dataset:')
for split_array in (test_images, test_labels):
    print(split_array.shape)

In [0]:
# Preview a single digit from the test split, followed by its label.
sample_idx = 2
plt.matshow(test_images[sample_idx], cmap='gray')
plt.show()
print(test_labels[sample_idx])

In [0]:
# Use the test set to do the training (because it is smaller).
# -1 lets the sample count be inferred from the array instead of hard-coding
# 10000; the trailing 1 is the single channel axis matching Input(shape=(28,28,1)).
train_images = test_images.reshape( ( -1, 28, 28, 1 ) )
# Convert to float32 and divide the pixel values by 255.
train_images = train_images.astype( 'float32' ) / 255
print("Before:")
print( test_labels.shape )
print( test_labels[0] )

# Turn the integer class labels into one-hot vectors, the format required by
# the 'categorical_crossentropy' loss used by the models below.
train_labels = to_categorical( test_labels )
print("After:")
print( train_labels.shape )
print( train_labels[0] )

## I. A deep fully convolutional network does not learn (when depth = 16)

In [0]:
# Plain (non-residual) deep CNN: one entry convolution, a stack of 16
# shape-preserving convolutions, then a small dense classifier head.
input_img = Input(shape=(28,28,1))
# Entry convolution: no padding argument is given, so the Keras default applies.
output1 = Conv2D(16, (5, 5), activation='relu')(input_img)
# padding='same' keeps the spatial size constant, so the layers can be stacked
# arbitrarily deep. The running tensor is kept in `output1` so that the
# Flatten below never depends on a variable that only exists inside the loop.
for _ in range(16):
    output1 = Conv2D(16, (5, 5), padding = 'same', activation='relu')(output1)
output3 = Flatten()(output1)
output4 = Dense(units = 16, activation = 'relu')(output3)
output5 = Dense(units = 16, activation = 'relu')(output4)
# 10 softmax outputs, one per digit class.
output6 = Dense(units = 10, activation = 'softmax')(output5)
model = Model(inputs=input_img, outputs=output6)
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

In [0]:
# Draw the layer graph of the current model, annotated with tensor shapes
# and layer names.
plot_options = dict(show_shapes=True, show_layer_names=True)
plot_model(model, **plot_options)

In [0]:
# Train the plain CNN: 8 epochs, mini-batches of 10 samples, with 20% of the
# data held out for validation.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=10,
    epochs=8,
    validation_split=0.2,
)

## II. Adding residual connections makes it learn

In [0]:
# Residual CNN: same layout as the plain network, but every stacked
# convolution is wrapped in a skip connection (block output = input + conv(input)).
input_img = Input(shape=(28,28,1))
# Entry convolution: no padding argument is given, so the Keras default applies.
output1 = Conv2D(16, (5, 5), activation='relu')(input_img)
# padding='same' and 16 filters keep the block output the same shape as its
# input, which is what allows the element-wise add. The running tensor is kept
# in `output1` so the Flatten below never depends on a loop-only variable.
for _ in range(16):
    output2 = Conv2D(16, (5, 5), padding = 'same', activation='relu')(output1)
    output1 = add([output1, output2])
output3 = Flatten()(output1)
output4 = Dense(units = 16, activation = 'relu')(output3)
output5 = Dense(units = 16, activation = 'relu')(output4)
# 10 softmax outputs, one per digit class.
output6 = Dense(units = 10, activation = 'softmax')(output5)
model = Model(inputs=input_img, outputs=output6)
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

In [0]:
# Draw the layer graph of the current model, annotated with tensor shapes
# and layer names.
plot_options = dict(show_shapes=True, show_layer_names=True)
plot_model(model, **plot_options)

In [0]:
# Train the residual CNN with the same settings as the plain one: 8 epochs,
# mini-batches of 10 samples, with 20% of the data held out for validation.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=10,
    epochs=8,
    validation_split=0.2,
)

## III. An even deeper ResNet may yield better results (but may take longer to train)
Note that each residual block now has two Conv2D layers, each followed by batch normalization.

In [0]:
# Deeper residual CNN: 32 residual blocks, each made of two
# Conv2D + BatchNormalization pairs, followed by the same dense classifier head.
input_img = Input(shape=(28,28,1))
# Entry convolution: no padding argument is given, so the Keras default applies.
output1 = Conv2D(16, (5, 5), activation='relu')(input_img)
# padding='same' and 16 filters keep the block output the same shape as its
# input, which is what allows the element-wise add. The running tensor is kept
# in `output1` so the Flatten below never depends on a loop-only variable.
for _ in range(32):
    output2 = Conv2D(16, (5, 5), padding = 'same', activation='relu')(output1)
    output2 = BatchNormalization()(output2)
    output2 = Conv2D(16, (5, 5), padding = 'same', activation='relu')(output2)
    output2 = BatchNormalization()(output2)
    # Skip connection: block output = block input + transformed input.
    output1 = add([output1, output2])
output3 = Flatten()(output1)
output4 = Dense(units = 16, activation = 'relu')(output3)
output5 = Dense(units = 16, activation = 'relu')(output4)
# 10 softmax outputs, one per digit class.
output6 = Dense(units = 10, activation = 'softmax')(output5)
model = Model(inputs=input_img, outputs=output6)
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

In [0]:
# Draw the layer graph of the current model, annotated with tensor shapes
# and layer names.
plot_options = dict(show_shapes=True, show_layer_names=True)
plot_model(model, **plot_options)

In [0]:
# Train the deeper ResNet with the same settings as the earlier models:
# 8 epochs, mini-batches of 10 samples, with 20% of the data held out for validation.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=10,
    epochs=8,
    validation_split=0.2,
)