# Design Choices in Convolutional Neural Networks

# **All examples below, I got very low accuracy values with optimizer Adadelta. So I had to change it to Adam.** 
# **Ömer Faruk Güzel**

###  Importing packages

In [1]:
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing import image
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.models import Model
import timeit

import warnings
warnings.filterwarnings('ignore')

### Preparing Dataset

In [2]:
batch_size = 128
num_classes = 10
epochs = 2

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


## Part 1: Influence of convolution size

Try the models with different convolution sizes 5x5, 7x7 and 9x9 etc.

Analyze the number of model parameters, accuracy and training time

### Model with (3 x 3) Convolution

In [21]:
K.clear_session() 
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
start = timeit.default_timer()
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 8)         80        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 16)        1168      
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                294944    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 296,522
Trainable params: 296,522
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9778000116348267
Time Taken to run the model: 4.878852159999951 seconds


### Try models with different Convolution sizes

In [22]:
K.clear_session()
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (5, 5), activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 8)         208       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 16)        3216      
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                204832    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 208,586
Trainable params: 208,586
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9818999767303467
Time Taken to run the model: 4.893595889000153 seconds


In [23]:
K.clear_session()
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(7, 7), activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (7, 7), activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 22, 22, 8)         400       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 16)        6288      
_________________________________________________________________
flatten (Flatten)            (None, 4096)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                131104    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 138,122
Trainable params: 138,122
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9825999736785889
Time Taken to run the model: 4.419941024999844 seconds


In [24]:
K.clear_session()
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (9, 9), activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 8)         80        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 18, 18, 16)        10384     
_________________________________________________________________
flatten (Flatten)            (None, 5184)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                165920    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 176,714
Trainable params: 176,714
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9858999848365784
Time Taken to run the model: 4.8116716320000705 seconds


### Write your findings about activations here?

1.   We can clearly see that increasing convolution size decreases the number of trainable parameters until 7x7 sized convolution and increased in 9x9. The training time also showed non-linear change. It decreased until 7x7 sized convolution and increased in 9x9. 
2.   Accuracy showed linear change. When we increase the size of convolution, accuracy increased too.




## Part 2: Influence of Striding

Try the models with different stride sizes such as 2,3,4 etc.

Analyze the number of model parameters, accuracy and training time

### Model with Convolution with 2 Steps

In [25]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), strides=2, activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), strides=2, activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 13, 13, 8)         80        
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 6, 6, 16)          1168      
_________________________________________________________________
flatten_1 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                18464     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 20,042
Trainable params: 20,042
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9646999835968018
Time Taken to run the model: 4.559287178999966 seconds


In [28]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), strides=3, activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), strides=2, activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 9, 9, 8)           80        
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 4, 4, 16)          1168      
_________________________________________________________________
flatten_2 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                8224      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                330       
Total params: 9,802
Trainable params: 9,802
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9480999708175659
Time Taken to run the model: 4.1516758960001425 seconds


In [29]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), strides=4, activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), strides=2, activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 7, 7, 8)           80        
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 3, 3, 16)          1168      
_________________________________________________________________
flatten_3 (Flatten)          (None, 144)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 32)                4640      
_________________________________________________________________
dense_7 (Dense)              (None, 10)                330       
Total params: 6,218
Trainable params: 6,218
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9345999956130981
Time Taken to run the model: 3.912895664000189 seconds


### Write your findings about influence of striding here?

1.   Accuracy and number of trainable parameters inversely proportional with the size of stride and training time is directly proportional with size of stride.
2.   It depends on the task which we should choose to sacrifice, 25% of time/computation power or 3% of accuracy.




## Part 3: Influence of Padding

Try the models with padding and without padding.

Analyze the number of model parameters, accuracy and training time

### Model with (3 x 3) Convolution with Same Padding

In [33]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), strides=1, padding='same', activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), strides=1, padding='same', activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 28, 28, 8)         80        
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 28, 28, 16)        1168      
_________________________________________________________________
flatten_7 (Flatten)          (None, 12544)             0         
_________________________________________________________________
dense_14 (Dense)             (None, 32)                401440    
_________________________________________________________________
dense_15 (Dense)             (None, 10)                330       
Total params: 403,018
Trainable params: 403,018
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.982699990272522
Time Taken to run the model: 4.648490133999985 seconds


In [34]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), strides=1, padding='valid', activation='relu', input_shape=input_shape))
model.add(Conv2D(16, (3, 3), strides=1, padding='valid', activation='relu'))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
end = timeit.default_timer()
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 24, 24, 16)        1168      
_________________________________________________________________
flatten_8 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 32)                294944    
_________________________________________________________________
dense_17 (Dense)             (None, 10)                330       
Total params: 296,522
Trainable params: 296,522
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Test accuracy: 0.9800000190734863
Time Taken to run the model: 4.585634609000408 seconds


### Write your findings about influence of padding here?

1.   Pandding effects accuracy slightly, it increases accuracy 0.26% in this particular model.
2.   Training time is effected as ~0.1 seconds, it lasts longer when padding is applied. Number of trainable parameters increased almost twice by applying padding.




## Part 4: Influence of Pooling

Try the models with different pooling window sizes such as 2x2, 3x3, 4x4 etc.

Analyze the number of model parameters, accuracy and training time

### Model with (3 x 3) Convolution with Pooling (2 x 2) 

In [12]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
end = timeit.default_timer()
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 8)         0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 11, 11, 16)        1168      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 32)                12832     
_________________________________________________________________
dense_13 (Dense)             (None, 10)               

### Model with (3 x 3) Convolution with Pooling (3 x 3) 

In [13]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
end = timeit.default_timer()
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 8, 8, 8)           0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 6, 6, 16)          1168      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 2, 2, 16)          0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 64)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_15 (Dense)             (None, 10)               

### Model with (3 x 3) Convolution with Pooling (4 x 4) 

In [14]:
start = timeit.default_timer()   
model = Sequential()
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(4, 4)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(4, 4)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))
end = timeit.default_timer()
print("Time Taken to run the model:",end - start, "seconds")  

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 6, 6, 8)           0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 4, 4, 16)          1168      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 1, 1, 16)          0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 16)                0         
_________________________________________________________________
dense_16 (Dense)             (None, 32)                544       
_________________________________________________________________
dense_17 (Dense)             (None, 10)               

### Write your findings about influence of pooling here?

1.   Effects of pooling on training time showed non-linear change. It increased first, then decreased slightly. 
2.   The accuracy and number of trainable parameters effected negatively by pooling. They both decreased clearly.




# **All examples above, I got very low accuracy values with optimizer Adadelta. So I had to change it to Adam.** 