In [21]:
import tensorflow as tf
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
def load_data():
    """Load MNIST, shuffle the training split reproducibly, add a channel axis.

    Side effect: the global NumPy and TensorFlow random seeds are both set
    to 42, so the training permutation is identical on every call.

    Returns:
        ``(x_train, x_test, y_train, y_test)``. Both image arrays are reshaped
        to ``(n, 28, 28, 1)``; the training labels are permuted together with
        their images, the test split keeps its loaded order.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    images_train, labels_train = train_split
    images_test, labels_test = test_split

    # Seed first so the permutation drawn below is repeatable.
    np.random.seed(42)
    tf.random.set_seed(42)
    order = np.arange(len(images_train))
    np.random.shuffle(order)

    # One shared permutation keeps every image aligned with its label.
    images_train, labels_train = images_train[order], labels_train[order]

    def with_channel(images):
        # Append the trailing single-channel axis expected by the conv layers.
        return images.reshape(len(images), 28, 28, 1)

    return (with_channel(images_train), with_channel(images_test),
            labels_train, labels_test)

In [3]:
def prepare_data(x_train, x_test, y_train, y_test):
    """Wrap the train and test arrays in Keras batch iterators.

    Training batches are randomly augmented (height/width shift, rotation,
    zoom) and rescaled by 1/255. Test batches are only rescaled and are
    produced with ``shuffle=False``.

    Args:
        x_train: Training images.
        x_test: Test images.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        ``(training_data, validation_data)`` iterators, both created with
        ``batch_size=32`` and ``seed=42``.
    """
    pixel_scale = 1. / 255
    augmentation = dict(
        height_shift_range=0.1,
        width_shift_range=0.25,
        rotation_range=30,
        zoom_range=0.2,
        rescale=pixel_scale,
        fill_mode='nearest',
    )
    flow_options = dict(batch_size=32, seed=42)

    augmenting_gen = ImageDataGenerator(**augmentation)
    plain_gen = ImageDataGenerator(rescale=pixel_scale)

    training_data = augmenting_gen.flow(x_train, y_train, **flow_options)
    # The evaluation stream must keep a fixed order, hence shuffle=False.
    validation_data = plain_gen.flow(x_test, y_test, shuffle=False, **flow_options)
    return training_data, validation_data

In [4]:
def build_model_1():
    """Build CNN variant 1: conv stages of 32/16/8 filters, 16-unit dense head.

    Every stage is a 3x3, stride-1, 'same'-padded ReLU convolution followed
    by 2x2 max pooling. The flattened features then pass through
    Dense(16, relu) -> Dropout(0.2) -> Dense(10, softmax).

    Side effect: the TensorFlow global seed is reset to 11 before any layer
    is created.

    Returns:
        The uncompiled Sequential model.
    """
    tf.random.set_seed(11)
    layers = tf.keras.layers
    conv_options = dict(kernel_size=3, strides=1, data_format='channels_last',
                        activation='relu', padding="same")

    cnn = tf.keras.models.Sequential()
    for stage, n_filters in enumerate((32, 16, 8)):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': (28, 28, 1)} if stage == 0 else {}
        cnn.add(layers.Conv2D(filters=n_filters, **conv_options, **extra))
        cnn.add(layers.MaxPool2D(pool_size=(2, 2)))

    # Classification head.
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(16, activation="relu"))
    cnn.add(layers.Dropout(0.2))
    cnn.add(layers.Dense(10, activation="softmax"))
    return cnn

In [5]:
# Build a fresh instance of variant 1 and print its layer-by-layer summary.
build_model_1().summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 16)        4624      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 16)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 8)           1160      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 3, 3, 8)          0

In [6]:
def build_model_2():
    """Build CNN variant 2: conv stages of 32/16/8 filters, 20-unit dense head.

    Every stage is a 3x3, stride-1, 'same'-padded ReLU convolution followed
    by 2x2 max pooling. The flattened features then pass through
    Dense(20, relu) -> Dropout(0.2) -> Dense(10, softmax).

    Side effect: the TensorFlow global seed is reset to 11 before any layer
    is created.

    Returns:
        The uncompiled Sequential model.
    """
    tf.random.set_seed(11)
    layers = tf.keras.layers
    conv_options = dict(kernel_size=3, strides=1, data_format='channels_last',
                        activation='relu', padding="same")

    cnn = tf.keras.models.Sequential()
    for stage, n_filters in enumerate((32, 16, 8)):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': (28, 28, 1)} if stage == 0 else {}
        cnn.add(layers.Conv2D(filters=n_filters, **conv_options, **extra))
        cnn.add(layers.MaxPool2D(pool_size=(2, 2)))

    # Classification head.
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(20, activation="relu"))
    cnn.add(layers.Dropout(0.2))
    cnn.add(layers.Dense(10, activation="softmax"))
    return cnn

In [7]:
# Build a fresh instance of variant 2 and print its layer-by-layer summary.
build_model_2().summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 14, 14, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 14, 14, 16)        4624      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 7, 7, 16)         0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 7, 7, 8)           1160      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 3, 3, 8)         

In [8]:
def build_model_3():
    """Build CNN variant 3: conv stages of 32/15/10 filters, 18-unit dense head.

    Every stage is a 3x3, stride-1, 'same'-padded ReLU convolution followed
    by 2x2 max pooling. The flattened features then pass through
    Dense(18, relu) -> Dropout(0.2) -> Dense(10, softmax).

    Side effect: the TensorFlow global seed is reset to 11 before any layer
    is created.

    Returns:
        The uncompiled Sequential model.
    """
    tf.random.set_seed(11)
    layers = tf.keras.layers
    conv_options = dict(kernel_size=3, strides=1, data_format='channels_last',
                        activation='relu', padding="same")

    cnn = tf.keras.models.Sequential()
    for stage, n_filters in enumerate((32, 15, 10)):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': (28, 28, 1)} if stage == 0 else {}
        cnn.add(layers.Conv2D(filters=n_filters, **conv_options, **extra))
        cnn.add(layers.MaxPool2D(pool_size=(2, 2)))

    # Classification head.
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(18, activation="relu"))
    cnn.add(layers.Dropout(0.2))
    cnn.add(layers.Dense(10, activation="softmax"))
    return cnn

In [9]:
# Build a fresh instance of variant 3 and print its layer-by-layer summary.
build_model_3().summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 14, 14, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 14, 14, 15)        4335      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 7, 7, 15)         0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 7, 7, 10)          1360      
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 3, 3, 10)        

In [16]:
def train_model(model_name: str="model1"):
    """Train one of the three CNN variants on MNIST and score it.

    Args:
        model_name: Which architecture to build: ``"model1"``, ``"model2"``
            or ``"model3"``.

    Returns:
        ``(cnn, history, test_score)``: the trained model, the object
        returned by ``fit``, and the second value returned by ``evaluate``
        on the validation iterator (the sparse categorical accuracy).

    Raises:
        ValueError: If ``model_name`` is not one of the known variants.
    """
    # Fail fast on a typo instead of silently training variant 3.
    known_models = ("model1", "model2", "model3")
    if model_name not in known_models:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {known_models}")

    x_train, x_test, y_train, y_test = load_data()
    training_data, validation_data = prepare_data(x_train, x_test, y_train, y_test)
    if model_name == "model1":
        cnn = build_model_1()
    elif model_name == "model2":
        cnn = build_model_2()
    else:
        cnn = build_model_3()

    # `metrics` is documented as a list; a bare string is not portable.
    cnn.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
                metrics=["sparse_categorical_accuracy"])

    # NOTE(review): validation_data is built from the test split and also
    # drives early stopping, so test_score is not an untouched hold-out
    # estimate. Consider carving a validation split out of the training data.
    early_stop = EarlyStopping(monitor='val_loss', patience=3)
    history = cnn.fit(training_data, epochs=15, callbacks=[early_stop],
                      validation_data=validation_data)
    # evaluate() returns [loss, metric]; index 1 is the accuracy.
    test_score = cnn.evaluate(validation_data)[1]
    return cnn, history, test_score


In [17]:
# Train variant "model1"; keep the fitted model, its fit history and its test score.
model_1, history_1, test_score_1 = train_model("model1")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15


In [18]:
# Train variant "model2"; keep the fitted model, its fit history and its test score.
model_2, history_2, test_score_2 = train_model("model2")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15


In [19]:
# Train variant "model3"; keep the fitted model, its fit history and its test score.
model_3, history_3, test_score_3 = train_model("model3")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15


In [25]:
def get_model_params_count(model):
    """Return the total number of scalar parameters held by ``model``.

    Counts every entry of ``model.trainable_weights`` and
    ``model.non_trainable_weights``; each weight contributes the product of
    the dimensions reported by its ``get_shape()``.

    Args:
        model: Object exposing ``trainable_weights`` and
            ``non_trainable_weights`` iterables of weights.

    Returns:
        The parameter count as a Python ``int``.
    """
    all_weights = list(model.trainable_weights) + list(model.non_trainable_weights)
    # Convert each product to int: summing an empty list with np.sum yields
    # float 0.0, which used to turn the whole count into a float.
    return sum(int(np.prod(weight.get_shape())) for weight in all_weights)

In [26]:
# Tabulate the test score and total parameter count of the three trained models.
comparison = pd.DataFrame({
    'model': ['model_1', 'model_2', 'model_3'],
    'accuracy': [test_score_1, test_score_2, test_score_3],
    'Params': [get_model_params_count(net) for net in (model_1, model_2, model_3)],
})

In [27]:
# Display the comparison table as the cell output.
comparison

Unnamed: 0,model,accuracy,Params
0,model_1,0.9706,7442.0
1,model_2,0.9784,7774.0
2,model_3,0.9713,7843.0
