# Imports and initializations

In [None]:
# Imports and constants
import numpy as np
import pandas as pd
import os
import csv
import multiprocessing
import cv2
import PIL
import math

from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, DepthwiseConv2D, GlobalAveragePooling2D, Reshape, Rescaling, Normalization
from keras.layers import add, multiply
from keras.layers import Activation, Dropout, Flatten, Dense, Input, Add, AveragePooling2D, ZeroPadding2D
from keras.utils import to_categorical
from keras.layers import BatchNormalization
from keras.optimizers import Adam, SGD
from keras.optimizers.schedules import CosineDecayRestarts, CosineDecay
from keras.initializers import glorot_uniform
from keras.losses import CategoricalCrossentropy

import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict

# Root folder that holds the image sets and the label CSV files
DL_FOLDER = 'project-deep-learning-data'

# Names of the image sub-folders for each split
TRAIN_FOLDER = 'train_images'
VAL_FOLDER = 'val_images'
TEST_FOLDER = 'test_images'
# Prefix placed in front of DL_FOLDER (empty here)
KAGGLE_INPUT = ''

# Dataset root and the per-split image folders derived from it
DATASET_FOLDER = os.path.join(KAGGLE_INPUT, DL_FOLDER)
TRAIN_PATH = os.path.join(DATASET_FOLDER, TRAIN_FOLDER)
VAL_PATH = os.path.join(DATASET_FOLDER, VAL_FOLDER)
TEST_PATH = os.path.join(DATASET_FOLDER, TEST_FOLDER)

# CSV files with the image-label associations of each split
TRAIN_LABELS_PATH = os.path.join(DATASET_FOLDER, 'train.csv')
VAL_LABELS_PATH = os.path.join(DATASET_FOLDER, 'val.csv')
TEST_LABELS_PATH = os.path.join(DATASET_FOLDER, 'test.csv')

# File the predictions are written to
SUBMISSION_FILE = "submission.csv"

In [None]:
# Processing for training using GPU
# Memory growth is configured first: TensorFlow documents that it must be set
# before the GPUs are initialized, so the session is only opened afterwards.
physical_device = tf.config.experimental.list_physical_devices('GPU')
print(f'Device found : {physical_device}')
# Iterating (instead of indexing [0]) also covers the "no GPU found" case,
# where the list is empty and indexing would raise IndexError.
for gpu_device in physical_device:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Session configured to log the device each operation is placed on
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

In [None]:
# Constants shared by the data pipeline and the model definitions
BATCH_SIZE = 32    # samples per batch produced by the generators
NUM_CLASSES = 100  # size of the one-hot label vector / softmax output
IMG_SIZE = 64      # side length (in pixels) of the square model input

In [None]:
# Read a label CSV file into a DataFrame
def get_df_labels(df_path):
    """Load the CSV at ``df_path`` and return it as a DataFrame.

    When the file has a ``Class`` column, its values are converted to
    strings; files without that column are returned unchanged.
    """
    labels_df = pd.read_csv(df_path)
    if 'Class' not in labels_df.columns:
        return labels_df
    labels_df['Class'] = labels_df['Class'].astype(str)
    return labels_df
    
# Load the train, validation and test label tables (in that order)
df_train_labels, df_val_labels, df_test_labels = (
    get_df_labels(labels_path)
    for labels_path in (TRAIN_LABELS_PATH, VAL_LABELS_PATH, TEST_LABELS_PATH)
)

# Create the dataset

In [None]:
# Read an image and resize it to a specified size
def get_image(image_path, image_size):
    """Read the image at ``image_path`` and resize it to ``image_size``.

    Args:
        image_path: Path of the image file.
        image_size: Target size, passed straight to ``cv2.resize``
            (callers in this file pass a ``(width, height)`` tuple).

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated-looking resize error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.resize(image, image_size)

# Read dataset based on CSV files. Used for training and validation set (as we have labels)
def read_dataset(df_labels, folder, image_size):
    """Load the labelled images listed in ``df_labels``.

    Args:
        df_labels: DataFrame with an ``Image`` column (file names) and a
            ``Class`` column (values convertible to int).
        folder: Sub-folder of ``DATASET_FOLDER`` containing the images.
        image_size: Side length the images are resized to.

    Returns:
        ``(data, labels)``: the stacked images and the one-hot encoded
        labels with ``NUM_CLASSES`` columns.
    """
    image_names = df_labels['Image']
    class_values = df_labels['Class']
    target_size = (image_size, image_size)

    images = [
        get_image(os.path.join(DATASET_FOLDER, folder, name), target_size)
        for name in image_names
    ]

    class_ids = np.array(list(class_values), dtype=int)
    one_hot = to_categorical(class_ids, num_classes=NUM_CLASSES)
    return np.array(images), one_hot

# Read test set based on the test.csv file.
# Returns the dataset as a numpy array and the name of the test images.
def read_test_set(df_labels, folder, image_size):
    """Load the unlabelled images listed in ``df_labels``.

    Args:
        df_labels: DataFrame with an ``Image`` column holding file names.
        folder: Sub-folder of ``DATASET_FOLDER`` containing the images.
        image_size: Side length the images are resized to.

    Returns:
        ``(data, filenames)``: the stacked images and the list of file
        names, both in the row order of ``df_labels``.
    """
    filenames = list(df_labels['Image'])
    target_size = (image_size, image_size)
    images = [
        get_image(os.path.join(DATASET_FOLDER, folder, name), target_size)
        for name in filenames
    ]
    return np.array(images), filenames

In [None]:
# Read train, validation and test set, resized to the model input size.
# IMG_SIZE is used instead of a literal so the data always matches the
# input shape the models are built with.
X_train, y_train = read_dataset(df_train_labels, TRAIN_FOLDER, IMG_SIZE)
X_val, y_val = read_dataset(df_val_labels, VAL_FOLDER, IMG_SIZE)
X_test, test_filenames = read_test_set(df_test_labels, TEST_FOLDER, IMG_SIZE)

In [None]:
# Training generator: random augmentation plus rescaling of pixel values by 1/255
datagen = ImageDataGenerator(
    rescale=1. / 255.0,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation / test generator: rescaling only, no augmentation
val_test_datagen = ImageDataGenerator(rescale=1. / 255)


In [None]:
# Batch iterators for the three splits. Only the training data is shuffled;
# validation and test keep the order of the arrays they were built from.
train_generator = datagen.flow(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_generator = val_test_datagen.flow(
    X_val, y_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_generator = val_test_datagen.flow(
    X_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Number of steps in one epoch, i.e. the length of the generator
def get_steps_per_epoch(generator):
    """Return ``len(generator)``, used as the number of steps per epoch."""
    steps = len(generator)
    return steps

# Models

In [None]:
# Reset the train and validation generators
def reset_generators(train_generator, val_generator):
    """Call ``reset()`` on the train generator, then on the validation generator."""
    for data_generator in (train_generator, val_generator):
        data_generator.reset()

# Function to fit model. Fitting uses a checkpoint to save the model that has
# the best accuracy on the validation set (which will be saved to a h5 file).
def fit_model(model, train_generator, val_generator, num_epochs=40, checkpoint_name='best_model.h5'):
    """Train ``model`` and keep the checkpoint with the best validation accuracy.

    Args:
        model: Compiled model exposing ``fit``.
        train_generator: Training data generator; reset before training.
        val_generator: Validation data generator; reset before training.
        num_epochs: Number of epochs to train for.
        checkpoint_name: File the best model (highest ``val_accuracy``) is
            saved to.

    Returns:
        The value returned by ``model.fit`` (previously discarded), so the
        caller can inspect the training run afterwards.
    """
    reset_generators(train_generator, val_generator)

    # Only overwrite the checkpoint when validation accuracy improves.
    checkpoint_callback = ModelCheckpoint(checkpoint_name, save_best_only=True,
                                          monitor='val_accuracy', mode='max', verbose=1)
    return model.fit(train_generator,
                     validation_data=val_generator,
                     epochs=num_epochs,
                     workers=multiprocessing.cpu_count(),
                     callbacks=[checkpoint_callback])

## Vanilla CNN models

In [None]:
# CNN1 model architecture.
# Can be trained using either Adam or SGD (both with their default values).
def cnn_1_model(use_adam=True):
    """Build and compile the CNN1 model.

    Three convolution + max-pooling stages (16, 32 and 16 filters) are
    followed by a 64-unit dense layer, dropout and the softmax output.

    Args:
        use_adam: Compile with the ``'adam'`` optimizer when true,
            otherwise with ``'sgd'``.

    Returns:
        The compiled model.
    """
    model = Sequential([
        Conv2D(16, (3, 3), input_shape=(IMG_SIZE, IMG_SIZE, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(32, (3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(16, (3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation="softmax"),
    ])

    optimizer_name = 'adam' if use_adam else 'sgd'
    model.compile(loss='categorical_crossentropy', optimizer=optimizer_name, metrics=['accuracy'])
    return model

In [None]:
# Build one CNN1 model per optimizer (Adam and SGD); they are trained below
cnn_1_adam = cnn_1_model(use_adam=True)
cnn_1_sgd = cnn_1_model(use_adam=False)

In [None]:
# Train CNN1 (Adam) for 60 epochs
fit_model(cnn_1_adam, train_generator, val_generator,
          num_epochs=60, checkpoint_name='cnn_1_adam_weights.h5')

In [None]:
# Train CNN1 (SGD) for 60 epochs
fit_model(cnn_1_sgd, train_generator, val_generator,
          num_epochs=60, checkpoint_name='cnn_1_sgd_weights.h5')

In [None]:
# CNN2 model architecture.
# Can be trained either using the default Adam optimizer or using Adam with
# cosine restarts (initial learning rate 1e-2, final learning rate 1e-5).
def cnn_2_model(num_epochs, use_default_optimizer=False):
    """Build and compile the CNN2 model.

    Three convolution stages (32, 64, 128 filters), each followed by batch
    normalization, max-pooling and dropout, feed two dense layers
    (256 and 128 units) and the softmax output.

    Args:
        num_epochs: Number of epochs the model will be trained for. It only
            sizes the cosine-restart period (33% of the total number of
            steps), so it should be the real, positive epoch count.
        use_default_optimizer: Compile with the ``'adam'`` optimizer when
            true; otherwise with Adam driven by ``CosineDecayRestarts``.

    Returns:
        The compiled model.
    """
    model = Sequential()

    # Convolutional stages; only the first layer declares the input shape.
    for stage_index, filters in enumerate((32, 64, 128)):
        if stage_index == 0:
            model.add(Conv2D(filters, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
        else:
            model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

    model.add(Flatten())

    # Dense head
    for units in (256, 128):
        model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES, activation="softmax"))

    if use_default_optimizer:
        optimizer = 'adam'
    else:
        # The schedule is only needed (and only built) for this branch.
        steps_per_epoch = get_steps_per_epoch(train_generator)
        initial_learning_rate = 0.01
        final_learning_rate = 0.00001
        # alpha is the floor of the schedule as a fraction of the initial rate
        alpha = final_learning_rate / initial_learning_rate

        total_steps = steps_per_epoch * num_epochs
        first_decay_steps = total_steps * 0.33

        lr_schedule = CosineDecayRestarts(initial_learning_rate=initial_learning_rate, alpha=alpha,
                                          first_decay_steps=first_decay_steps, t_mul=1.0)
        optimizer = Adam(learning_rate=lr_schedule)

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model

In [None]:
# Create CNN2 models and train them.
# The first positional argument of cnn_2_model is the number of epochs, so the
# optimizer choice has to be passed by keyword. 60 matches the number of
# epochs both models are trained for below.
cnn_2_default = cnn_2_model(60, use_default_optimizer=True)
cnn_2_cosine_restarts = cnn_2_model(60, use_default_optimizer=False)

In [None]:
# Train CNN2 (default optimizer) for 60 epochs
fit_model(cnn_2_default, train_generator, val_generator,
          num_epochs=60, checkpoint_name='cnn_2_weights.h5')

In [None]:
# Train CNN2 (cosine restarts) for 60 epochs
fit_model(cnn_2_cosine_restarts, train_generator, val_generator,
          num_epochs=60, checkpoint_name='cnn_2_weights_cos.h5')

In [None]:
# CNN3 model architecture.
# Model is compiled either using Adam optimizer or SGD Nesterov with 0.9 momentum.
# Both are using a Cosine Decay Scheduler with warmup (10 epochs by default).
# Also supports a custom activation function and label smoothing.
def cnn_3_model(warmup_epochs=10, use_adam=True, initial_learning_rate=0.01, final_learning_rate=0.00001,
                activation='relu', label_smoothing=0, decay_epochs=180):
    """Build and compile the CNN3 model.

    Four convolution stages (256, 512, 512, 1024 filters), each followed by
    batch normalization, max-pooling and dropout, feed three dense layers
    (1024, 1024, 512 units) and the softmax output.

    Args:
        warmup_epochs: Number of epochs used as ``warmup_steps`` of the
            learning-rate schedule.
        use_adam: Use Adam when true, otherwise SGD with Nesterov momentum 0.9.
            Both use a weight decay of 0.001.
        initial_learning_rate: Learning rate targeted at the end of warmup.
        final_learning_rate: Learning rate the schedule decays towards;
            passed to the schedule as a fraction of ``initial_learning_rate``.
        activation: Activation of the convolution and hidden dense layers.
        label_smoothing: Label smoothing of the categorical cross-entropy loss.
        decay_epochs: Number of epochs used as ``decay_steps`` of the
            schedule. The default keeps the previously hard-coded 180.

    Returns:
        The compiled model.
    """
    model = Sequential()

    # Convolutional stages; only the first layer declares the input shape.
    for stage_index, filters in enumerate((256, 512, 512, 1024)):
        if stage_index == 0:
            model.add(Conv2D(filters, (3, 3), activation=activation, input_shape=(IMG_SIZE, IMG_SIZE, 3)))
        else:
            model.add(Conv2D(filters, (3, 3), activation=activation))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

    model.add(Flatten())

    # Dense head
    for units in (1024, 1024, 512):
        model.add(Dense(units, activation=activation))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

    model.add(Dense(NUM_CLASSES, activation="softmax"))

    steps_per_epoch = get_steps_per_epoch(train_generator)
    # alpha is the floor of the schedule as a fraction of the initial rate
    alpha = final_learning_rate / initial_learning_rate

    first_decay_steps = steps_per_epoch * decay_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    # Starts at 0.0 and warms up to initial_learning_rate before decaying.
    lr_schedule = CosineDecay(0.0, first_decay_steps, alpha,
                              warmup_target=initial_learning_rate, warmup_steps=warmup_steps)

    if use_adam:
        optimizer = Adam(learning_rate=lr_schedule, weight_decay=0.001)
        print("Using Adam optimizer")
    else:
        optimizer = SGD(learning_rate=lr_schedule, weight_decay=0.001, momentum=0.9, nesterov=True)
        print("Using SGD optimizer")

    model.compile(loss=CategoricalCrossentropy(label_smoothing=label_smoothing),
                  optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# CNN3 with Adam and the swish activation
cnn_3_cosine_swish = cnn_3_model(
    warmup_epochs=10,
    use_adam=True,
    initial_learning_rate=0.01,
    final_learning_rate=0.00001,
    activation='swish',
)

In [None]:
# Train CNN3 (Adam, swish) for 200 epochs
fit_model(cnn_3_cosine_swish, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_3_adam_swish_weights.h5')

In [None]:
# CNN3 with SGD and the relu activation
cnn_3_cosine_sgd = cnn_3_model(
    warmup_epochs=10,
    use_adam=False,
    initial_learning_rate=0.01,
    final_learning_rate=0.00001,
    activation='relu',
)

In [None]:
# Train CNN3 (SGD) for 200 epochs
fit_model(cnn_3_cosine_sgd, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_3_cosine_sgd_weights.h5')

In [None]:
# CNN4 model architecture.
# Compiled using Adam optimizer with Cosine Decay learning rate scheduler and 10 warmup epochs.
def cnn_4_model():
    """Build and compile the CNN4 model.

    Four stages of two stacked 3x3 convolutions (128, 256, 1024, 1024
    filters) are each followed by batch normalization, max-pooling and
    dropout. The first two stages use ``'same'`` padding, the last two
    ``'valid'``. Three 1024-unit dense layers and the softmax output follow.

    Returns:
        The compiled model.
    """
    # (filters, padding) of each double-convolution stage
    conv_stages = ((128, 'same'), (256, 'same'), (1024, 'valid'), (1024, 'valid'))

    model = Sequential()
    for stage_index, (filters, padding) in enumerate(conv_stages):
        if stage_index == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters, (3, 3), activation='relu', padding=padding, input_shape=(64, 64, 3)))
        else:
            model.add(Conv2D(filters, (3, 3), activation='relu', padding=padding))
        model.add(Conv2D(filters, (3, 3), activation='relu', padding=padding))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

    model.add(Flatten())

    for _ in range(3):
        model.add(Dense(1024, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))

    model.add(Dense(NUM_CLASSES, activation="softmax"))

    # Learning-rate schedule: 10 warmup epochs, then 80 epochs of cosine decay
    initial_learning_rate = 0.005
    final_learning_rate = 0.00001
    steps_per_epoch = get_steps_per_epoch(train_generator)
    lr_schedule = CosineDecay(
        0.0,
        steps_per_epoch * 80,
        final_learning_rate / initial_learning_rate,
        warmup_target=initial_learning_rate,
        warmup_steps=steps_per_epoch * 10,
    )

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=lr_schedule, weight_decay=0.001),
        metrics=['accuracy'],
    )
    return model

## Residual Network

In [None]:
# Identity block for the ResNet. Input and output activations have the same dimensions.
def identity_block(input, filters, conv_kernel_initializer='glorot_uniform', activation='relu'):
    """Apply a three-convolution block with an unmodified skip connection.

    Args:
        input: Tensor the block is applied to; it is also added back,
            unchanged, to the output of the third convolution.
        filters: Three filter counts, one per convolution (1x1, 3x3, 1x1).
        conv_kernel_initializer: Kernel initializer of the convolutions.
        activation: Activation applied after the first two convolutions and
            after the skip connection.

    Returns:
        The output tensor of the block.
    """
    reduce_filters, spatial_filters, expand_filters = filters
    conv_options = {'kernel_initializer': conv_kernel_initializer}

    # 1x1 convolution
    main_path = Conv2D(reduce_filters, (1, 1), **conv_options)(input)
    main_path = BatchNormalization()(main_path)
    main_path = Activation(activation)(main_path)

    # 3x3 convolution, padded so the spatial size is kept
    main_path = Conv2D(spatial_filters, (3, 3), padding='same', **conv_options)(main_path)
    main_path = BatchNormalization()(main_path)
    main_path = Activation(activation)(main_path)

    # 1x1 convolution, not activated before the merge
    main_path = Conv2D(expand_filters, (1, 1), **conv_options)(main_path)
    main_path = BatchNormalization()(main_path)

    # Merge with the untouched input, then normalize and activate
    merged = Add()([main_path, input])
    merged = BatchNormalization()(merged)
    return Activation(activation)(merged)

# Residual block for the ResNet. Input and output activations can have a different number of dimensions.
def residual_block(input, filters, strides, conv_kernel_initializer='glorot_uniform', activation='relu'):
    """Apply a three-convolution block with a projected skip connection.

    Args:
        input: Tensor the block is applied to.
        filters: Three filter counts, one per convolution (1x1, 3x3, 1x1).
            The last one is also used by the skip-connection convolution.
        strides: Strides of the first convolution and of the
            skip-connection convolution.
        conv_kernel_initializer: Kernel initializer of the convolutions.
        activation: Activation applied after the first two convolutions and
            after the skip connection.

    Returns:
        The output tensor of the block.
    """
    reduce_filters, spatial_filters, expand_filters = filters
    conv_options = {'kernel_initializer': conv_kernel_initializer}

    # Strided 1x1 convolution
    main_path = Conv2D(reduce_filters, (1, 1), strides=strides, **conv_options)(input)
    main_path = BatchNormalization()(main_path)
    main_path = Activation(activation)(main_path)

    # 3x3 convolution, padded so the spatial size is kept
    main_path = Conv2D(spatial_filters, (3, 3), padding='same', **conv_options)(main_path)
    main_path = BatchNormalization()(main_path)
    main_path = Activation(activation)(main_path)

    # 1x1 convolution, not activated before the merge
    main_path = Conv2D(expand_filters, (1, 1), **conv_options)(main_path)
    main_path = BatchNormalization()(main_path)

    # Skip connection: strided 1x1 convolution of the input, then normalization
    shortcut = Conv2D(expand_filters, (1, 1), strides=strides, **conv_options)(input)
    shortcut = BatchNormalization()(shortcut)

    # Merge both paths, then normalize and activate
    merged = Add()([main_path, shortcut])
    merged = BatchNormalization()(merged)
    return Activation(activation)(merged)

# CNN ResNet model.
# Can change the convolutional kernel initializer, the activation function,
# whether to use Adam or SGD, the initial and final learning rates, add label smoothing
# or not, the weight decay value and the length of the cosine decay.
# Model is compiled using Cosine Decay scheduler
def cnn_residual_model(image_size=64, warmup_epochs=0,
                       conv_kernel_initializer='glorot_uniform', activation='relu', use_adam=True,
                       initial_learning_rate=0.01, final_learning_rate=0.00001,
                       label_smoothing=0, weight_decay=0.001, decay_epochs=180):
    """Build and compile the residual network.

    The stem (zero padding, strided 7x7 convolution, max-pooling) is followed
    by four stages, each made of one ``residual_block`` and several
    ``identity_block`` calls, then average pooling, two 512-unit dense layers
    and the softmax output.

    Args:
        image_size: Side length of the square, 3-channel input.
        warmup_epochs: Number of epochs used as ``warmup_steps`` of the
            learning-rate schedule.
        conv_kernel_initializer: Kernel initializer of the convolutions
            (the dense layers always use ``glorot_uniform(seed=42)``).
        activation: Activation used throughout the network.
        use_adam: Use Adam when true, otherwise SGD with Nesterov momentum 0.9.
        initial_learning_rate: Learning rate targeted at the end of warmup.
        final_learning_rate: Learning rate the schedule decays towards;
            passed to the schedule as a fraction of ``initial_learning_rate``.
        label_smoothing: Label smoothing of the categorical cross-entropy loss.
        weight_decay: Weight decay passed to the optimizer.
        decay_epochs: Number of epochs used as ``decay_steps`` of the
            schedule. The default keeps the previously hard-coded 180.

    Returns:
        The compiled model.
    """
    # (filters, strides of the residual block, number of identity blocks)
    stages = (
        ([128, 128, 256], (1, 1), 3),
        ([128, 128, 512], (2, 2), 3),
        ([256, 256, 1024], (2, 2), 5),
        ([512, 512, 2048], (2, 2), 2),
    )

    X_input = Input((image_size, image_size, 3))

    # Stem
    X = ZeroPadding2D(padding=(3, 3))(X_input)
    X = Conv2D(filters=128, kernel_size=(7, 7), strides=(2, 2),
               kernel_initializer=conv_kernel_initializer)(X)
    X = BatchNormalization()(X)
    X = Activation(activation)(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Residual stages: one projecting block, then the identity blocks
    for stage_filters, stage_strides, identity_count in stages:
        X = residual_block(X, filters=stage_filters, strides=stage_strides,
                           activation=activation, conv_kernel_initializer=conv_kernel_initializer)
        for _ in range(identity_count):
            X = identity_block(X, filters=stage_filters,
                               activation=activation, conv_kernel_initializer=conv_kernel_initializer)

    X = AveragePooling2D(pool_size=(2, 2))(X)

    # Output layers
    X = Flatten()(X)
    for _ in range(2):
        X = Dense(512, activation=activation, kernel_initializer=glorot_uniform(seed=42))(X)
        X = BatchNormalization()(X)
        X = Dropout(0.5)(X)

    X = Dense(NUM_CLASSES, activation='softmax', kernel_initializer=glorot_uniform(seed=42))(X)
    model = Model(inputs=X_input, outputs=X)

    steps_per_epoch = get_steps_per_epoch(train_generator)
    # alpha is the floor of the schedule as a fraction of the initial rate
    alpha = final_learning_rate / initial_learning_rate

    first_decay_steps = steps_per_epoch * decay_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    # Starts at 0.0 and warms up to initial_learning_rate before decaying.
    lr_schedule = CosineDecay(0.0, first_decay_steps, alpha,
                              warmup_target=initial_learning_rate, warmup_steps=warmup_steps)

    if use_adam:
        optimizer = Adam(learning_rate=lr_schedule, weight_decay=weight_decay)
        print("Using Adam optimizer")
    else:
        optimizer = SGD(learning_rate=lr_schedule, weight_decay=weight_decay, momentum=0.9, nesterov=True)
        print("Using SGD optimizer")

    model.compile(loss=CategoricalCrossentropy(label_smoothing=label_smoothing),
                  optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# ResNet with the default (glorot_uniform) initializer and 10 warmup epochs
cnn_res_glorot = cnn_residual_model(image_size=64, warmup_epochs=10)

In [None]:
# Train the default ResNet for 200 epochs
fit_model(cnn_res_glorot, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_glorot_weights.h5')

In [None]:
# ResNet with label smoothing 0.2, trained for 200 epochs
cnn_res_glorot_smoothing_02 = cnn_residual_model(image_size=64, warmup_epochs=10, label_smoothing=0.2)
fit_model(cnn_res_glorot_smoothing_02, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_glorot_smoothing_02_weights.h5')

In [None]:
# ResNet with label smoothing 0.1
cnn_res_glorot_smoothing = cnn_residual_model(image_size=64, warmup_epochs=10, label_smoothing=0.1)

In [None]:
# Train the label-smoothing (0.1) ResNet for 200 epochs
fit_model(cnn_res_glorot_smoothing, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_glorot_smoothing_weights.h5')

In [None]:
# ResNet whose convolutions use the he_normal initializer
cnn_res_he = cnn_residual_model(image_size=64, warmup_epochs=10, conv_kernel_initializer='he_normal')

In [None]:
# Train the he_normal ResNet for 200 epochs
fit_model(cnn_res_he, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_he_weights.h5')

In [None]:
# ResNet compiled with SGD instead of Adam
cnn_res_sgd = cnn_residual_model(image_size=64, warmup_epochs=10, use_adam=False)

In [None]:
# Train the SGD ResNet for 200 epochs
fit_model(cnn_res_sgd, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_sgd_weights.h5')

In [None]:
# ResNet using the swish activation (Adam optimizer)
cnn_res_swish = cnn_residual_model(image_size=64, warmup_epochs=10, activation='swish', use_adam=True)

In [None]:
# Train the swish ResNet for 200 epochs
fit_model(cnn_res_swish, train_generator, val_generator,
          num_epochs=200, checkpoint_name='cnn_res_swish_weights.h5')

# Evaluation and submission

In [None]:
# Method for generating the submission file
def generate_predictions(model, test_generator, test_filenames, checkpoint_name='best_model.h5', checkpoint_folder=''):
    """Predict the test set with ``model`` and write the submission file.

    Args:
        model: Model whose weights are loaded from the checkpoint.
        test_generator: Test data generator; reset before predicting.
        test_filenames: Image names, written to the ``Image`` column.
        checkpoint_name: File name of the weights to load.
        checkpoint_folder: Folder containing ``checkpoint_name``.

    The predicted class of each image (argmax over the model output) is
    written to ``SUBMISSION_FILE`` next to its file name.
    """
    weights_path = os.path.join(checkpoint_folder, checkpoint_name)

    test_generator.reset()
    model.load_weights(weights_path)
    class_scores = model.predict(test_generator, verbose=1)

    submission = pd.DataFrame({
        "Image": test_filenames,
        "Class": np.argmax(class_scores, axis=1),
    })
    submission.to_csv(SUBMISSION_FILE, index=False)

In [None]:
# Evaluate the saved ResNet weights on the validation set, then write its test predictions
val_generator.reset()

cnn_res = cnn_residual_model(image_size=64, conv_kernel_initializer='he_normal')
cnn_res.load_weights("Model_weights/cnn_res_weights.h5")
print(cnn_res.evaluate(val_generator))
generate_predictions(cnn_res, test_generator, test_filenames,
                     checkpoint_name='cnn_res_weights.h5', checkpoint_folder='Model_weights')

In [None]:
# Rebuild CNN3 without warmup, load the saved weights and evaluate on the validation set
cnn_3_cosine = cnn_3_model(warmup_epochs=0)
cnn_3_cosine.load_weights("Model_weights/cnn_3_cosine_weights.h5")
val_generator.reset()
print(cnn_3_cosine.evaluate(val_generator))

In [None]:
# Continue training CNN3 from the loaded weights for 150 epochs
fit_model(cnn_3_cosine, train_generator, val_generator,
          num_epochs=150, checkpoint_name='cnn_3_cosine_2_weights.h5')

In [None]:
# Evaluate the best checkpoint of the second CNN3 run and write its test predictions
val_generator.reset()
cnn_3_cosine.load_weights("cnn_3_cosine_2_weights.h5")
print(cnn_3_cosine.evaluate(val_generator))
generate_predictions(cnn_3_cosine, test_generator, test_filenames,
                     checkpoint_name='cnn_3_cosine_2_weights.h5')

# Ensemble learning

In [None]:
# Load several trained models and evaluate them on the validation set
# Following are only models which I considered adding to the ensemble.
def _load_and_evaluate(model, weights_path):
    """Load ``weights_path`` into ``model``, print its validation evaluation and return it."""
    val_generator.reset()
    model.load_weights(weights_path)
    print(model.evaluate(val_generator))
    return model


# CNN3 variants
model_cnn3_cosine_2 = _load_and_evaluate(
    cnn_3_model(), "Model_weights/cnn_3_cosine_2_weights.h5")

model_cnn3_cosine_1 = _load_and_evaluate(
    cnn_3_model(), "Model_weights/cnn_3_cosine_weights.h5")

model_cnn3_simple = _load_and_evaluate(
    cnn_3_model(), "Model_weights/cnn_3_weights.h5")

model_cnn3_cosine_swish = _load_and_evaluate(
    cnn_3_model(warmup_epochs=10, use_adam=True, initial_learning_rate=0.01,
                final_learning_rate=0.00001, activation='swish'),
    "Model_weights/cnn_3_adam_swish_weights.h5")

model_cnn3_cosine_sgd = _load_and_evaluate(
    cnn_3_model(warmup_epochs=10, use_adam=False, initial_learning_rate=0.01,
                final_learning_rate=0.00001, activation='relu'),
    "Model_weights/cnn_3_cosine_sgd_weights.h5")

# CNN4
model_cnn4 = _load_and_evaluate(
    cnn_4_model(), "Model_weights/cnn_4_weights.h5")

# ResNet variants
model_res = _load_and_evaluate(
    cnn_residual_model(64), "Model_weights/cnn_res_glorot_weights.h5")

model_res_he = _load_and_evaluate(
    cnn_residual_model(64), "Model_weights/cnn_res_he_weights.h5")

model_res_sgd = _load_and_evaluate(
    cnn_residual_model(64, 10, use_adam=False), "Model_weights/cnn_res_sgd_weights.h5")

model_res_swish = _load_and_evaluate(
    cnn_residual_model(64, 10, use_adam=True, activation='swish'),
    "Model_weights/cnn_res_swish_weights.h5")

model_res_smoothing = _load_and_evaluate(
    cnn_residual_model(64), "Model_weights/cnn_res_glorot_smoothing_weights.h5")

model_res_smoothing_02 = _load_and_evaluate(
    cnn_residual_model(64), "Model_weights/cnn_res_glorot_smoothing_02_weights.h5")


In [None]:
# Other models which I have not added to ensembles due to low metrics
model_cnn_2 = cnn_2_model(50)
model_cnn_2.load_weights("Model_weights/cnn_2_weights.h5")

# Built with SGD to match the optimizer these weights were trained with
model_cnn_1_sgd = cnn_1_model(use_adam=False)
model_cnn_1_sgd.load_weights("Model_weights/cnn_1_sgd_weights.h5")

# Built and loaded once (the original cell repeated these two lines)
model_cnn_1 = cnn_1_model()
model_cnn_1.load_weights("Model_weights/cnn_1_adam_weights.h5")

model_cnn_2_cos = cnn_2_model(50)
model_cnn_2_cos.load_weights("Model_weights/cnn_2_weights_cos.h5")

In [None]:
# Generate submission file for the ensemble, based on a list of trained models
def generate_predictions_ensemble(models, test_generator, test_filenames):
    """Write the submission file from the summed predictions of ``models``.

    Every model predicts the whole test set; the per-class outputs are
    summed over the models and the class with the largest sum is kept.

    Args:
        models: Non-empty sequence of trained models exposing ``predict``.
        test_generator: Test data generator; reset before each prediction pass.
        test_filenames: Image names, written to the ``Image`` column.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("models must contain at least one trained model")

    preds = []
    for model in models:
        # Restart the generator before every prediction pass.
        test_generator.reset()
        preds.append(model.predict(test_generator, verbose=1))

    preds_sum = np.sum(np.array(preds), axis=0)
    predictions = np.argmax(preds_sum, axis=1)

    results = pd.DataFrame({"Image": test_filenames,
                            "Class": predictions})
    results.to_csv(SUBMISSION_FILE, index=False)

In [None]:
# First model ensemble tried: three CNN3 variants plus six ResNet variants
models_ensemble_1 = [
    model_cnn3_cosine_1,
    model_cnn3_cosine_2,
    model_cnn3_cosine_swish,
    model_res,
    model_res_he,
    model_res_sgd,
    model_res_smoothing,
    model_res_smoothing_02,
    model_res_swish,
]

generate_predictions_ensemble(models_ensemble_1, test_generator, test_filenames)

In [None]:
# Second model ensemble tried: the six ResNet variants only
models_ensemble_2 = [
    model_res,
    model_res_he,
    model_res_sgd,
    model_res_smoothing,
    model_res_smoothing_02,
    model_res_swish,
]

generate_predictions_ensemble(models_ensemble_2, test_generator, test_filenames)

# Metrics and Confusion Matrices

In [None]:
# Predict with every model and return the class with the largest summed output
def _ensemble_class_predictions(models, generator):
    """Return the predicted class index of every sample for an ensemble.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("models must contain at least one trained model")

    preds = []
    for model in models:
        # Restart the generator before every prediction pass.
        generator.reset()
        preds.append(model.predict(generator, verbose=1))

    preds_sum = np.sum(np.array(preds), axis=0)
    return np.argmax(preds_sum, axis=1)


# Draw the confusion matrix of the given labels and save it as an image
def _save_confusion_matrix(true_labels, predictions, cm_img_name):
    """Plot the confusion matrix as an annotated heatmap and save it to ``cm_img_name``."""
    plt.figure(figsize=(35, 35))
    cm = confusion_matrix(true_labels, predictions)
    ax = plt.subplot()
    sns.heatmap(cm, annot=True, fmt='g', ax=ax)

    # labels, title and ticks
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title('Confusion Matrix')
    plt.savefig(cm_img_name, bbox_inches='tight')


# Generate classification report for a model
def get_metrics(model, generator, y_true):
    """Print the classification report of a single model.

    Args:
        model: Trained model exposing ``predict``.
        generator: Data generator to predict on; reset before predicting.
        y_true: One-hot encoded true labels, in the generator's sample order.
    """
    # A single model is an ensemble of one: summing one prediction array
    # leaves it unchanged.
    get_metrics_ensemble([model], generator, y_true)


# Generate classification report for a model ensemble
def get_metrics_ensemble(models, generator, y_true):
    """Print the classification report of an ensemble.

    Args:
        models: Non-empty sequence of trained models exposing ``predict``.
        generator: Data generator to predict on; reset before each pass.
        y_true: One-hot encoded true labels, in the generator's sample order.

    Raises:
        ValueError: If ``models`` is empty.
    """
    predictions = _ensemble_class_predictions(models, generator)
    true_labels = np.argmax(y_true, axis=1)
    print(classification_report(true_labels, predictions, digits=3))


# Create confusion matrix image for a model
def get_confusion_matrix(model, generator, y_true, cm_img_name):
    """Save the confusion matrix image of a single model.

    Args:
        model: Trained model exposing ``predict``.
        generator: Data generator to predict on; reset before predicting.
        y_true: One-hot encoded true labels, in the generator's sample order.
        cm_img_name: Name of the image file to write.
    """
    get_confusion_matrix_ensemble([model], generator, y_true, cm_img_name)


# Create confusion matrix for a model ensemble
def get_confusion_matrix_ensemble(models, generator, y_true, cm_img_name):
    """Save the confusion matrix image of an ensemble.

    Args:
        models: Non-empty sequence of trained models exposing ``predict``.
        generator: Data generator to predict on; reset before each pass.
        y_true: One-hot encoded true labels, in the generator's sample order.
        cm_img_name: Name of the image file to write.

    Raises:
        ValueError: If ``models`` is empty.
    """
    predictions = _ensemble_class_predictions(models, generator)
    true_labels = np.argmax(y_true, axis=1)
    _save_confusion_matrix(true_labels, predictions, cm_img_name)

In [None]:
# Validation metrics: CNN3 (SGD)
get_metrics(model=model_cnn3_cosine_sgd, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics and confusion matrix: ResNet with label smoothing 0.1
get_metrics(model=model_res_smoothing, generator=val_generator, y_true=y_val)
get_confusion_matrix(model=model_res_smoothing, generator=val_generator, y_true=y_val,
                     cm_img_name="resnet_smoothing_cm")

In [None]:
# Validation metrics: ResNet with label smoothing 0.2
get_metrics(model=model_res_smoothing_02, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics and confusion matrix: CNN3 (Adam, swish)
get_metrics(model=model_cnn3_cosine_swish, generator=val_generator, y_true=y_val)
get_confusion_matrix(model=model_cnn3_cosine_swish, generator=val_generator, y_true=y_val,
                     cm_img_name="cnn3_cosine_swish_cm")

In [None]:
# Validation metrics: ResNet (swish)
get_metrics(model=model_res_swish, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: ResNet (SGD)
get_metrics(model=model_res_sgd, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: ResNet (he_normal weights)
get_metrics(model=model_res_he, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics and confusion matrix: ResNet (glorot weights)
get_metrics(model=model_res, generator=val_generator, y_true=y_val)
get_confusion_matrix(model=model_res, generator=val_generator, y_true=y_val,
                     cm_img_name="resnet_glorot_cm")

In [None]:
# Validation metrics: CNN4
get_metrics(model=model_cnn4, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: CNN3 (cnn_3_weights.h5)
get_metrics(model=model_cnn3_simple, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: CNN3 (first cosine run)
get_metrics(model=model_cnn3_cosine_1, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics and confusion matrix: CNN3 (second cosine run)
get_metrics(model=model_cnn3_cosine_2, generator=val_generator, y_true=y_val)
get_confusion_matrix(model=model_cnn3_cosine_2, generator=val_generator, y_true=y_val,
                     cm_img_name="cnn3_cos_2_cm")

In [None]:
# Validation metrics: CNN2
get_metrics(model=model_cnn_2, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: CNN2 (cosine restarts)
get_metrics(model=model_cnn_2_cos, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: CNN1 (Adam)
get_metrics(model=model_cnn_1, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics: CNN1 (SGD)
get_metrics(model=model_cnn_1_sgd, generator=val_generator, y_true=y_val)

In [None]:
# Validation metrics and confusion matrix: first ensemble
get_metrics_ensemble(models=models_ensemble_1, generator=val_generator, y_true=y_val)
get_confusion_matrix_ensemble(models=models_ensemble_1, generator=val_generator, y_true=y_val,
                              cm_img_name="ensemble_1_cm")