<a href="https://colab.research.google.com/github/r-meleshko/kaggle/blob/main/dog_breed_identification_Comparing_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import time
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import NASNetLarge
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Data preprocessing

In [2]:
from google.colab import files

uploaded = files.upload()

!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [None]:
!kaggle competitions download -c dog-breed-identification
!unzip dog-breed-identification.zip
!mkdir models

In [4]:
# Load train and test image list
labels = pd.read_csv("labels.csv")
labels.id = labels.id.apply(lambda x: f"{x}.jpg")

train_df, validation_df = train_test_split(labels, stratify=labels['breed'], test_size=0.2, random_state=420)

# Create test generator
test_filenames = os.listdir('test')
test_df = pd.DataFrame({'id': test_filenames})

labels.head(2)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07.jpg,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97.jpg,dingo


In [5]:
def create_data_generators(img_size):
    """Create test, validation and train generators with image size appropriate for each model."""
    batch_size = 32

    # Create a data generator with data augmentaiton
    train_datagen = ImageDataGenerator(horizontal_flip=True)

    # Prepare the generators for train and validation datasets
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory='train',
        x_col="id",
        y_col="breed",
        target_size=img_size,
        class_mode="categorical",
        batch_size=batch_size
    )

    validation_datagen = ImageDataGenerator()

    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=validation_df,
        directory='train',
        x_col="id",
        y_col="breed",
        target_size=img_size,
        class_mode="categorical",
        batch_size=batch_size
    )

    test_datagen = ImageDataGenerator()

    test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_df,  # the dataframe for the test set will be created later
        directory='test',
        x_col="id",
        class_mode=None,  # no labels
        target_size=img_size,
        batch_size=batch_size,
        shuffle=False  # keep data in the same order as filenames
    )

    return train_generator, validation_generator, test_generator

# Model evaluation

## Import models

In [6]:
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
InceptionV3_preprocessor = preprocess_input

from tensorflow.keras.applications.xception import Xception, preprocess_input
Xception_preprocessor = preprocess_input

from tensorflow.keras.applications.nasnet import NASNetLarge, preprocess_input
NASNetLarge_preprocessor = preprocess_input

from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
InceptionResNetV2_preprocessor = preprocess_input

from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
VGG19_preprocessor = preprocess_input


input_shapes = {
    "InceptionV3": (299, 299),
    "Xception": (299, 299),
    "NASNetLarge": (331, 331),
    "InceptionResNetV2": (299, 299),
    "VGG19": (224, 224),
}

models = [InceptionV3, Xception, NASNetLarge, InceptionResNetV2, VGG19]

## Model evaluation function

In [7]:
class TimeHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)


def kaggle_submission(model, name, test_generator, test_df):
    # Submit results to Kaggle
    pred = model.predict(test_generator)
    submission = pd.read_csv("sample_submission.csv")
    submission = pd.DataFrame(pred, columns=submission.columns[1:])
    submission.insert(0, "id", test_df.id)
    submission.id = submission.id.str.rstrip('.jpg')
    submission.to_csv("submission.csv", index=False)
    !kaggle competitions submit -c dog-breed-identification -f submission.csv -m f"Evaluating models: {name} (2)."

In [12]:
def compare_models(models: list) -> tuple:
    performance = dict()

    for pretrained_model in models:
        name = pretrained_model.__name__
        print(f'\nEvaluating {name} model.')

        # Model specific parameters
        img_size = input_shapes[name]
        train_generator, validation_generator, test_generator = create_data_generators(img_size)
        preprocessor = eval(f"{name}_preprocessor")

        model_weights = pretrained_model(weights="imagenet", include_top=False, input_shape=img_size + (3,))
        model_weights.trainable = False

        # Define model
        model = keras.Sequential([
            layers.Lambda(preprocessor),
            model_weights,
            layers.GlobalAveragePooling2D(),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(120, activation='softmax')
        ])
        model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

        # Define callbacks
        time_callback = TimeHistory()
        earlystop = keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=1, min_delta=0.01, restore_best_weights=True)
        callbacks_list = [earlystop, time_callback]

        history = model.fit(
            train_generator,
            validation_data=train_generator,
            epochs=5,
            callbacks=callbacks_list
        )

        performance[name] = model.evaluate(validation_generator) + [np.mean(time_callback.times), model, history]
        print(performance[name][:3])

        if performance[name][1] > 0.8:
            kaggle_submission(model, name, test_generator, test_df)


    return performance

In [13]:
performance = compare_models(models)


Evaluating InceptionV3 model.
Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.
Found 10357 validated image filenames.
Epoch 1/5
Epoch 2/5
Epoch 3/5
[0.39798805117607117, 0.8718826174736023, 114.2386618455251]
100% 16.4M/16.4M [00:04<00:00, 3.46MB/s]
Successfully submitted to Dog Breed Identification
Evaluating Xception model.
Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.
Found 10357 validated image filenames.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/5
Epoch 2/5
Epoch 3/5
[0.39883479475975037, 0.8767726421356201, 137.94217586517334]
100% 16.4M/16.4M [00:04<00:00, 3.77MB/s]
Successfully submitted to Dog Breed Identification
Evaluating NASNetLarge model.
Found 8177 validated image filenames belonging to 120 classes.
Found 2

In [18]:
for i in performance:
    loss, acc, etime = [round(x, 2) for x in performance[i][:3]]
    print(f'Model: {i} | Val. accuracy: {acc} | Time per epoch {etime}\n')

Model: InceptionV3 | Val. accuracy: 0.87 | Time per epoch 114.24

Model: Xception | Val. accuracy: 0.88 | Time per epoch 137.94

Model: NASNetLarge | Val. accuracy: 0.92 | Time per epoch 449.85

Model: InceptionResNetV2 | Val. accuracy: 0.9 | Time per epoch 170.49

Model: VGG19 | Val. accuracy: 0.63 | Time per epoch 94.13



In [17]:
!kaggle competitions submissions -c dog-breed-identification

fileName        date                 description                                                 status    publicScore  privateScore  
--------------  -------------------  ----------------------------------------------------------  --------  -----------  ------------  
submission.csv  2023-07-26 09:15:42  fEvaluating models: InceptionResNetV2 (2).                  complete  0.30361      0.30361       
submission.csv  2023-07-26 09:03:47  fEvaluating models: NASNetLarge (2).                        complete  0.26150      0.26150       
submission.csv  2023-07-26 08:41:12  fEvaluating models: Xception (2).                           complete  0.37678      0.37678       
submission.csv  2023-07-26 08:32:19  fEvaluating models: InceptionV3 (2).                        complete  0.38870      0.38870       
submission.csv  2023-07-25 22:24:49  fEvaluating models: InceptionResNetV2.                      complete  5.21092      5.21092       
submission.csv  2023-07-25 22:16:16  fEvaluating models