# Model Selection Via Transfer Learning

In [1]:
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from keras import applications
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, BatchNormalization, GlobalAveragePooling2D
from keras.utils import to_categorical
import keras.backend as K

warnings.filterwarnings('ignore')

2024-03-19 18:03:12.835191: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-19 18:03:12.835294: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-19 18:03:12.979178: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# --- Image folders (one sub-directory per class, as read by flow_from_directory) ---
train_data_dir = '/kaggle/input/landmark-data/Data/Training_Data'
validation_data_dir = '/kaggle/input/landmark-data/Data/Validation_Data'
test_data_dir = '/kaggle/input/landmark-data/Data/Test_Data'

# --- Label arrays stored as .npy files, one per split ---
train_labels_file = '/kaggle/input/landmark-data/Labels/training_labels.npy'
validation_labels_file = '/kaggle/input/landmark-data/Labels/validation_labels.npy'
test_labels_file = '/kaggle/input/landmark-data/Labels/test_labels.npy'

# --- Input geometry: square images fed to the pre-trained backbones ---
img_width = img_height = 224

# --- Classifier-head hyperparameters ---
# NOTE(review): the recorded run reports 99 class folders for training and 68 for
# validation; confirm that 100 matches the encoding used in the label files.
num_classes = 100
epochs = 16
batch_size = 512

In [3]:
# Load the label array of each split. Passing the path (instead of an object
# returned by a bare open()) lets np.load open and close the file itself, so no
# file handle is left dangling in the kernel.
# Presumably these hold integer class ids (they are later fed to to_categorical) -- TODO confirm.
train_labels = np.load(train_labels_file)
validation_labels = np.load(validation_labels_file)
test_labels = np.load(test_labels_file)

## Converting images to feature vectors using weights from ImageNet

In [4]:
def images_to_feature_vectors(model, directory, batch_size, steps, preprocessing_function=None):
    """Run the images found under ``directory`` through ``model`` and return its outputs.

    Images are read with ``flow_from_directory`` in a fixed (unshuffled) order and
    resized to the notebook-level ``img_width`` / ``img_height`` before being
    passed to ``model.predict``.

    Args:
        model: Object exposing a Keras-style ``predict`` method (here a
            pre-trained convolutional base).
        directory: Root folder containing one sub-folder of images per class.
        batch_size: Number of images the generator yields per step.
        steps: Number of generator batches to run through ``model.predict``.
        preprocessing_function: Optional callable applied by the generator to
            each resized image (for example the ``preprocess_input`` function
            that matches the backbone). ``None`` keeps the raw pixel values,
            which is the previous behaviour.

    Returns:
        Whatever ``model.predict`` returns for the requested batches.
    """
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocessing_function)
    generator = datagen.flow_from_directory(
        directory,
        # NOTE(review): Keras documents target_size as (height, width); the order
        # is harmless while both are equal, and it matches the (width, height)
        # order the notebook also uses for the models' input_shape.
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,   # yield images only, no labels
        shuffle=False)     # keep the data in the same order on every pass

    # `steps` must be passed by keyword: the second positional parameter of
    # Model.predict is `batch_size`, so a positional step count never reached
    # the `steps` argument.
    features = model.predict(generator, steps=steps, verbose=1)

    return features

## Train the top part of the model

In [5]:
def top_model(training_features, validation_features, batch_size, epochs):
    """Build, compile and fit the dense classifier that sits on top of extracted features.

    The targets are not parameters: they are taken from the notebook-level
    ``train_labels`` / ``validation_labels`` arrays and one-hot encoded over
    ``num_classes`` columns.

    Args:
        training_features: Feature array used for fitting; its shape without the
            first axis defines the network input.
        validation_features: Feature array evaluated after every epoch.
        batch_size: Mini-batch size handed to ``fit``.
        epochs: Number of passes over the training features.

    Returns:
        The fitted ``Sequential`` model.
    """
    # One-hot targets for the categorical cross-entropy loss.
    encoded_train_targets = to_categorical(train_labels, num_classes)
    encoded_validation_targets = to_categorical(validation_labels, num_classes)

    per_sample_shape = training_features.shape[1:]
    classifier = Sequential([
        Flatten(input_shape=per_sample_shape, name='Main_input'),
        BatchNormalization(),
        Dense(1096, activation='relu', name='D1'),
        Dense(512, activation='relu', name='D2'),
        Dense(num_classes, activation='softmax', name='Main_output'),
    ])
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    classifier.fit(
        training_features,
        encoded_train_targets,
        validation_data=(validation_features, encoded_validation_targets),
        batch_size=batch_size,
        epochs=epochs,
    )

    return classifier

## VGG-16 Training and Evaluation

In [6]:
# Time the feature-extraction stage: everything between here and end_time,
# including building the VGG16 base and both extraction passes.
start_time = datetime.now()

# The batch size should evenly divide the number of images: the step count passed
# below is len(labels) // batch_size, so with any other batch size
# steps * batch_size would be smaller than the number of labels.
training_batch_size = 1 # batch size for feature extraction (1 divides any image count)
validation_batch_size = 1 # batch size for feature extraction (1 divides any image count)

model = applications.VGG16(include_top=False, weights='imagenet', input_shape=(img_width,img_height,3)) # VGG16 with ImageNet weights, built without its top (classifier) layers
training_features = images_to_feature_vectors(model, train_data_dir, training_batch_size, len(train_labels) // training_batch_size)
validation_features = images_to_feature_vectors(model, validation_data_dir, validation_batch_size, len(validation_labels) // validation_batch_size)

end_time = datetime.now()
# Kept as a timedelta; a later cell calls .total_seconds() on it for the summary table.
features_extraction_time = end_time - start_time
print('Features extraction time : {}'.format(features_extraction_time))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Found 1273 images belonging to 99 classes.
[1m  21/1273[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 8ms/step

I0000 00:00:1710871508.952220     161 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step
Found 267 images belonging to 68 classes.
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Features extraction time : 0:00:19.226479


In [7]:
# Time the fitting of the dense classifier on the VGG16 features.
start_time = datetime.now()

# Rebinds `model`: from here on it is the fitted classifier head, not the VGG16 base.
model = top_model(training_features, validation_features, batch_size, epochs)

end_time = datetime.now()
# Kept as a timedelta; a later cell calls .total_seconds() on it for the summary table.
training_time = end_time - start_time
print('Total training duration : {}'.format(training_time))

Epoch 1/16
[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m7s[0m 4s/step - accuracy: 0.0117 - loss: 5.0733

W0000 00:00:1710871569.452410     161 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 832ms/step - accuracy: 0.0733 - loss: 4.7209

W0000 00:00:1710871571.123150     163 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.0840 - loss: 4.6450 - val_accuracy: 0.2360 - val_loss: 11.1941
Epoch 2/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9594 - loss: 0.2685 

W0000 00:00:1710871572.088760     160 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.9592 - loss: 0.2673 - val_accuracy: 0.1760 - val_loss: 10.6592
Epoch 3/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.9964 - loss: 0.0322 - val_accuracy: 0.1985 - val_loss: 10.3399
Epoch 4/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.9983 - loss: 0.0137 - val_accuracy: 0.2022 - val_loss: 10.6067
Epoch 5/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.9981 - loss: 0.0107 - val_accuracy: 0.2060 - val_loss: 10.5878
Epoch 6/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.9986 - loss: 0.0235 - val_accuracy: 0.2060 - val_loss: 9.8737
Epoch 7/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.9996 - loss: 0.0020 - val_accuracy: 0.2322 - val_loss: 9.0167
Epoch 8/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [9]:
# One-hot encode the training labels at notebook level; this array is reused by
# the ResNet evaluation cell further down.
train_labels_onehot = to_categorical(train_labels, num_classes)
# The model was compiled with metrics=['accuracy'], so evaluate yields (loss, accuracy).
loss, training_accuracy = model.evaluate(training_features,train_labels_onehot)

[1m33/40[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 6.5419e-04 

W0000 00:00:1710871599.791620     161 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 1.0000 - loss: 6.0122e-04


W0000 00:00:1710871600.450938     161 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [10]:
# One-hot encode the validation labels at notebook level; this array is reused by
# the ResNet evaluation cell further down.
validation_labels_onehot = to_categorical(validation_labels, num_classes)
# Overwrites `loss` from the training evaluation; only the accuracy is used afterwards.
loss, validation_accuracy = model.evaluate(validation_features,validation_labels_onehot)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - accuracy: 0.2115 - loss: 7.7872


W0000 00:00:1710871614.683415     161 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [11]:
# Training accuracy of the VGG16-based classifier as a percentage with one decimal.
training_accuracy_percent = round(training_accuracy * 100, 1)
'Training Accuracy : ' + str(training_accuracy_percent) + '%'

'Training Accuracy : 100.0%'

In [12]:
# Validation accuracy of the VGG16-based classifier as a percentage with one decimal.
validation_accuracy_percent = round(validation_accuracy * 100, 1)
'Validation Accuracy : ' + str(validation_accuracy_percent) + '%'

'Validation Accuracy : 26.2%'

In [13]:
# Snapshot of the VGG16 run, taken before the ResNet cells overwrite these variables.
# Field order: training accuracy (%), validation accuracy (%),
# feature-extraction time (s), training time (s).
vgg16_results = (
    round(training_accuracy * 100, 1),
    round(validation_accuracy * 100, 1),
    round(features_extraction_time.total_seconds()),
    round(training_time.total_seconds()),
)

## ResNet Training and Evaluation

In [14]:
# Time the feature-extraction stage for the ResNet50 backbone.
start_time = datetime.now()

# training_batch_size / validation_batch_size are not redefined here; they come
# from the VGG-16 extraction cell above.
model = applications.ResNet50(include_top=False, weights='imagenet', input_shape=(img_width,img_height,3)) # ResNet50 with ImageNet weights, built without its top (classifier) layers
training_features = images_to_feature_vectors(model, train_data_dir, training_batch_size, len(train_labels) // training_batch_size)
validation_features = images_to_feature_vectors(model, validation_data_dir, validation_batch_size, len(validation_labels) // validation_batch_size)

end_time = datetime.now()
# Overwrites the VGG16 timing; that value was already captured in vgg16_results.
features_extraction_time = end_time - start_time
print('Features extraction time : {}'.format(features_extraction_time))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Found 1273 images belonging to 99 classes.
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step
Found 267 images belonging to 68 classes.
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Features extraction time : 0:00:21.674574


In [15]:
# Time the fitting of the dense classifier on the ResNet50 features.
start_time = datetime.now()

# Rebinds `model`: from here on it is the fitted classifier head, not the ResNet50 base.
model = top_model(training_features, validation_features, batch_size, epochs)

end_time = datetime.now()
# Overwrites the VGG16 training time; that value was already captured in vgg16_results.
training_time = end_time - start_time
print('Total training duration : {}'.format(training_time))

Epoch 1/16
[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m10s[0m 5s/step - accuracy: 0.0078 - loss: 5.2597

W0000 00:00:1710871712.617461     162 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0842 - loss: 6.0060   

W0000 00:00:1710871715.154445     162 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - accuracy: 0.0957 - loss: 6.1300 - val_accuracy: 0.2172 - val_loss: 11.4553
Epoch 2/16


W0000 00:00:1710871716.798744     162 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step - accuracy: 0.8876 - loss: 0.7133 - val_accuracy: 0.2210 - val_loss: 12.8041
Epoch 3/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 203ms/step - accuracy: 0.9709 - loss: 0.1756 - val_accuracy: 0.2247 - val_loss: 12.4103
Epoch 4/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step - accuracy: 0.9935 - loss: 0.0349 - val_accuracy: 0.2060 - val_loss: 12.4675
Epoch 5/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 207ms/step - accuracy: 0.9924 - loss: 0.0212 - val_accuracy: 0.2135 - val_loss: 12.4020
Epoch 6/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 203ms/step - accuracy: 0.9987 - loss: 0.0103 - val_accuracy: 0.2210 - val_loss: 12.2121
Epoch 7/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 201ms/step - accuracy: 0.9996 - loss: 0.0031 - val_accuracy: 0.2322 - val_loss: 12.0709
Epoch 8/16
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━

In [16]:
# Relies on train_labels_onehot created in the VGG-16 evaluation cell above.
loss, training_accuracy = model.evaluate(training_features,train_labels_onehot)

[1m20/40[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 9ms/step - accuracy: 0.9821 - loss: 0.0523

W0000 00:00:1710871743.136536     161 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.9820 - loss: 0.0576


W0000 00:00:1710871744.106109     163 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [17]:
# Relies on validation_labels_onehot created in the VGG-16 evaluation cell above.
loss, validation_accuracy = model.evaluate(validation_features,validation_labels_onehot)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step - accuracy: 0.2256 - loss: 12.0810


W0000 00:00:1710871755.541741     162 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [18]:
# Training accuracy of the ResNet50-based classifier as a percentage with one decimal.
training_accuracy_percent = round(training_accuracy * 100, 1)
'Training Accuracy : ' + str(training_accuracy_percent) + '%'

'Training Accuracy : 98.0%'

In [19]:
# Validation accuracy of the ResNet50-based classifier as a percentage with one decimal.
validation_accuracy_percent = round(validation_accuracy * 100, 1)
'Validation Accuracy : ' + str(validation_accuracy_percent) + '%'

'Validation Accuracy : 24.3%'

In [20]:
# Snapshot of the ResNet50 run.
# Field order: training accuracy (%), validation accuracy (%),
# feature-extraction time (s), training time (s).
resnet_results = (
    round(training_accuracy * 100, 1),
    round(validation_accuracy * 100, 1),
    round(features_extraction_time.total_seconds()),
    round(training_time.total_seconds()),
)

In [21]:
# Metric column headers, listed in the same order as the fields of each
# per-model result tuple so they can be paired by position.
metric_columns = [
    'Training Accuracy(%)',
    'Validation Accuracy(%)',
    'Features Extraction time(s)',
    'Training time(s)',
]

# One list per column, each holding the VGG16 value followed by the ResNet value.
results = {'Model': ['VGG16', 'Resnet']}
for position, column in enumerate(metric_columns):
    results[column] = [vgg16_results[position], resnet_results[position]]

df = pd.DataFrame(data=results)

In [22]:
# Display the side-by-side comparison of the two backbones.
df

Unnamed: 0,Model,Training Accuracy(%),Validation Accuracy(%),Features Extraction time(s),Training time(s)
0,VGG16,100.0,26.2,19,10
1,Resnet,98.0,24.3,22,22
