In [1]:
# for garbage collection
import gc

# for warnings
import warnings
warnings.filterwarnings("ignore")

# utility libraries
import os
import copy
import tqdm
import numpy as np 
import pandas as pd 
import cv2, random, time, shutil, csv
import tensorflow as tf 
import math

# keras libraries
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K

In [2]:
# Ask TensorFlow to grow its GPU allocation on demand, then report which GPU
# device (if any) it can see.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("GPU is not detected")
else:
    print('Default GPU Device:{}'.format(gpu_name))

Default GPU Device:/device:GPU:0


In [3]:
# --- configuration -----------------------------------------------------------
# Side length (in pixels) every image is resized to.
img_size = 363
# Dataset root. Can be overridden through the DATA_DIR environment variable so
# the notebook is not tied to one machine; falls back to the original path.
data_dir = os.environ.get(
    'DATA_DIR',
    r'C:\Users\Dreamcore\Documents\machine-learning-4041-main\Datasets')
# Upper bound on the number of training files that are loaded.
max_images = 10222

# --- labels --------------------------------------------------------------------
data_df = pd.read_csv(os.path.join(data_dir, 'labels.csv'))
class_names = sorted(data_df['breed'].unique())
print(f"No. of classes read - {len(class_names)}")
time.sleep(1)  # presumably gives the print time to flush before tqdm draws its bar

# Constant-time lookups instead of scanning the DataFrame / class list once per
# image. drop_duplicates keeps the first row per id, matching the old `.iloc[0]`.
breed_by_id = data_df.drop_duplicates('id').set_index('id')['breed'].to_dict()
class_index_by_name = {name: idx for idx, name in enumerate(class_names)}

# --- images --------------------------------------------------------------------
train_dir = os.path.join(data_dir, 'train')
images_list = sorted(os.listdir(train_dir))
X = []
Y = []
for image in tqdm.tqdm(images_list[:max_images]):
    # The file stem is the image id; splitext copes with any extension length.
    image_id = os.path.splitext(image)[0]
    if image_id not in breed_by_id:
        raise KeyError(f"No label found in labels.csv for image file '{image}'")
    cls_index = class_index_by_name[breed_by_id[image_id]]

    image_path = os.path.join(train_dir, image)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV loads images as BGR; convert to RGB before resizing.
    orig_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image, (img_size, img_size))
    X.append(res_image)
    Y.append(cls_index)

No. of classes read - 120


100%|███████████████████████████████████████████████████████████████████████████| 10222/10222 [00:27<00:00, 371.88it/s]


In [4]:
# Stack the per-image lists into NumPy arrays; labels become a column vector.
print(len(X), len(Y))
Xarr = np.asarray(X)
Yarr = np.asarray(Y)[:, np.newaxis]

# The list of images is redundant once Xarr exists, so release it.
del X
print(Xarr.shape, Yarr.shape)
gc.collect()

10222 10222
(10222, 363, 363, 3) (10222, 1)


69

In [5]:
# One-hot encode the integer class indices.
# num_classes is pinned to the number of known classes: left unspecified,
# to_categorical infers the width as max(Y) + 1, which would come out too narrow
# whenever the highest class index is absent from Y (e.g. when loading a subset).
Yarr_hot = to_categorical(Y, num_classes=len(class_names))
print(Xarr.shape, Yarr_hot.shape)

(10222, 363, 363, 3) (10222, 120)


In [6]:
# FEATURE EXTRACTION OF TRAINING ARRAYS
AUTO = tf.data.experimental.AUTOTUNE
def get_features(model_name, data_preprocessor, data, batch_size=64):
    '''
    Extract pooled backbone features from randomly augmented copies of ``data``.

    Each image is randomly flipped left/right and brightness-jittered, passed
    through ``data_preprocessor`` and an ImageNet-pretrained ``model_name``
    backbone without its classification head, then global-average-pooled.

    Parameters
    ----------
    model_name : callable
        Keras application constructor; called as
        ``model_name(weights='imagenet', include_top=False, input_shape=...)``.
    data_preprocessor : callable
        Preprocessing function, wrapped in a ``Lambda`` layer ahead of the backbone.
    data : array-like
        Images; ``data.shape[1:]`` is used as the model input shape.
    batch_size : int, optional
        Images per prediction batch. Defaults to 64, the previously fixed value.

    Returns
    -------
    The array produced by ``feature_extractor.predict``. Only the features are
    returned; the extractor model itself is discarded before returning.
    '''
    input_size = data.shape[1:]

    def preprocess(x):
        # Random augmentation. The draws are unseeded, so the extracted
        # features are not bit-for-bit reproducible between runs.
        x = tf.image.random_flip_left_right(x)
        x = tf.image.random_brightness(x, 0.5)
        return x

    dataset = tf.data.Dataset.from_tensor_slices(data)
    ds = dataset.map(preprocess, num_parallel_calls=AUTO).batch(batch_size)

    # Prepare pipeline: input -> preprocessing -> backbone -> global average pool.
    input_layer = Input(input_size)
    preprocessor = Lambda(data_preprocessor)(input_layer)
    base_model = model_name(weights='imagenet', include_top=False,
                            input_shape=input_size)(preprocessor)
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs=input_layer, outputs=avg)

    # Extract features.
    feature_maps = feature_extractor.predict(ds, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)

    # Drop the references to the model and datasets and collect right away so
    # the memory can be reclaimed before the next backbone is built.
    del feature_extractor, base_model, preprocessor, dataset, ds
    gc.collect()
    return feature_maps

In [7]:
# FEATURE EXTRACTION OF VALIDATION AND TESTING ARRAYS
def get_valfeatures(model_name, data_preprocessor, data, batch_size=64):
    '''
    Extract pooled backbone features from ``data`` with no image augmentation.

    Counterpart of ``get_features`` for validation and test images: the same
    preprocessing -> ImageNet backbone -> global-average-pool pipeline, but the
    images are fed through unchanged.

    Parameters
    ----------
    model_name : callable
        Keras application constructor; called as
        ``model_name(weights='imagenet', include_top=False, input_shape=...)``.
    data_preprocessor : callable
        Preprocessing function, wrapped in a ``Lambda`` layer ahead of the backbone.
    data : array-like
        Images; ``data.shape[1:]`` is used as the model input shape.
    batch_size : int, optional
        Images per prediction batch. Defaults to 64, the previously fixed value.

    Returns
    -------
    The array produced by ``feature_extractor.predict``.
    '''
    input_size = data.shape[1:]

    dataset = tf.data.Dataset.from_tensor_slices(data)
    ds = dataset.batch(batch_size)

    # Prepare pipeline: input -> preprocessing -> backbone -> global average pool.
    input_layer = Input(input_size)
    preprocessor = Lambda(data_preprocessor)(input_layer)
    base_model = model_name(weights='imagenet', include_top=False,
                            input_shape=input_size)(preprocessor)
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs=input_layer, outputs=avg)

    # Extract features.
    feature_maps = feature_extractor.predict(ds, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)

    # Same cleanup as get_features: drop the model and dataset references and
    # collect right away so the memory can be reclaimed promptly.
    del feature_extractor, base_model, preprocessor, dataset, ds
    gc.collect()
    return feature_maps

In [8]:
# RETURNING CONCATENATED FEATURES USING MODELS AND PREPROCESSORS
def get_concat_features(feat_func, models, preprocs, array):
    '''
    Run ``feat_func`` once per (model, preprocessor) pair and join the results.

    Parameters
    ----------
    feat_func : callable
        Called as ``feat_func(model, preprocessor, array)``; must return an
        array. Its ``__name__`` is printed for progress reporting.
    models : sequence
        Model constructors, paired by position with ``preprocs``.
    preprocs : sequence
        Preprocessing functions, one per entry in ``models``.
    array : array-like
        Data handed unchanged to every ``feat_func`` call.

    Returns
    -------
    numpy.ndarray
        The per-model outputs concatenated along the last axis.

    Raises
    ------
    ValueError
        If ``models`` and ``preprocs`` differ in length. ``np.concatenate``
        also raises it when ``models`` is empty.
    '''
    # Fail loudly on a mispaired configuration instead of silently ignoring
    # surplus preprocessors (or hitting an IndexError part-way through).
    if len(models) != len(preprocs):
        raise ValueError(
            f"models and preprocs must pair up one-to-one, got "
            f"{len(models)} models and {len(preprocs)} preprocessors")

    print(f"Beggining extraction with {feat_func.__name__}\n")
    feats_list = []

    for model, preproc in zip(models, preprocs):
        print(f"\nStarting feature extraction with {model.__name__} using {preproc.__name__}\n")
        feats_list.append(feat_func(model, preproc, array))

    # Join the per-model feature blocks column-wise.
    final_feats = np.concatenate(feats_list, axis=-1)

    # Release the per-model blocks now that they are copied into final_feats.
    # The caller still owns `array`, so deleting the local name frees nothing.
    del feats_list
    gc.collect()

    return final_feats

In [9]:
# Backbone constructors and their matching preprocessing functions.
#
# Every keras.applications module exports its preprocessing function under the
# same name, `preprocess_input`, so each import below rebinds that name. The
# alias on the line right after each import captures the function before the
# next import overwrites it - the statement order here must not be changed.

from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
inception_preprocessor = preprocess_input

from tensorflow.keras.applications.xception import Xception, preprocess_input
xception_preprocessor = preprocess_input

# NASNetLarge and nasnet_preprocessor are imported here but are not part of
# the `models` / `preprocs` lists below.
from tensorflow.keras.applications.nasnet import NASNetLarge, preprocess_input
nasnet_preprocessor = preprocess_input

from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
inc_resnet_preprocessor = preprocess_input

# Parallel lists: models[k] is paired with preprocs[k] by get_concat_features,
# so both must keep the same order.
models = [InceptionV3,  InceptionResNetV2, Xception, ]
preprocs = [inception_preprocessor,  inc_resnet_preprocessor, 
            xception_preprocessor, ]

In [10]:
# Push the whole training array through every backbone (with augmentation)
# and join the per-model features column-wise.
final_train_features = get_concat_features(
    feat_func=get_features,
    models=models,
    preprocs=preprocs,
    array=Xarr,
)

gc.collect()
print('Final feature maps shape', final_train_features.shape)

Beggining extraction with get_features


Starting feature extraction with InceptionV3 using preprocess_input

Feature maps shape:  (10222, 2048)

Starting feature extraction with InceptionResNetV2 using preprocess_input

Feature maps shape:  (10222, 1536)

Starting feature extraction with Xception using preprocess_input

Feature maps shape:  (10222, 2048)
Final feature maps shape (10222, 5632)


In [11]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop a fit once val_accuracy has gone 10 epochs without improving, and roll
# the model back to the weights from its best epoch.
EarlyStop_callback = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=0,
)

my_callback = [EarlyStop_callback]

In [12]:
from sklearn.model_selection import StratifiedKFold

# Stratified 3-fold split on the integer labels; the fixed random_state keeps
# fold membership the same between runs.
splits = list(StratifiedKFold(n_splits=3, shuffle=True, random_state=10).split(final_train_features, Y))

# Classifier dimensions are read from the data instead of being hard-coded, so
# the output layer always matches the width of the one-hot labels.
feature_shape = final_train_features.shape[1:]
num_classes = Yarr_hot.shape[1]

trained_models = []
val_accuracy = []
val_losses = []
train_accuracy = []
train_losses = []

# Prepare and train one classifier head per fold on the extracted features.
# NOTE(review): each validation fold drives early stopping AND supplies the
# reported score, so the CV validation figures are likely optimistic.
for i, (train_idx, valid_idx) in enumerate(splits):

    print(f"\nStarting fold {i+1}\n")
    x_train_fold = final_train_features[train_idx]
    y_train_fold = Yarr_hot[train_idx]
    x_val_fold = final_train_features[valid_idx]
    y_val_fold = Yarr_hot[valid_idx]

    # Heavy dropout straight into a softmax layer over the concatenated features.
    dnn = keras.models.Sequential([
        InputLayer(feature_shape),
        Dropout(0.7),
        Dense(num_classes, activation='softmax')
    ])

    dnn.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    print("Training...")
    # Train simple DNN on extracted features.
    h = dnn.fit(x_train_fold, y_train_fold,
                batch_size=128,
                epochs=80,
                verbose=0,
                validation_data=(x_val_fold, y_val_fold),
                callbacks=my_callback)  # author's note: max 95.07

    print("Evaluating model ...")
    # evaluate() returns [loss, accuracy] given the metrics compiled above.
    model_res_val = dnn.evaluate(x_val_fold, y_val_fold)
    model_res_train = dnn.evaluate(x_train_fold, y_train_fold)
    train_accuracy.append(model_res_train[1])
    train_losses.append(model_res_train[0])
    val_accuracy.append(model_res_val[1])
    val_losses.append(model_res_val[0])
    trained_models.append(dnn)

# Report each metric averaged over the folds.
print('\n CV Score -')
print(f"\nTrainAccuracy - {sum(train_accuracy)/len(train_accuracy)}")
print(f"\nTrainLoss - {sum(train_losses)/len(train_losses)}")
print(f"\nValAccuracy - {sum(val_accuracy)/len(val_accuracy)}")
print(f"\nValLoss - {sum(val_losses)/len(val_losses)}")



Starting fold 1

Training...
Evaluating model ...

Starting fold 2

Training...
Evaluating model ...

Starting fold 3

Training...
Evaluating model ...

 CV Score -

TrainAccuracy - 0.9752984841664633

TrainLoss - 0.09978195776542027

ValAccuracy - 0.9313243627548218

ValLoss - 0.22573373715082803
