# DPhi - Flower Recognition Challenge

The dataset contains images of 5 types of flowers.

Classes:
- daisy
- dandelion
- rose
- sunflower
- tulip

# Reading & Understanding Data
## Importing Libraries

In [None]:
# importing libraries
import os, time, random, sys
os.environ['PYTHONHASHSEED']=str(1)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Plot styling: seaborn grid style first, then two matplotlib style sheets
# applied in order (the later sheet wins for any setting both define).
sns.set_style('whitegrid')
# NOTE(review): newer matplotlib releases renamed the seaborn style sheets
# (e.g. 'seaborn-v0_8-deep') — confirm this name exists in the installed version.
plt.style.use('seaborn-deep')
plt.style.use('fivethirtyeight')
# Font family and default text sizes used by every figure in the notebook.
plt.rcParams['font.family'] = 'sans-serif'
# NOTE(review): the active family is sans-serif, so this serif entry
# presumably has no visible effect — confirm the intended rcParam.
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 10
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 12
plt.rcParams['xtick.labelsize'] = 8
plt.rcParams['ytick.labelsize'] = 8
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['figure.titlesize'] = 14
plt.rcParams['figure.figsize'] = (12, 8)

# pandas display options: no chained-assignment warnings, floats shown with
# two decimals, and wide frames printed without truncation.
pd.options.mode.chained_assignment = None
pd.options.display.float_format = '{:.2f}'.format
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 400)
import warnings
warnings.filterwarnings('ignore')
import sklearn.metrics as skm
import sklearn.model_selection as skms
import sklearn.preprocessing as skp
import sklearn.utils as sku
from skimage.io import imread
from skimage.transform import resize
seed = 12

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
print("TF version:-", tf.__version__)
import keras as k
from keras import backend as K

In [None]:
def runSeed():
    """Seed every random number source used by this notebook.

    Applies the module-level ``seed`` to the stdlib ``random`` module, NumPy,
    TensorFlow and the ``PYTHONHASHSEED`` environment variable.
    """
    seed_value = seed
    random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)
    # The running interpreter reads PYTHONHASHSEED at start-up, so this
    # assignment only matters for processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

runSeed()

## Checking the GPU configuration
!nvidia-smi

### Loading Dataset

In [None]:
# from google_drive_downloader import GoogleDriveDownloader as gdd

# gdd.download_file_from_google_drive(file_id='1H0rJmSBmYQoWM2w2tqy-jmX0Y2Wg6k2v', 
#                                     dest_path='content/flowers.zip', unzip=True)

In [None]:
# Dataset root on Kaggle and the folder holding one sub-directory per class.
basePath = '/kaggle/input/flowers-dataset/'
trainPath = f'{basePath}train/'
os.listdir(trainPath)

In [None]:
# Listing of the test images; the submission files are written in this order.
submission_test_set = pd.read_csv(f'{basePath}Testing_set_flower.csv')
submission_test_set.head()

In [None]:
def showImage(img):
    """Display ``img`` in its own small (3 x 3 inch) matplotlib figure."""
    figure_size = (3, 3)
    plt.figure(figsize=figure_size)
    plt.imshow(img)
    plt.show()

# Data Preparation


## Setup Image Generator

In [None]:
# constants
batch_size = 128
img_dim = 299
def getImgTensor(img_d):
    """Return the ``(height, width, channels)`` shape of a square 3-channel image.

    img_d: side length, used for both height and width.
    """
    side = img_d
    channels = 3
    return (side, side, channels)
getImgTensor(img_dim)

In [None]:
# Training and validation images are read as separate subsets of the same directory (via validation_split) so the two sets do not overlap.

# Image pipeline for training: rescaling, a 20% validation hold-out and
# random augmentation (rotation, shifts, shear, zoom, horizontal flips).
train_datagen = k.preprocessing.image.ImageDataGenerator(
    rescale=1./255,            # multiply every pixel value by 1/255
    validation_split=0.2,      # fraction reserved for subset="validation"
    rotation_range=45,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Shuffled, one-hot-labelled batches read from the class folders in trainPath.
train_generator = train_datagen.flow_from_directory(
    directory=trainPath,
    subset="training",
    target_size=(img_dim, img_dim),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

In [None]:
# Generate class weights because the classes are imbalanced.
# `classes` and `y` are passed by keyword: current scikit-learn releases
# declare them keyword-only and reject the positional form.
class_weights = sku.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes,
)
# Map class index -> weight, the format Keras expects for `class_weight`.
train_class_weights = dict(enumerate(class_weights))
train_class_weights

In [None]:
# Preview the first two images of the next training batch.
batch = next(train_generator)[0]
for idx in (0, 1):
    showImage(batch[idx])

In [None]:
# Validation images must not be randomly augmented: val_loss is what the
# training callbacks monitor, so it has to be measured on undistorted images.
# This generator therefore only rescales, and uses the same validation_split
# as train_datagen so that subset="validation" selects the held-out images.
valid_datagen = k.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                                         validation_split=0.2)

valid_generator=valid_datagen.flow_from_directory(directory=trainPath,
                                                  subset="validation",
                                                  batch_size=batch_size,
                                                  color_mode="rgb",
                                                  seed=seed,
                                                  shuffle=True,
                                                  class_mode="categorical",
                                                  target_size=getImgTensor(img_dim)[:2])

In [None]:
# Test images are only rescaled — no augmentation.
test_datagen = k.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Read the single 'test' folder under basePath one image at a time and in a
# fixed order, so predictions can be matched back to test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
    directory=basePath,
    classes=['test'],
    target_size=(img_dim, img_dim),
    color_mode="rgb",
    batch_size=1,
    shuffle=False,
    seed=seed,
)

# Model Building

In [None]:
def plotModelHistory(h):
    """Plot training vs. validation loss and accuracy, and print the best accuracies.

    h: training history object whose ``history`` dict holds the per-epoch
       lists 'loss', 'val_loss', 'categorical_accuracy' and
       'val_categorical_accuracy'.
    """
    # One entry per subplot: the history keys to draw and the panel title.
    panels = [
        (['loss', 'val_loss'], "Train loss vs Validation loss"),
        (['categorical_accuracy', 'val_categorical_accuracy'],
         "Train accuracy vs Validation accuracy"),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(15, 4))
    for axis, (keys, title) in zip(axes, panels):
        for key in keys:
            axis.plot(h.history[key])
        axis.legend(keys)
        axis.title.set_text(title)

    print("Max. Training Accuracy", max(h.history['categorical_accuracy']))
    print("Max. Validation Accuracy", max(h.history['val_categorical_accuracy']))

In [None]:
class myCallback(k.callbacks.Callback):
    """Stop training as soon as training accuracy exceeds a fixed threshold."""

    ACCURACY_THRESHOLD = 0.99

    def on_epoch_end(self, epoch, logs=None):
        """Check 'categorical_accuracy' in ``logs`` and request a stop when it is high enough.

        epoch: index of the epoch that just finished (unused).
        logs:  metrics dict supplied by Keras; may be None or lack the metric,
               in which case nothing happens.
        """
        accuracy = (logs or {}).get('categorical_accuracy')
        if accuracy is None:
            # Metric not reported for this epoch — nothing to compare.
            return
        if accuracy > self.ACCURACY_THRESHOLD:
            print("\n\nStopping training as we have reached %2.2f%% accuracy!" %(self.ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

In [None]:
def trainModel(model, epochs, optimizer, vb=1, modelName='model', class_weight=None):
    """Compile ``model`` and train it on the module-level generators.

    model:        Keras model to compile and fit.
    epochs:       maximum number of epochs.
    optimizer:    optimizer name or instance passed to ``model.compile``.
    vb:           Keras verbosity level for fitting.
    modelName:    prefix of the checkpoint file './<modelName>_model.hdf5'.
    class_weight: optional {class index: weight} dict forwarded to ``fit``
                  (e.g. ``train_class_weights``); None trains unweighted.

    Returns the history object produced by ``model.fit``.

    Reads ``train_generator`` and ``valid_generator`` from module scope.
    """
    bestModelPath = './' + modelName + '_model.hdf5'
    callbacks_list = [
        # Stop early once training accuracy passes the callback's threshold.
        myCallback(),
        k.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, min_lr=0.00001),
        k.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=1, restore_best_weights=True),
        k.callbacks.ModelCheckpoint(filepath=bestModelPath, monitor='val_loss', verbose=1, save_best_only=True),
    ]
    model.compile(optimizer=optimizer,
                  loss=k.losses.CategoricalCrossentropy(label_smoothing=.05),
                  metrics=[k.metrics.CategoricalAccuracy()])
    train_generator.reset()

    # np.ceil returns a float; Keras expects integer step counts.
    steps_per_epoch = int(np.ceil(train_generator.n / train_generator.batch_size))
    validation_steps = int(np.ceil(valid_generator.n / valid_generator.batch_size))

    # Model.fit accepts generators directly; fit_generator is deprecated.
    return model.fit(train_generator, steps_per_epoch=steps_per_epoch,
                     validation_data=valid_generator, validation_steps=validation_steps,
                     epochs=epochs, verbose=vb,
                     class_weight=class_weight,
                     callbacks=callbacks_list)

In [None]:
# evaluate a model on the validation generator and time the evaluation
def evaluateModel(model, path=True):
    """Evaluate a model on ``valid_generator`` and print loss, accuracy and timing.

    model: path to a saved model when ``path`` is True, otherwise an
           already-built Keras model.
    path:  whether ``model`` must first be loaded with ``load_model``.

    Reads ``valid_generator`` from module scope. Prints only; returns None.
    """
    if path:
        # Load before starting the clock so the per-image time reflects
        # evaluation only, not reading the weights from disk.
        model = k.models.load_model(model)

    valid_generator.reset()
    # Enough batches to cover every validation image, including a final partial batch.
    steps = int(np.ceil(valid_generator.n / valid_generator.batch_size))

    t1 = time.time()
    # Model.evaluate accepts generators; verbose=0 keeps the output silent
    # as the deprecated evaluate_generator was by default.
    eval_results = model.evaluate(valid_generator, steps=steps, verbose=0)
    t2 = time.time()
    print(f'\nLoss: {eval_results[0]}, Accuracy: {eval_results[1]}')
    print(f'Prediction Time per Image: {(t2-t1)/valid_generator.n}')

In [None]:
# predict the test images with a saved model and write the submission file
def predictModel(modelPath):
    """Predict every test image with the model at ``modelPath`` and save a submission CSV.

    modelPath: path of a saved Keras model; the text before the first '_' in
               its file name becomes part of the output name
               'submission_<prefix>.csv'.

    Reads ``test_generator``, ``train_generator`` (for the class-index
    mapping) and ``submission_test_set`` (for the row order) from module
    scope. Raises KeyError if a file listed in ``submission_test_set`` was not
    produced by ``test_generator``. Prints progress; returns None.
    """
    # Load before starting the clock so the timing covers inference only.
    model = k.models.load_model(modelPath)

    test_generator.reset()
    # Enough batches to cover every test image, including a final partial batch.
    steps = int(np.ceil(test_generator.n / test_generator.batch_size))

    t1 = time.time()
    predictions = model.predict(test_generator, steps=steps, verbose=1)
    t2 = time.time()
    print(f'Prediction Time per Image: {(t2-t1)/test_generator.n}')

    print("Generating Predictions file..")
    # Invert {class name: index} so predicted indices map back to names.
    index_to_label = {index: name for name, index in train_generator.class_indices.items()}
    predicted_class_indices = np.argmax(predictions, axis=1)
    predictions_label = [index_to_label[index] for index in predicted_class_indices]
    filenames = [os.path.basename(name) for name in test_generator.filenames]
    submission = pd.DataFrame({
        "Filename": filenames,
        "Class": predictions_label
    })

    # Re-order predictions to follow the testing set. A dict gives O(1)
    # lookups per row; setdefault keeps the first prediction for a repeated
    # file name.
    label_by_file = {}
    for filename, label in zip(filenames, predictions_label):
        label_by_file.setdefault(filename, label)
    submission_final = pd.Series([label_by_file[x] for x in np.ravel(submission_test_set.values)])

    submission_file = "submission_" + os.path.basename(modelPath).split('_')[0] + ".csv"
    submission_final.to_csv(submission_file, index=False, header=['prediction'])
    print(f"Submission file with {len(submission.values)} rows generated:", submission_file)
    # A bare `submission.head()` inside a function displays nothing; print it.
    print(submission.head())

## Train MobileNetV2 - Light Model

In [None]:
# NOTE(review): the data generators were created earlier with their own fixed
# target_size; reassigning img_dim here only sets the backbone's declared
# input shape and does not resize the images they yield — confirm they match.
img_dim=224
mobilenet = k.applications.mobilenet_v2.MobileNetV2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
mobilenet.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model = k.models.Sequential([
    mobilenet,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    k.layers.Dense(256, activation='relu'),
    # k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    k.layers.Dense(128, activation='relu'),
    # k.layers.BatchNormalization(),
    k.layers.Dropout(0.2),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
history_1 = trainModel(model, 50, 'adam', modelName='mobilenet')

In [None]:
plotModelHistory(history_1)

## Train ResNet152V2 - Heavy Model

In [None]:
# NOTE(review): the data generators were created earlier with their own fixed
# target_size; reassigning img_dim here only sets the backbone's declared
# input shape and does not resize the images they yield — confirm they match.
img_dim=224
resnet152 = k.applications.ResNet152V2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
resnet152.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model_2 = k.models.Sequential([
    resnet152,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    # k.layers.Dense(1024, activation='relu'),
    # k.layers.BatchNormalization(),
    # k.layers.Dropout(0.3),

    k.layers.Dense(512, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),

    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model_2.summary()

In [None]:
history_2 = trainModel(model_2, 50, 'adam', modelName='resnet152')

In [None]:
plotModelHistory(history_2)

## Train InceptionV3 - Medium Model

In [None]:
# NOTE(review): the data generators were created earlier with their own fixed
# target_size; reassigning img_dim here only sets the backbone's declared
# input shape and does not resize the images they yield — confirm they match.
img_dim=224
inceptionv3 = k.applications.InceptionV3(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
inceptionv3.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model_3 = k.models.Sequential([
    inceptionv3,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    # k.layers.Dense(128, activation='relu'),
    # k.layers.BatchNormalization(),
    # k.layers.Dropout(0.2),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model_3.summary()

In [None]:
history_3 = trainModel(model_3, 50, 'adam', modelName='inceptionv3')

In [None]:
plotModelHistory(history_3)

## Train NASNetLarge - Heavy Model

In [None]:
# NOTE(review): the data generators were created earlier with their own fixed
# target_size; reassigning img_dim here only sets the backbone's declared
# input shape and does not resize the images they yield — confirm they match.
img_dim=331
nasnet = k.applications.nasnet.NASNetLarge(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
nasnet.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model_4 = k.models.Sequential([
    nasnet,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    # k.layers.Dense(128, activation='relu'),
    # k.layers.BatchNormalization(),
    # k.layers.Dropout(0.2),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model_4.summary()

In [None]:
history_4 = trainModel(model_4, 50, 'adam', modelName='nasnet_large')

In [None]:
plotModelHistory(history_4)

## Train InceptionResNetV2 - Heavy Model

In [None]:
img_dim=299
inceptionresnet = k.applications.InceptionResNetV2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
inceptionresnet.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model_5 = k.models.Sequential([
    inceptionresnet,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    # k.layers.Dense(128, activation='relu'),
    # k.layers.BatchNormalization(),
    # k.layers.Dropout(0.2),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model_5.summary()

In [None]:
history_5 = trainModel(model_5, 50, 'adam', modelName='inceptionresnet')

In [None]:
plotModelHistory(history_5)

## Train DenseNet169 - Light Model

In [None]:
img_dim=299
# NOTE: the variable is called densenet152 but holds a DenseNet169 backbone.
densenet152 = k.applications.DenseNet169(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
# Freeze the pretrained backbone; only the classification head below is trained.
densenet152.trainable = False

# All layers come from the same `k` namespace to avoid mixing keras and
# tf.keras layer classes in one model.
model_6 = k.models.Sequential([
    densenet152,
    k.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.4),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),
    # k.layers.Dense(128, activation='relu'),
    # k.layers.BatchNormalization(),
    # k.layers.Dropout(0.2),
    k.layers.Dense(5, activation='softmax')
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model_6.summary()

In [None]:
history_6 = trainModel(model_6, 50, 'adam', modelName='densenet169')

In [None]:
plotModelHistory(history_6)

## CNN + XGB Model

In [None]:
# Feature extractor: model_5 truncated at its third-from-last layer.
model_7 = k.Model(model_5.input, model_5.layers[-3].output)
train_generator.reset()

# One full pass over the training generator, collecting the extractor's
# output and the one-hot label for every image.
n_batches = int(np.ceil(train_generator.n/train_generator.batch_size))
X_train_embed = []
y_train_embed = []
for batch_idx in range(n_batches):
    x_batch, y_batch = next(train_generator)
    X_train_embed.extend(model_7.predict(x_batch))
    y_train_embed.extend(y_batch)

# Convert the one-hot labels to integer class indices and stack the features.
X_train_embed = np.array(X_train_embed)
y_train_embed = np.argmax(y_train_embed, axis=1)
print(y_train_embed.shape)

In [None]:
# create xgb classifier trained on the CNN embeddings
from xgboost import XGBClassifier
# `tree_method` must be spelled out in full for the GPU histogram algorithm
# to be selected. The number of classes is not passed: the scikit-learn
# wrapper derives it from the labels given to fit().
xgb = XGBClassifier(max_depth=10, objective='multi:softmax', n_estimators=1000,
                    tree_method='gpu_hist', gpu_id=0, n_jobs=-1)
xgb.fit(X_train_embed, y_train_embed)

In [None]:
# generate XGB predictions for the test data from the CNN embeddings
img_dim=299
# Start from the first test image so rows line up with test_generator.filenames.
test_generator.reset()
X_test_embed = np.array(model_7.predict(test_generator))
predictions_xgb = xgb.predict(X_test_embed)

# generate submission file for predictions
# Invert {class name: index} so predicted indices map back to names.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions_label = [labels[index] for index in predictions_xgb]
filenames = [os.path.basename(name) for name in test_generator.filenames]
submission=pd.DataFrame({
    "Filename":filenames,
    "Class":predictions_label
})
# generate series of predictions as per testing_set; a dict gives O(1) lookups
# per row, and setdefault keeps the first prediction for a repeated file name
label_by_file = {}
for filename, label in zip(filenames, predictions_label):
    label_by_file.setdefault(filename, label)
submission_final = pd.Series([label_by_file[x] for x in np.ravel(submission_test_set.values)])
# NOTE(review): the name says DENSENET169 but model_7 is built from model_5 — confirm the intended file name.
submission_file = "submission_DENSENET169_XGB.csv"
# Same 'prediction' column header as the files written by predictModel.
submission_final.to_csv(submission_file,index=False, header=['prediction'])
print(f"Submission file with {len(submission.values)} rows generated:", submission_file)
submission.head()

# Model Evaluation

In [None]:
# mobile net
img_dim=224
evaluateModel('./mobilenet_model.hdf5')

In [None]:
# resnet152
img_dim=224
evaluateModel('./resnet152_model.hdf5')

In [None]:
# inceptionv3
img_dim=224
evaluateModel('./inceptionv3_model.hdf5')

In [None]:
# nasnet
img_dim=331
evaluateModel('./nasnet_large_model.hdf5')
# evaluateModel(model_4, False)

In [None]:
# inceptionresnet
img_dim=299
evaluateModel('./inceptionresnet_model.hdf5')

In [None]:
# densenet169
img_dim=299
evaluateModel('./densenet169_model.hdf5')

# Model Prediction

In [None]:
# mobile net
img_dim=224
predictModel('./mobilenet_model.hdf5')

In [None]:
# resnet152
img_dim=224
predictModel('./resnet152_model.hdf5')

In [None]:
# inceptionv3
img_dim=224
predictModel('./inceptionv3_model.hdf5')

In [None]:
# nasnet
img_dim=331
predictModel('./nasnet_large_model.hdf5')

In [None]:
# inceptionresnet
img_dim=299
predictModel('./inceptionresnet_model.hdf5')

In [None]:
# densenet169
img_dim=299
predictModel('./densenet169_model.hdf5')