# HackerEarth DL Challenge - Holiday Season

You work for a social media platform. Your task is to create a solution using deep learning to discern whether a post is holiday-related in an effort to better monetize the platform.

Classes:-
- Miscellaneous
- Christmas_Tree
- Jacket
- Candle
- Airplane
- Snowman

# Reading & Understanding Data
## Importing Libraries

In [None]:
# importing libraries
import os, time, random, sys
os.environ['PYTHONHASHSEED']=str(1)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Plot styling: seaborn grid first, then the two matplotlib style sheets
# (applied in list order, so the later sheet wins on overlapping settings).
sns.set_style('whitegrid')
plt.style.use(['seaborn-deep', 'fivethirtyeight'])

# Font and size defaults for every figure in the notebook
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 10,
    'axes.labelsize': 12,
    'axes.titlesize': 12,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 12,
    'figure.titlesize': 14,
    'figure.figsize': (12, 8),
})

# pandas: silence chained-assignment warnings and set display options
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', '{:.2f}'.format)
pd.options.display.max_columns = 200
pd.options.display.width = 400
import warnings
warnings.filterwarnings('ignore')
import sklearn.metrics as skm
import sklearn.model_selection as skms
import sklearn.preprocessing as skp
import sklearn.utils as sku
from skimage.io import imread
from skimage.transform import resize
seed = 12

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
print("TF version:-", tf.__version__)
import keras as k
from keras import backend as K

In [None]:
def runSeed():
    """Seed `random`, NumPy and TensorFlow, and set PYTHONHASHSEED, from the module-level `seed`."""
    seed_value = seed  # read-only use of the notebook-level seed
    random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

runSeed()

## Checking the GPU configuration
!nvidia-smi

### Loading Dataset

In [None]:
# Dataset root; the training image folder and the label CSV are both addressed relative to it
basePath = '/kaggle/input/hackerearth-deep-learning-challenge-holidayseason/dataset/'
trainPath = f'{basePath}train/'
df_base = pd.read_csv(f'{basePath}train.csv')
df_base.head()

### About the dataset

In [None]:
# Number of samples, followed by the per-class sample counts
print(f"Dataset has {df_base.shape[0]} samples")
print("Count of samples")
class_counts = df_base['Class'].value_counts()
class_counts.reset_index()

In [None]:
# def scanImgFeatures(path):
#     features = []
#     files = sorted(os.listdir(path))
#     for x in files:
#         fp = os.path.join(path, x)
#         img = imread(fp)/255.0
#         features.append(img)
#     return np.array(features), files

def showImage(img):
    """Draw a single image in its own 4x4 inch figure and show it."""
    _, image_ax = plt.subplots(figsize=(4, 4))
    image_ax.imshow(img)
    plt.show()

# def getPathLabels(p):
#     return [df_base[df_base['Image'] == x].iloc[0,1] for x in p]

In [None]:
# train_data_img, train_files_img = scanImgFeatures(trainPath)
# test_data_img, test_files_img = scanImgFeatures(testPath)
# train_labels = getPathLabels(train_files_img)
# showImage(test_data_img[0])

# Data Preparation


## Split Train & Validation Sets

In [None]:
# The split below returns row labels that are later used to select rows of
# df_base by position, so df_base must carry the default 0..n-1 index.
assert df_base.index.equals(pd.RangeIndex(len(df_base))), "df_base must have a default RangeIndex"

# Shuffle the samples but KEEP the original row labels. Resetting the index
# here would turn the labels into positions of the shuffled frame, and using
# those to index df_base would pick unrelated rows and undo the stratification.
df_shuffle = df_base.sample(frac=1, random_state=seed)

# Only the target column is needed for the split; drop the columns from the
# shuffled copy (df_base itself is left untouched).
df_y = df_shuffle['Class']
df_shuffle = df_shuffle.drop(columns=['Image', 'Class'])

# Stratified 90/10 split into train and validation labels
y_train, y_valid = skms.train_test_split(df_y, train_size=0.9, random_state=seed, stratify=df_y)

In [None]:
# Report the size and share of each split. The second split is the validation
# set (y_valid), so it is labelled as such rather than "Test".
total_records = len(df_shuffle)
for split_name, split_labels in (("Train", y_train), ("Validation", y_valid)):
    split_size = split_labels.shape[0]
    print(f"{split_name} set has {split_size} records out of {total_records} which is {round(split_size/total_records*100)}%")

In [None]:
# Stratified split check: print the class counts of the train and validation labels
for split_labels in (y_train, y_valid):
    print(split_labels.value_counts())

In [None]:
# Build df_train / df_valid by selecting rows of df_base at the positions held
# in the split's index.
# NOTE(review): this requires y_train.index / y_valid.index to be df_base row
# positions; verify against the cell that creates the split.
train_positions = y_train.index.tolist()
df_train = df_base.iloc[train_positions].reset_index(drop=True)
print("Train data:", df_train['Class'].value_counts())

valid_positions = y_valid.index.tolist()
df_valid = df_base.iloc[valid_positions].reset_index(drop=True)
print("Validation data:", df_valid['Class'].value_counts())

## Setup Image Generator

In [None]:
# Image pipeline constants
batch_size = 128
img_dim = 224


def getImgTensor(img_d):
    """Return the (height, width, channels) shape of a square 3-channel image with side `img_d`."""
    return (img_d,) * 2 + (3,)


getImgTensor(img_dim)

In [None]:
# Training and validation images are read through separate generators so the
# two sets cannot overlap.

# Training images: pixel values multiplied by 1/255, plus random zoom and
# horizontal-flip augmentation.
train_datagen = k.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    horizontal_flip=True,
)

# File names come from the "Image" column, labels from the "Class" column.
# The target size is fixed here, from the value of img_dim at this point.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=trainPath,
    x_col="Image",
    y_col="Class",
    subset="training",
    class_mode="categorical",
    color_mode="rgb",
    target_size=getImgTensor(img_dim)[:2],
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

In [None]:
# Generate class weights because the classes are imbalanced.
# Arguments are passed by keyword: current scikit-learn releases accept
# `classes` and `y` as keyword-only, so the positional form raises a TypeError.
class_weights = sku.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes,
)
# Map class index -> weight
train_class_weights = dict(enumerate(class_weights))
train_class_weights

In [None]:
# Pull one batch from the training generator and preview its first two images
batch = next(train_generator)[0]
for preview_index in (0, 1):
    showImage(batch[preview_index])

In [None]:
# Validation images are only rescaled (no augmentation).
valid_datagen = k.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Same column mapping and target size as the training generator, but fed from df_valid.
# `subset="training"` is kept as in the original; no validation_split is set on valid_datagen.
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=df_valid,
    directory=trainPath,
    x_col="Image",
    y_col="Class",
    subset="training",
    class_mode="categorical",
    color_mode="rgb",
    target_size=getImgTensor(img_dim)[:2],
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

In [None]:
# Test images are only rescaled (no augmentation).
test_datagen = k.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Read images from the 'test' folder under basePath, one image per batch and
# without shuffling, so predictions can be matched to test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
    directory=basePath,
    classes=['test'],
    color_mode="rgb",
    target_size=getImgTensor(img_dim)[:2],
    batch_size=1,
    shuffle=False,
    seed=seed,
)

# Model Building

In [None]:
def plotModelHistory(h):
    """Plot train vs validation loss and categorical accuracy from a training history.

    `h` must expose a `history` dict with the keys 'loss', 'val_loss',
    'categorical_accuracy' and 'val_categorical_accuracy'. The best train and
    validation accuracies are also printed.
    """
    metrics = h.history
    _, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15,4))

    # One panel per metric: (axis, train key, panel title)
    panels = (
        (loss_ax, 'loss', "Train loss vs Validation loss"),
        (acc_ax, 'categorical_accuracy', "Train accuracy vs Validation accuracy"),
    )
    for axis, train_key, title in panels:
        valid_key = 'val_' + train_key
        axis.plot(metrics[train_key])
        axis.plot(metrics[valid_key])
        axis.legend([train_key, valid_key])
        axis.title.set_text(title)

    print("Max. Training Accuracy", max(metrics['categorical_accuracy']))
    print("Max. Validation Accuracy", max(metrics['val_categorical_accuracy']))

In [None]:
class myCallback(k.callbacks.Callback):
    """Stop training once validation categorical accuracy exceeds a threshold."""

    # Validation accuracy above which training is stopped
    ACCURACY_THRESHOLD = 0.98

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation accuracy and request a stop if it is high enough.

        `logs` defaults to None rather than a shared mutable dict. If the
        'val_categorical_accuracy' entry is missing, nothing is compared
        (the original comparison of None with a float raised a TypeError).
        """
        val_accuracy = (logs or {}).get('val_categorical_accuracy')
        if val_accuracy is None:
            return
        if val_accuracy > self.ACCURACY_THRESHOLD:
            print("\n\nStopping training as we have reached %2.2f%% accuracy!" %(self.ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

In [None]:
def trainModel(model, epochs, optimizer, vb=1, modelName='model'):
    """Compile `model` and train it on the notebook-level train/validation generators.

    Parameters
    ----------
    model : Keras model to compile and train.
    epochs : maximum number of epochs.
    optimizer : optimizer name or instance forwarded to `model.compile`.
    vb : verbosity forwarded to `model.fit`.
    modelName : prefix of the checkpoint file, written to './<modelName>_model.hdf5'.

    Returns
    -------
    The history object returned by `model.fit`.
    """
    def stepsFor(generator):
        # Ceiling division: a final, smaller batch still needs its own step
        return (generator.n + generator.batch_size - 1) // generator.batch_size

    bestModelPath = './'+modelName+'_model.hdf5'
    callbacks_list = [
        # stop as soon as the validation accuracy threshold is crossed
        myCallback(),
        # lower the learning rate after 5 epochs without val_loss improvement
        k.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, min_lr=0.00001),
        # give up after 15 epochs without val_loss improvement, restoring the best weights
        k.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=1, restore_best_weights=True),
        # keep only the checkpoint with the best val_loss
        k.callbacks.ModelCheckpoint(filepath=bestModelPath, monitor='val_loss', verbose=1, save_best_only=True)
    ]
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=[k.metrics.CategoricalAccuracy(), k.metrics.Precision(), k.metrics.Recall()]
    )

    train_generator.reset()
    # `fit` accepts generators directly; `fit_generator` is deprecated.
    # Class weights (train_class_weights) are not applied here; pass
    # class_weight=train_class_weights to enable them.
    return model.fit(train_generator,
                     steps_per_epoch=stepsFor(train_generator),
                     validation_data=valid_generator,
                     validation_steps=stepsFor(valid_generator),
                     epochs=epochs,
                     verbose=vb,
                     callbacks=callbacks_list)

In [None]:
# evaluate model with time
def evaluateModel(model, path=True):
    """Evaluate a model on the notebook-level validation generator and print metrics and timing.

    Parameters
    ----------
    model : path to a saved model when `path` is True, otherwise a model instance.
    path : whether `model` is a file path that must be loaded first.

    Prints loss, accuracy, precision and recall (read from positions 0-3 of the
    evaluation result) and the average evaluation time per image.
    """
    if path:
        # Load before starting the clock so disk I/O is not counted as prediction time
        model = k.models.load_model(model)

    valid_generator.reset()
    # Ceiling division: a final, smaller batch still needs its own step
    steps = (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size

    t1 = time.time()
    # `evaluate` accepts generators directly; `evaluate_generator` is deprecated.
    # verbose=0 matches the silent default of the old call.
    eval_results = model.evaluate(valid_generator, steps=steps, verbose=0)
    t2 = time.time()
    print(f'\nLoss: {eval_results[0]}, Accuracy: {eval_results[1]}, Precision: {eval_results[2]}, Recall: {eval_results[3]}')
    print(f'Prediction Time per Image: {(t2-t1)/valid_generator.n}')

In [None]:
# predict images using model
def predictModel(modelPath):
    """Predict the test images with a saved model and write a submission CSV.

    Parameters
    ----------
    modelPath : path of the saved model file. The text before the first '_'
        in its file name is used in the output name 'submission_<tag>.csv'.

    Prints the average prediction time per image, the name of the generated
    file and a preview of its first rows.
    """
    # Load before starting the clock so disk I/O is not counted as prediction time
    model = k.models.load_model(modelPath)

    test_generator.reset()
    # Ceiling division: a final, smaller batch still needs its own step
    steps = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size

    t1 = time.time()
    # `predict` accepts generators directly; `predict_generator` is deprecated
    predictions = model.predict(test_generator, steps=steps, verbose=1)
    t2 = time.time()
    print(f'Prediction Time per Image: {(t2-t1)/test_generator.n}')

    print("Generating Predictions file..")
    # Invert the training generator's label -> index mapping to decode predictions
    index_to_label = {index: label for label, index in train_generator.class_indices.items()}
    predicted_class_indices = np.argmax(predictions, axis=1)
    predictions_label = [index_to_label[index] for index in predicted_class_indices]
    # Keep only the file name, dropping the folder prefix
    filenames = [os.path.basename(name) for name in test_generator.filenames]
    submission = pd.DataFrame({
        "Image": filenames,
        "Class": predictions_label
    })
    submission_file = "submission_" + os.path.basename(modelPath).split('_')[0] + ".csv"
    submission.to_csv(submission_file, index=False)
    print(f"Submission file with {len(submission)} rows generated:", submission_file)
    # The original `submission.head()` result was discarded inside the function; print it instead
    print(submission.head())

## Train MobileNetV2 - Light Model

In [None]:
img_dim=224
# ImageNet-pretrained MobileNetV2 backbone without its classification top, frozen
mobilenet = k.applications.MobileNetV2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
mobilenet.trainable = False

# Trainable head: pooled backbone features -> dense layer -> 6-way softmax (one unit per class)
model = k.models.Sequential([
    mobilenet,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model.summary()

In [None]:
# Train the MobileNetV2 model for at most 50 epochs; the checkpoint file is prefixed 'mobilenet'
history_1 = trainModel(model=model, epochs=50, optimizer='adam', modelName='mobilenet')

In [None]:
# Loss / accuracy curves of the MobileNetV2 run
plotModelHistory(h=history_1)

## Train ResNet152V2 - Heavy Model

In [None]:
img_dim=224
# ImageNet-pretrained ResNet152V2 backbone without its classification top, frozen
resnet152 = k.applications.ResNet152V2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
resnet152.trainable = False

# Trainable head: pooled backbone features -> dense layer with batch norm -> 6-way softmax
model_2 = k.models.Sequential([
    resnet152,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_2.summary()

In [None]:
# Train the ResNet152V2 model for at most 20 epochs; the checkpoint file is prefixed 'resnet152'
history_2 = trainModel(model=model_2, epochs=20, optimizer='adam', modelName='resnet152')

In [None]:
# Loss / accuracy curves of the ResNet152V2 run
plotModelHistory(h=history_2)

## Train InceptionV3 - Medium Model

In [None]:
img_dim=224
# ImageNet-pretrained InceptionV3 backbone without its classification top, frozen
inceptionv3 = k.applications.InceptionV3(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
inceptionv3.trainable = False

# Trainable head: pooled backbone features -> dense layer with batch norm -> 6-way softmax
model_3 = k.models.Sequential([
    inceptionv3,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_3.summary()

In [None]:
# Train the InceptionV3 model for at most 50 epochs; the checkpoint file is prefixed 'inceptionv3'
history_3 = trainModel(model=model_3, epochs=50, optimizer='adam', modelName='inceptionv3')

In [None]:
# Loss / accuracy curves of the InceptionV3 run
plotModelHistory(h=history_3)

## Train NASNetLarge - Heavy Model

In [None]:
# NOTE(review): train_generator / valid_generator / test_generator were created
# earlier with a target_size computed from img_dim=224. Reassigning img_dim here
# only changes this model's input_shape; the generators still yield 224x224
# images. Confirm they are rebuilt for 331x331 before training this model.
img_dim=331
# ImageNet-pretrained NASNetLarge backbone without its classification top, frozen
nasnet = k.applications.nasnet.NASNetLarge(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
nasnet.trainable = False

# Trainable head: pooled backbone features -> dense layer with batch norm -> 6-way softmax
model_4 = k.models.Sequential([
    nasnet,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_4.summary()

In [None]:
# Train the NASNetLarge model for at most 50 epochs with Adam(1e-4); the checkpoint file is prefixed 'nasnet_large'
history_4 = trainModel(model=model_4, epochs=50, optimizer=k.optimizers.Adam(1e-4), modelName='nasnet_large')

In [None]:
# Loss / accuracy curves of the NASNetLarge run
plotModelHistory(h=history_4)

## Train InceptionResNetV2 - Heavy Model

In [None]:
img_dim=224
# ImageNet-pretrained InceptionResNetV2 backbone without its classification top, frozen
inceptionresnet = k.applications.InceptionResNetV2(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
inceptionresnet.trainable = False

# Trainable head: pooled backbone features -> dense layer with batch norm -> 6-way softmax
model_5 = k.models.Sequential([
    inceptionresnet,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_5.summary()

In [None]:
# Train the InceptionResNetV2 model for at most 50 epochs with Adam(1e-4); the checkpoint file is prefixed 'inceptionresnet'
history_5 = trainModel(model=model_5, epochs=50, optimizer=k.optimizers.Adam(1e-4), modelName='inceptionresnet')

In [None]:
# Loss / accuracy curves of the InceptionResNetV2 run
plotModelHistory(h=history_5)

## Train DenseNet169 - Light Model

In [None]:
img_dim=224
# ImageNet-pretrained DenseNet169 backbone without its classification top, frozen.
# The variable is called `densenet152` although it holds a DenseNet169; the name is kept unchanged.
densenet152 = k.applications.DenseNet169(weights='imagenet', input_shape=getImgTensor(img_dim), include_top=False)
densenet152.trainable = False

# Trainable head: pooled backbone features -> dense layer with batch norm -> 6-way softmax
model_6 = k.models.Sequential([
    densenet152,
    tf.keras.layers.GlobalAveragePooling2D(),
    k.layers.Dropout(0.3),
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.25),
    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_6.summary()

In [None]:
# Train the DenseNet169 model for at most 50 epochs; the checkpoint file is prefixed 'densenet169'
history_6 = trainModel(model=model_6, epochs=50, optimizer='adam', modelName='densenet169')

In [None]:
# Loss / accuracy curves of the DenseNet169 run
plotModelHistory(h=history_6)

## Custom Conv2D Model

In [None]:
img_dim=224
# CNN trained from scratch: six 3x3 convolution + max-pooling stages, then
# three dense layers and a 6-way softmax (one unit per class).
model_7 = k.models.Sequential([
    # Stages 1-2: 128 filters, no normalisation or dropout
    k.layers.Conv2D(128, 3, activation='relu', input_shape=getImgTensor(img_dim)),
    k.layers.MaxPooling2D(2),

    k.layers.Conv2D(128, 3, activation='relu'),
    k.layers.MaxPooling2D(2),

    # Stage 3: 128 filters with batch norm and dropout
    k.layers.Conv2D(128, 3, activation='relu'),
    k.layers.MaxPooling2D(2),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),

    # Stages 4-6: 256 filters with batch norm and dropout
    k.layers.Conv2D(256, 3, activation='relu'),
    k.layers.MaxPooling2D(2),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),

    k.layers.Conv2D(256, 3, activation='relu'),
    k.layers.MaxPooling2D(2),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.4),

    k.layers.Conv2D(256, 3, activation='relu'),
    k.layers.MaxPooling2D(2),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.3),

    k.layers.Flatten(),

    # Dense head: 256 -> 128 -> 64 units, each with batch norm and dropout
    k.layers.Dense(256, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.2),

    k.layers.Dense(128, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.2),

    k.layers.Dense(64, activation='relu'),
    k.layers.BatchNormalization(),
    k.layers.Dropout(0.2),

    k.layers.Dense(6, activation='softmax')
])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None"
model_7.summary()

In [None]:
# Train the custom CNN for at most 50 epochs; the checkpoint file is prefixed 'custom'
history_7 = trainModel(model=model_7, epochs=50, optimizer='adam', modelName='custom')

In [None]:
# Loss / accuracy curves of the custom CNN run
plotModelHistory(h=history_7)

# Model Evaluation

In [None]:
# MobileNetV2: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./mobilenet_model.hdf5')

In [None]:
# ResNet152V2: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./resnet152_model.hdf5')

In [None]:
# InceptionV3: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./inceptionv3_model.hdf5')

In [None]:
# NASNetLarge: evaluate the saved checkpoint on the validation generator.
# NOTE(review): evaluateModel does not read img_dim, and valid_generator was built
# while img_dim was 224, so setting 331 here does not resize the validation images.
img_dim = 331
evaluateModel(model='./nasnet_large_model.hdf5')
# Alternative using the in-memory model instead of the checkpoint:
# evaluateModel(model_4, False)

In [None]:
# InceptionResNetV2: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./inceptionresnet_model.hdf5')

In [None]:
# DenseNet169: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./densenet169_model.hdf5')

In [None]:
# Custom CNN: evaluate the saved checkpoint on the validation generator.
# evaluateModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
evaluateModel(model='./custom_model.hdf5')

# Model Prediction

In [None]:
# MobileNetV2: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./mobilenet_model.hdf5')

In [None]:
# ResNet152V2: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./resnet152_model.hdf5')

In [None]:
# InceptionV3: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./inceptionv3_model.hdf5')

In [None]:
# NASNetLarge: predict the test images and write the submission file.
# NOTE(review): predictModel does not read img_dim, and test_generator was built
# while img_dim was 224, so setting 331 here does not resize the test images.
img_dim = 331
predictModel(modelPath='./nasnet_large_model.hdf5')

In [None]:
# InceptionResNetV2: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./inceptionresnet_model.hdf5')

In [None]:
# DenseNet169: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./densenet169_model.hdf5')

In [None]:
# Custom CNN: predict the test images and write the submission file.
# predictModel does not read img_dim; the assignment only updates the notebook-level variable.
img_dim = 224
predictModel(modelPath='./custom_model.hdf5')