*** Please upvote if this kernel helps you ***

In [None]:
# Let's check which GPU is provided
!nvidia-smi 

In [None]:
# Import all the required packages
import os
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn import model_selection
import warnings
import json
import cv2
import plotly.express as px
from collections import Counter
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
# Configuration shared by the cells below.
IMAGE_SIZE = 150  # images are resized to IMAGE_SIZE x IMAGE_SIZE pixels
BATCH_SIZE = 32   # images per batch
EPOCHS = 15       # maximum number of epochs per fold
CLASSES = 6       # width of the softmax output layer
FOLDS = 5         # number of cross-validation splits

In [None]:
# Show the installed TensorFlow version
tf.__version__

In [None]:
# Stop right away unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at:{}'.format(device_name))
else:
    raise SystemError('GPU Device not found')

In [None]:
# Root directory of the competition dataset
PARENT_DIR = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset'

In [None]:
# List the folders and files in the dataset directory
print(os.listdir(PARENT_DIR))

In [None]:
# Load the training CSV (its 'Image' and 'Class' columns are used below)
train_df = pd.read_csv(os.path.join(PARENT_DIR,'train.csv'))

In [None]:
# Bar chart of how many training images each class has.
df_c = (
    train_df['Class']
    .value_counts()
    .reset_index()
)
df_c.columns = ['class', 'count']
fig = px.bar(df_c, x='class', y='count')
fig.show()

In [None]:
# Keep the first ten image file names of every class for a visual check.
train_df_Airplane = train_df.loc[train_df['Class'] == 'Airplane', 'Image'].head(10)
train_df_Candle = train_df.loc[train_df['Class'] == 'Candle', 'Image'].head(10)
train_df_Christmas_Tree = train_df.loc[train_df['Class'] == 'Christmas_Tree', 'Image'].head(10)
train_df_Jacket = train_df.loc[train_df['Class'] == 'Jacket', 'Image'].head(10)
train_df_Miscellaneous = train_df.loc[train_df['Class'] == 'Miscellaneous', 'Image'].head(10)
train_df_Snowman = train_df.loc[train_df['Class'] == 'Snowman', 'Image'].head(10)

In [None]:
# Class names; used as tick labels by display_confusion_matrix.
c = [
    'Airplane',
    'Candle',
    'Christmas_Tree',
    'Jacket',
    'Miscellaneous',
    'Snowman',
]

In [None]:
# helper function for printing different class of images
def show_image(img_dir):
    """Plot training images in a grid with five columns.

    Args:
        img_dir: Iterable of image file names located in the ``train``
            folder under ``PARENT_DIR``.

    Files that OpenCV cannot read are skipped instead of raising.
    """
    train_dir = os.path.join(PARENT_DIR, 'train')
    file_names = list(img_dir)
    # Keep the original 2x5 layout for up to ten images and add rows beyond
    # that instead of failing on the eleventh subplot.
    rows = max(2, -(-len(file_names) // 5))
    plt.figure(figsize=(20, 10))
    position = 1
    for file_name in file_names:
        # imread expects an IMREAD_* flag; passing the COLOR_BGR2RGB
        # conversion code there does not convert anything, so the image
        # came back in BGR order and was displayed with red/blue swapped.
        img = cv2.imread(os.path.join(train_dir, file_name), cv2.IMREAD_COLOR)
        if img is None:
            # imread signals a missing or unreadable file by returning None.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_NEAREST)
        plt.subplot(rows, 5, position)
        plt.imshow(img)
        position += 1


def display_confusion_matrix(cmat, score, precision, recall):
    """Draw a confusion matrix as a heat map with optional summary metrics.

    Args:
        cmat: Matrix to display; presumably one row and one column per
            class in ``c`` (the tick labels assume this).
        score: F1 score to print, or None to leave it out.
        precision: Precision to print, or None to leave it out.
        recall: Recall to print, or None to leave it out.
    """
    plt.figure(figsize=(15, 15))
    ax = plt.gca()
    ax.matshow(cmat, cmap='Reds')

    tick_positions = range(len(c))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(c, fontdict={'fontsize': 7})
    plt.setp(ax.get_xticklabels(), rotation=45, ha="left", rotation_mode="anchor")
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(c, fontdict={'fontsize': 7})
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    summary_lines = []
    if score is not None:
        summary_lines.append('f1 = {:.3f} '.format(score))
    if precision is not None:
        summary_lines.append('precision = {:.3f} '.format(precision))
    if recall is not None:
        summary_lines.append('recall = {:.3f} '.format(recall))

    if summary_lines:
        # Anchor the text in axes coordinates (top-right corner). The former
        # data coordinate x=101 lies far outside a matrix with len(c)
        # columns, so the text was drawn off the plot.
        ax.text(
            0.98, 0.98, '\n'.join(summary_lines),
            transform=ax.transAxes,
            fontdict={'fontsize': 18, 'horizontalalignment': 'right',
                      'verticalalignment': 'top', 'color': '#804040'})
    plt.show()
    
def display_training_curves(training, validation, title, subplot):
    """Plot a training series and a validation series on one subplot.

    Args:
        training: Values plotted as the 'train' curve.
        validation: Values plotted as the 'valid.' curve.
        title: Metric name; used as the y-axis label and in the title.
        subplot: Matplotlib subplot code (e.g. 211). A code whose last
            digit is 1 also creates the figure.
    """
    starts_new_figure = subplot % 10 == 1
    if starts_new_figure:
        # First panel: create the figure the following panels are drawn on.
        plt.subplots(figsize=(10, 10), facecolor='#F0F0F0')
        plt.tight_layout()

    axes = plt.subplot(subplot)
    axes.set_facecolor('#F8F8F8')
    for series in (training, validation):
        axes.plot(series)
    axes.set_title('model ' + title)
    axes.set_xlabel('epoch')
    axes.set_ylabel(title)
    axes.legend(['train', 'valid.'])

In [None]:
# Sample Airplane Images
show_image(train_df_Airplane)

In [None]:
# Sample Candle Images
show_image(train_df_Candle)

In [None]:
# Sample Christmas Tree
show_image(train_df_Christmas_Tree)

In [None]:
# Sample Jacket Images
show_image(train_df_Jacket)

In [None]:
# Sample Miscellaneous Images
show_image(train_df_Miscellaneous)

In [None]:
# Sample Snowman Images
show_image(train_df_Snowman)

Let's do cross-validation

In [None]:
# Labels to stratify on, and the shuffled stratified K-fold splitter.
Y = train_df[['Class']]
skf = model_selection.StratifiedKFold(
    n_splits=FOLDS,
    shuffle=True,
    random_state=42,
)

In [None]:
# Helper that builds the checkpoint file name for a fold
def get_model_name(k):
    """Return the file name ``model_<k>.h5`` for fold identifier ``k``."""
    return 'model_{!s}.h5'.format(k)

In [None]:
# Augmenting image generator. validation_split lets flow_from_dataframe
# carve out "training" / "validation" subsets of a dataframe.
datagen = ImageDataGenerator(
    # random flips
    horizontal_flip=True,
    vertical_flip=True,
    # random geometric transforms
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # how pixels exposed by a transform are filled in
    fill_mode='nearest',
    validation_split=0.25,
)

In [None]:
# Define model
def create_new_model():
    """Build an uncompiled Xception-based classifier.

    Returns:
        A ``tf.keras.Sequential`` made of an ImageNet-initialised Xception
        backbone without its top, global average pooling, and a softmax
        ``Dense`` layer with ``CLASSES`` units.
    """
    # NOTE(review): the model has no input scaling and `datagen` sets no
    # rescale/preprocessing_function, so the backbone apparently sees raw
    # pixel values - confirm this is intended.
    backbone = tf.keras.applications.Xception(
        include_top=False,
        weights='imagenet',
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    )
    model = tf.keras.Sequential()
    model.add(backbone)
    model.add(tf.keras.layers.GlobalAveragePooling2D())
    model.add(tf.keras.layers.Dense(CLASSES, activation='softmax'))
    return model

In [None]:
# Stratified K-fold cross-validation: every fold trains a freshly built model
# on that fold's training rows and scores it on that fold's held-out rows.
VALIDATION_ACCURACY = []
VALIDAITON_LOSS = []  # (sic) spelling kept so existing references still work
n = len(train_df)
save_dir = './'
fold_var = 1
train_image_dir = os.path.join(PARENT_DIR, 'train')

# Validation images are fed without augmentation and without shuffling so
# that val_loss - which drives checkpointing, LR reduction and early
# stopping - is comparable between epochs.
val_datagen = ImageDataGenerator()

for train_index, val_index in skf.split(np.zeros(n), Y):
    training_data = train_df.iloc[train_index]
    validation_data = train_df.iloc[val_index]

    # Build the generators from this fold's rows. Previously both generators
    # read the full `train_df` with `subset=...`, so the K-fold split was
    # ignored and every fold saw the identical 75/25 split.
    train_generator = datagen.flow_from_dataframe(
        dataframe=training_data,
        directory=train_image_dir,
        x_col="Image",
        y_col="Class",
        batch_size=BATCH_SIZE,
        seed=42,
        shuffle=True,
        class_mode='categorical',
        color_mode='rgb',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
    )

    valid_data_generator = val_datagen.flow_from_dataframe(
        dataframe=validation_data,
        directory=train_image_dir,
        x_col="Image",
        y_col="Class",
        batch_size=BATCH_SIZE,
        shuffle=False,
        class_mode="categorical",
        color_mode='rgb',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
    )

    # Callbacks: keep the best checkpoint, shrink the LR on plateaus, and
    # stop early once val_loss stops improving.
    checkpoint_path = save_dir + get_model_name(fold_var)
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, monitor='val_loss', mode='min')
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.3, patience=3, min_lr=1e-5,
        mode='min', verbose=1)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', mode='min', patience=5,
        restore_best_weights=True, verbose=1)
    callbacks_list = [checkpoint_cb, reduce_lr, early_stopping_cb]

    # Weight each class inversely to its frequency in this fold's training
    # rows (the most frequent class gets weight 1.0).
    counter = Counter(train_generator.classes)
    max_val = float(max(counter.values()))
    class_weights = {class_id: max_val / num_images
                     for class_id, num_images in counter.items()}

    # Perform training
    with tf.device('/gpu:0'):
        model = create_new_model()
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
            loss='categorical_crossentropy',
            metrics=['categorical_accuracy'])
        # len(generator) is the whole number of batches per pass, replacing
        # the fractional n / BATCH_SIZE. `batch_size` is not passed because
        # the generators already produce batches.
        history = model.fit(
            train_generator,
            steps_per_epoch=len(train_generator),
            epochs=EPOCHS,
            validation_data=valid_data_generator,
            validation_steps=len(valid_data_generator),
            callbacks=callbacks_list,
            class_weight=class_weights)

    # Plotting accuracy history
    plt.figure(figsize=(15, 10))
    plt.plot(history.history['categorical_accuracy'])
    plt.plot(history.history['val_categorical_accuracy'])
    plt.title('Accuracy Tracker', fontsize=15)
    plt.xlabel('Epochs', fontsize=15)
    plt.ylabel('Accuracy', fontsize=15)
    plt.legend(['training', 'validation'])

    # Plotting loss history
    plt.figure(figsize=(15, 10))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Loss Tracker', fontsize=15)
    plt.xlabel('Epochs', fontsize=15)
    plt.ylabel('Loss', fontsize=15)
    plt.legend(['training', 'validation'])
    plt.show()

    # Reload this fold's best checkpoint before scoring the held-out rows.
    model.load_weights(checkpoint_path)

    results = model.evaluate(valid_data_generator)
    results = dict(zip(model.metrics_names, results))

    VALIDATION_ACCURACY.append(results['categorical_accuracy'])
    VALIDAITON_LOSS.append(results['loss'])

    tf.keras.backend.clear_session()

    fold_var += 1
    

In [None]:
# display_training_curves(
#     history.history['loss'],
#     history.history['val_loss'],
#     'loss',
#     211,
# )
# display_training_curves(
#     history.history['categorical_accuracy'],
#     history.history['val_categorical_accuracy'],
#     'accuracy',
#     212,
# )

In [None]:
# train_generator.class_indices

In [None]:
# cm_correct_labels = np.array(val_generator.labels)
# cm_predictions = np.argmax(model.predict(val_generator),-1)
# labels = range(len(c))
# cmat = confusion_matrix(
#     cm_correct_labels,
#     cm_predictions,
#     labels=labels,
# )
# cmat = (cmat.T / cmat.sum(axis=1)).T # normalize

In [None]:
# score = f1_score(
#     cm_correct_labels,
#     cm_predictions,
#     labels=labels,
#     average='macro',
# )
# precision = precision_score(
#     cm_correct_labels,
#     cm_predictions,
#     labels=labels,
#     average='macro',
# )
# recall = recall_score(
#     cm_correct_labels,
#     cm_predictions,
#     labels=labels,
#     average='macro',
# )
# display_confusion_matrix(cmat, score, precision, recall)

In [None]:
# print(score, precision, recall)

In [None]:
# Predict a class index for every test image with the `model` left in memory
# by the training loop, then build the submission frame in one step.
test_dir = os.path.join(PARENT_DIR, 'test')
test_image_names = os.listdir(test_dir)
predicted_classes = []

# Predict in batches rather than one image per call.
for start in range(0, len(test_image_names), BATCH_SIZE):
    batch_names = test_image_names[start:start + BATCH_SIZE]
    # Let load_img do the resizing (target_size) so test images go through
    # the same Keras loader as the training generators.
    batch = np.stack([
        np.asarray(
            tf.keras.preprocessing.image.load_img(
                os.path.join(test_dir, image_name),
                target_size=(IMAGE_SIZE, IMAGE_SIZE)),
            dtype=np.float32)
        for image_name in batch_names
    ])
    # argmax over the softmax output replaces Sequential.predict_classes,
    # which newer TensorFlow releases no longer provide.
    probabilities = model.predict(batch)
    predicted_classes.extend(np.argmax(probabilities, axis=-1).tolist())

# Build the frame once; DataFrame.append inside the loop re-copied the frame
# for every image and is no longer available in pandas 2.x.
submission = pd.DataFrame(
    {'Image': test_image_names, 'Class': predicted_classes},
    columns=['Image', 'Class'])

{'Airplane': 0,
 'Candle': 1,
 'Christmas_Tree': 2,
 'Jacket': 3,
 'Miscellaneous': 4,
 'Snowman': 5}

In [None]:
# Translate predicted class indices (0-5) back to their class names.
submission['Class'] = submission['Class'].map(dict(enumerate([
    'Airplane',
    'Candle',
    'Christmas_Tree',
    'Jacket',
    'Miscellaneous',
    'Snowman',
])))

In [None]:
submission.head()

In [None]:
# Saving CSV to output folder
submission.to_csv('submission.csv',index=False)