# Importing Libraries.

In [None]:
import random
import os 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.applications import EfficientNetB7
import cv2
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Seed every random number generator in use, for reproducibility.
def set_seed(seed=7):
    """Seed `random`, NumPy and TensorFlow with `seed` and export PYTHONHASHSEED.

    Parameters
    ----------
    seed : int, default 7
        Value handed to each generator; also written (as a string) to the
        PYTHONHASHSEED environment variable.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed(seed=7)

# Setting the Variables.

In [None]:
# Training hyper-parameters and output location shared by the cells below.
Epochs = 30                                        # maximum number of training epochs
batch_size = 32                                    # images per generator batch
img_size = (120, 120)                              # target size images are resized to
model_filepath = 'EffNetB7_baseline_holiday.h5'    # file the best checkpoint is written to

In [None]:
# Directories holding the training and test images.
train_path = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset/train'
test_path = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset/test'

# Table listing the training images; the cells below read its 'Image' and 'Class' columns.
train_df = pd.read_csv(
    '../input/hackerearth-deep-learning-challenge-holidayseason/dataset/train.csv'
)

# Preview the first rows.
train_df.head()

In [None]:
# Class frequency: one bar per label, to expose any class imbalance.
fig, ax = plt.subplots(figsize=(16, 8))
# The column is passed by keyword: handing it over positionally is deprecated
# in seaborn 0.11 and is interpreted as `data` (not `x`) from seaborn 0.12 on.
sns.countplot(x='Class', data=train_df, ax=ax)
ax.set_title('Class Frequency')
plt.show()

# Visualizing Images:


In [None]:
def show_sample_images(df, n_images=5):
    """Plot a random sample of images from `df` side by side in one row.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to sample from. Must have an 'Image' column (file name inside
        `train_path`) and a 'Class' column (used as each subplot's title).
    n_images : int, default 5
        Maximum number of images to show; fewer are drawn when `df` has
        fewer rows.

    Returns
    -------
    None
        The figure is displayed with `plt.show()`.
    """
    n_shown = min(n_images, len(df))
    if n_shown <= 0:
        # Nothing to draw; sampling from an empty frame would raise.
        return

    sample = df.sample(n_shown)
    # squeeze=False keeps `axes` two-dimensional even when a single image is shown.
    fig, axes = plt.subplots(1, n_shown, figsize=(20, 6), squeeze=False)
    for ax, img_name, label in zip(axes[0], sample['Image'], sample['Class']):
        ax.set_title(f'{label}')
        ax.axis('off')
        image = cv2.imread(os.path.join(train_path, img_name))
        if image is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; leave that panel empty instead of crashing.
            continue
        # OpenCV decodes to BGR while matplotlib expects RGB, so swap the
        # channels; otherwise red and blue appear exchanged.
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    fig.tight_layout()
    plt.show()
    

In [None]:
# Display one row of sample images for every class present in the training table.
class_names = train_df['Class'].unique()
for class_name in class_names:
    class_rows = train_df[train_df['Class'] == class_name]
    show_sample_images(class_rows)
    

# Splitting Training and Validation Data.

In [None]:
# Hold out 15% of the labelled rows for validation. The split is stratified on
# the class column and uses a fixed random_state so it is repeatable.
train, valid = train_test_split(
    train_df,
    test_size=.15,
    random_state=7,
    stratify=train_df['Class'].values,
)

# Renumber the rows of each part from 0, discarding the old index.
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)

In [None]:
# Image data generators.

# Random augmentations applied to training images only.
# NOTE(review): Keras documents a tuple/array for the *_shift_range options as
# a list of candidate shift values, not a (min, max) interval - confirm that
# picking between 0.1 and 0.3 is what is intended here.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=(0.1, 0.3),
    height_shift_range=(0.1, 0.3),
    brightness_range=(0.5, 1.4),
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# NOTE(review): both generators scale pixels by 1/255. tf.keras EfficientNet
# models are documented as expecting inputs in the [0, 255] range - confirm the
# rescale is intended before relying on the pretrained weights.
datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation images are only rescaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Training batches: augmented images read from `train_path`, shuffled, with
# integer-encoded ('sparse') class labels.
train_gen = datagen.flow_from_dataframe(
    train,
    directory=train_path,
    x_col='Image',
    y_col='Class',
    class_mode='sparse',
    target_size=img_size,
    batch_size=batch_size,
    shuffle=True,
    seed=7,
)
# Validation batches: rescaled only, integer-encoded ('sparse') class labels.
# shuffle=False is required because flow_from_dataframe shuffles by default;
# without it the generator serves images in a random order and
# `valid_gen.classes` no longer lines up row-for-row with the predictions
# obtained by running the model over this generator.
valid_gen = valid_datagen.flow_from_dataframe(
    dataframe=valid,
    directory=train_path,
    x_col='Image',
    y_col='Class',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='sparse',
    shuffle=False,
    seed=7,
)

In [None]:
# Defining the model.

# Derive the input shape from the shared `img_size` so the network always
# matches the size the generators resize images to.
input_shape = tuple(img_size) + (3,)
# One output unit per class found by the training generator.
num_classes = len(train_gen.class_indices)

# Base network: ImageNet-pretrained EfficientNetB7 without its classification head.
effnet = EfficientNetB7(include_top=False, weights='imagenet', input_shape=input_shape)

model = keras.Sequential([
    keras.Input(shape=input_shape),
    # NOTE(review): this layer is never adapt()-ed and is given no mean/variance,
    # so it presumably passes inputs through unchanged - confirm whether it is needed.
    keras.layers.experimental.preprocessing.Normalization(),
    effnet,

    # Averages each feature map to a single value, producing one flat vector
    # per image (so no separate Flatten layer is needed).
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dropout(.4, seed=7),

    keras.layers.Dense(num_classes, activation='softmax'),
])

# Compiling the model. `learning_rate` is used instead of the deprecated `lr`
# alias, which newer Keras releases reject. The sparse loss/metric match the
# integer labels produced by class_mode='sparse'.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [None]:
# Summary: print the model's layers, output shapes and parameter counts.
model.summary()

In [None]:
# Callbacks.

# Multiply the learning rate by 0.25 when val_loss has not improved by at
# least 0.01 for 2 epochs.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=.25,
    patience=2,
    min_delta=0.01,
)

# Stop training when val_loss has not improved by at least 0.001 for 4 epochs,
# and restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=.001,
    patience=4,
    restore_best_weights=True,
)

# Save the model to `model_filepath` whenever val_loss reaches a new best.
model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=model_filepath,
    monitor='val_loss',
    save_best_only=True,
)

callbacks_v1 = [reduce_lr, early_stopping, model_checkpoint]

In [None]:
# Fitting the model.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and
# has been removed from recent Keras releases.
# len(generator) is the number of batches in one full pass, including the
# final partial batch. `n // batch_size` silently dropped those samples and
# evaluates to 0 when there are fewer samples than one batch.
history = model.fit(
    train_gen,
    steps_per_epoch=len(train_gen),
    validation_data=valid_gen,
    validation_steps=len(valid_gen),
    epochs=Epochs,
    callbacks=callbacks_v1,
)

# Learning Curve.

In [None]:
hist = history.history

# Early stopping can end training before `Epochs` is reached, so the x-axis is
# built from the number of epochs actually recorded; using range(Epochs) makes
# plt.plot raise on the length mismatch in that case.
epochs_run = range(1, len(hist['loss']) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(16, 8))

# Plotting accuracy.
ax_acc.set_title('Accuracy')
ax_acc.plot(epochs_run, hist['sparse_categorical_accuracy'], color='g', label='Training Accuracy')
ax_acc.plot(epochs_run, hist['val_sparse_categorical_accuracy'], color='r', label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend()

# Plotting loss.
ax_loss.set_title('Loss')
ax_loss.plot(epochs_run, hist['loss'], color='g', label='Training_loss')
ax_loss.plot(epochs_run, hist['val_loss'], color='r', label='Validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend()

plt.show()

# Model Evaluation on Validation set.

In [None]:
# `Model.evaluate` accepts generators directly; `evaluate_generator` is
# deprecated and has been removed from recent Keras releases.
# verbose=0 keeps the cell output limited to the summary line below.
# score[0] is the loss and score[1] the single metric the model was compiled with.
score = model.evaluate(valid_gen, verbose=0)
print(f'Validation_Loss={score[0]} \n Validation_Accuracy={score[1]}')

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay

# Predicted classes.
# Predict batch by batch so that every prediction stays paired with the label
# of the same image, whatever order the generator serves the images in.
batch_probs, batch_labels = [], []
for batch_idx in range(len(valid_gen)):
    batch_images, batch_y = valid_gen[batch_idx]
    batch_probs.append(model.predict(batch_images, verbose=0))
    batch_labels.append(batch_y)

prob = np.concatenate(batch_probs)
preds = np.argmax(prob, axis=1)

# The generator yields the integer labels as floats; cast back for scikit-learn.
true_labels = np.concatenate(batch_labels).astype(int)

# Class names ordered by their integer index, used to label both axes.
cm_labels = sorted(valid_gen.class_indices, key=valid_gen.class_indices.get)

fig, ax = plt.subplots(figsize=(10, 10))
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# classes and columns are predicted classes, matching the display's axis labels.
# `labels` keeps the matrix full-sized even if some class is never predicted.
cm = confusion_matrix(true_labels, preds, labels=np.arange(len(cm_labels)))
disp = ConfusionMatrixDisplay(cm, display_labels=cm_labels)
disp.plot(ax=ax)
plt.show()

# Predicting using the best saved model weights.

In [None]:
# Reload the checkpoint written by ModelCheckpoint. The path comes from
# `model_filepath` so it cannot drift from the location the callback saves to.
best_model = keras.models.load_model(model_filepath)

**Some of the prediction code in this final part is adapted from this notebook: [https://www.kaggle.com/nikhil741/hackerearth-holiday-season-starter-kernel](https://www.kaggle.com/nikhil741/hackerearth-holiday-season-starter-kernel)**

In [None]:
# Class names ordered by their integer label, so that class_index[i] decodes
# the predicted integer i back to its class name.
class_index = [
    name
    for name, _ in sorted(train_gen.class_indices.items(), key=lambda item: item[1])
]
class_index

In [None]:
from keras.preprocessing.image import load_img,img_to_array

test_img_id = []            # image file names, in prediction order
preds = []                  # predicted class name for each image
count = 0                   # number of images that could not be loaded or scored

# Sorted so the submission rows come out in the same order on every run
# (os.listdir returns entries in arbitrary order).
for image in sorted(os.listdir(test_path)):
    try:
        # Resize while loading, using the shared `img_size` and Keras' own
        # loader rather than a hard-coded PIL resize.
        img = load_img(os.path.join(test_path, image), target_size=img_size)
        # Add the batch dimension and apply the same 1/255 scaling used by
        # the training/validation generators.
        img = np.expand_dims(img_to_array(img), axis=0) / 255

        pred = best_model.predict(img, verbose=0).argmax(axis=1)[0]
        pred = class_index[pred]
    except Exception as err:
        # Best effort: keep one row per test image by falling back to a fixed
        # label, but report what went wrong. `Exception` (not a bare except)
        # lets KeyboardInterrupt still stop the loop.
        print(f'Could not classify {image}: {err}')
        pred = 'Miscellaneous'
        count += 1
    test_img_id.append(str(image))
    preds.append(pred)

print(f'{count} number of images threw up a error.')

In [None]:
# Assemble the submission table (image name and predicted class), preview it,
# and write it to disk without the index column.
submission_columns = {'Image': test_img_id, 'Class': preds}
submissions = pd.DataFrame(submission_columns)
print(submissions.head())
submissions.to_csv('submissions.csv', index=False)