In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print every directory found under /kaggle (directories only, no file names).
for current_dir, _subdirs, _files in os.walk('/kaggle'):
    print(current_dir)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing all used Libraries.

In [None]:
from shutil import copyfile
from random import seed
from random import random
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
from matplotlib.image import imread
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model


### Defining parent directory and listing files in it.

In [None]:
# Root folder of the competition dataset; every data path below is built from it.
PARENT_DIR = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset'
# Show the entries of the dataset folder.
print(os.listdir(PARENT_DIR))


### Defining data directories.

In [None]:
# CSV read into the training label table in the next cell.
train_csv = os.path.join(PARENT_DIR, 'train.csv')
# Folder with the labelled training images.
train_dir = os.path.join(PARENT_DIR, 'train')
# Folder with the images that are predicted for the submission.
test_dir = os.path.join(PARENT_DIR, 'test')

### Reading CSV file containing training data labels.

In [None]:
# Label table; later cells read its `Image` (file name) and `Class` columns.
train_labels = pd.read_csv(train_csv)

### Observing value counts of all Classes.

In [None]:
# Number of training rows per class.
train_labels['Class'].value_counts()

### Viewing Some Images of Training Dataset

In [None]:
# Take the first 20 labelled rows for the preview below.
sample_images = train_labels.iloc[:20]
sample_images

In [None]:
# Preview the sampled training images in a 5x4 grid.
plt.figure(figsize=(10,10))
for i, img in enumerate(sample_images.Image, start=1):
    # The second argument of cv2.imread is an IMREAD_* flag, not a colour
    # conversion code, so decode the file first and convert explicitly:
    # OpenCV decodes to BGR while matplotlib expects RGB.
    img = cv2.imread(os.path.join(train_dir, img))
    if img is None:
        # cv2.imread returns None for a missing or unreadable file; leave the slot empty.
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (150, 150), interpolation=cv2.INTER_NEAREST)
    plt.subplot(5, 4, i)
    plt.imshow(img)

### Splitting Training DataSet into Training and Validation Set

*Note* - The split is stratified so that the training and validation sets contain the same proportion of each class.

In [None]:
# Hold out 20% of the labelled rows for validation, stratified on Class.
# random_state is fixed so the split (and everything derived from it) is the
# same on every Restart & Run All.
train_df, val_df = train_test_split(
    train_labels,
    test_size=.20,
    stratify=train_labels['Class'].values,
    shuffle=True,
    random_state=42,
)

In [None]:
# Drop the original row index and renumber each split from 0.
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [None]:
# Class distribution of the training split.
train_df['Class'].value_counts()


In [None]:
# Class distribution of the validation split.
val_df['Class'].value_counts()

### Creating separate DataFrames for each class, for both the training and validation sets.

In [None]:
# One frame per class holding the training-split rows of that class.
train_Airplane = train_df.loc[train_df['Class'].eq('Airplane')]
train_Candle = train_df.loc[train_df['Class'].eq('Candle')]
train_Christmas_Tree = train_df.loc[train_df['Class'].eq('Christmas_Tree')]
train_Jacket = train_df.loc[train_df['Class'].eq('Jacket')]
train_Miscellaneous = train_df.loc[train_df['Class'].eq('Miscellaneous')]
train_Snowman = train_df.loc[train_df['Class'].eq('Snowman')]

In [None]:
# One frame per class holding the validation-split rows of that class.
val_Airplane = val_df.loc[val_df['Class'].eq('Airplane')]
val_Candle = val_df.loc[val_df['Class'].eq('Candle')]
val_Christmas_Tree = val_df.loc[val_df['Class'].eq('Christmas_Tree')]
val_Jacket = val_df.loc[val_df['Class'].eq('Jacket')]
val_Miscellaneous = val_df.loc[val_df['Class'].eq('Miscellaneous')]
val_Snowman = val_df.loc[val_df['Class'].eq('Snowman')]

### Creating the new directory under the parent directory, followed by training and validation sub-directories for each class.

In [None]:
# Directory
directory = "dataset_classes"

# Parent Directory path
parent_dir = "/kaggle/working"

# Path
path = os.path.join(parent_dir, directory)
try:
    # exist_ok=True makes this a no-op when the folder is already there.
    os.makedirs(path, exist_ok = True)
except OSError as error:
    # Best effort: report the failure, including the reason, and carry on.
    print("Directory '%s' can not be created: %s" % (directory, error))
else:
    # Only reached when makedirs did not raise.
    print("Directory '%s' created successfully" %directory)

In [None]:
# Create <dataset_home>/<split>/<class>/ for both splits and all six classes.
dataset_home = '/kaggle/working/dataset_classes/'
subdirs = ['train/', 'test/']
labeldirs = ['airplane/', 'candle/', 'christmas_tree/', 'jacket/', 'miscellaneous/', 'snowman/']
for subdir in subdirs:
    # create label subdirectories
    for labldir in labeldirs:
        newdir = dataset_home + subdir + labldir
        # exist_ok=True lets the cell be re-run without raising FileExistsError
        os.makedirs(newdir, exist_ok=True)

In [None]:
# Print every directory found under /kaggle/working.
for created_dir, _subdirs, _files in os.walk('/kaggle/working'):
    print(created_dir)

### Copying the training data to the respective class sub-directory, for both the training and validation sets.

In [None]:
# seed random number generator
# (not used by the copy below: the split comes from train_df / val_df)
seed(1)
# define ratio of pictures to use for validation
# (kept for reference only; nothing in this cell reads it)
val_ratio = 0.20
# copy training dataset images into subdirectories
src_directory = train_dir

# (split folder, class folder, frame listing the images that belong there)
split_frames = [
    ('train/', 'airplane/', train_Airplane),
    ('train/', 'candle/', train_Candle),
    ('train/', 'christmas_tree/', train_Christmas_Tree),
    ('train/', 'jacket/', train_Jacket),
    ('train/', 'snowman/', train_Snowman),
    ('train/', 'miscellaneous/', train_Miscellaneous),
    ('test/', 'airplane/', val_Airplane),
    ('test/', 'candle/', val_Candle),
    ('test/', 'christmas_tree/', val_Christmas_Tree),
    ('test/', 'jacket/', val_Jacket),
    ('test/', 'snowman/', val_Snowman),
    ('test/', 'miscellaneous/', val_Miscellaneous),
]

# Build the file name -> "<split>/<class>/" lookup once instead of rebuilding
# and scanning a list per class for every file.
destination_for = {}
for split_dir, label_dir, frame in split_frames:
    for image_name in frame.Image:
        # setdefault keeps the first entry, so earlier rows of split_frames win
        destination_for.setdefault(image_name, split_dir + label_dir)

skipped = 0
for file in os.listdir(src_directory):
    target_subdir = destination_for.get(file)
    if target_subdir is None:
        # The file is in none of the class frames: do not guess a label for it.
        skipped += 1
        continue
    copyfile(os.path.join(src_directory, file), dataset_home + target_subdir + file)

if skipped:
    print("Skipped %d file(s) in %s that are not listed in the train/validation split"
          % (skipped, src_directory))
        
            

### Plotting Some Images from each Class.

In [None]:
# Preview nine airplane images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/airplane/'
airplane_files = list(train_Airplane.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 0..8 of the airplane frame, read from the class folder
    pyplot.imshow(imread(folder + airplane_files[slot - 1]))
pyplot.show()

In [None]:
# Preview nine candle images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/candle/'
candle_files = list(train_Candle.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 0..8 of the candle frame, read from the class folder
    pyplot.imshow(imread(folder + candle_files[slot - 1]))
pyplot.show()

In [None]:
# Preview nine christmas-tree images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/christmas_tree/'
tree_files = list(train_Christmas_Tree.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 10..18 of the christmas-tree frame
    pyplot.imshow(imread(folder + tree_files[slot + 9]))
pyplot.show()

In [None]:
# Preview nine jacket images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/jacket/'
jacket_files = list(train_Jacket.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 5..13 of the jacket frame
    pyplot.imshow(imread(folder + jacket_files[slot + 4]))
pyplot.show()

In [None]:
# Preview nine snowman images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/snowman/'
snowman_files = list(train_Snowman.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 33..41 of the snowman frame
    pyplot.imshow(imread(folder + snowman_files[slot + 32]))
pyplot.show()

In [None]:
# Preview nine miscellaneous images from the training folder in a 3x3 grid.
folder = '/kaggle/working/dataset_classes/train/miscellaneous/'
misc_files = list(train_Miscellaneous.Image)
for slot in range(1, 10):
    pyplot.subplot(3, 3, slot)
    # entries 0..8 of the miscellaneous frame, read from the class folder
    pyplot.imshow(imread(folder + misc_files[slot - 1]))
pyplot.show()

### Creating a Simple CNN model to see how it performs on the prepared data.

In [None]:
# Baseline CNN: three Conv -> MaxPool -> Dropout stages followed by a dense head.
model1 = tf.keras.models.Sequential([
    # Input is a 150x150 image with 3 colour channels.
    layers.Conv2D(16, kernel_size=3, activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.2),
    layers.Conv2D(32, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.2),
    layers.Conv2D(64, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.2),
    # Flatten the feature maps so they can feed the dense layers.
    layers.Flatten(),
    # 512-unit hidden layer.
    layers.Dense(512, activation='relu'),
    # Six softmax outputs, one per class.
    layers.Dense(6, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the baseline CNN.
model1.summary()

In [None]:
# Adam with a 1e-3 learning rate; categorical cross-entropy matches the
# one-hot labels produced with class_mode='categorical'.
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
model1.compile(
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
    optimizer=opt,
)

In [None]:
# Both generators only rescale pixel values by 1/255; no augmentation here.
train_datagen = ImageDataGenerator(rescale=1.0/255.)
test_datagen = ImageDataGenerator(rescale=1.0/255.)

# Settings shared by both flows: batches of 20 one-hot labelled 150x150 images.
flow_options = dict(batch_size=20, class_mode='categorical', target_size=(150, 150))

# Training images, read from the per-class training folders.
train_generator = train_datagen.flow_from_directory(
    '/kaggle/working/dataset_classes/train/', **flow_options)

# Validation images, read from the per-class 'test' folders.
validation_generator = test_datagen.flow_from_directory(
    '/kaggle/working/dataset_classes/test/', **flow_options)

In [None]:
# 15 epochs of 100 training batches, each followed by 50 validation batches.
history1 = model1.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)

In [None]:
# Per-epoch training and validation curves recorded for the baseline run.
run_log = history1.history
acc, val_acc = run_log['categorical_accuracy'], run_log['val_categorical_accuracy']
loss, val_loss = run_log['loss'], run_log['val_loss']

epochs = range(len(acc))  # one x position per recorded epoch

# Accuracy: training curve first, validation curve second.
for curve in (acc, val_acc):
    plt.plot(epochs, curve)
plt.title('Training and validation accuracy')

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()
for curve in (loss, val_loss):
    plt.plot(epochs, curve)
plt.title('Training and validation loss')

### Applying Data Augmentation Techniques to the Same Model.

In [None]:
# Augmentation applied to training images only (rotation, shifts, shear,
# zoom, horizontal flip), on top of the 1/255 rescale.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# Validation images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both flows: 150x150 images, batches of 20, one-hot
# labels (required by the categorical_crossentropy loss).
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='categorical')

train_generator = train_datagen.flow_from_directory(
    '/kaggle/working/dataset_classes/train/', **flow_options)

validation_generator = test_datagen.flow_from_directory(
    '/kaggle/working/dataset_classes/test/', **flow_options)


In [None]:
# Train model1 on the augmented generator for another 15 epochs.
# NOTE(review): model1 was already fitted above and is not re-created here, so
# this run starts from the trained weights -- confirm that is intended.
history2 = model1.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=15,
    steps_per_epoch=100,
    validation_steps=50,
    verbose=2,
)

In [None]:
# Per-epoch training and validation curves recorded for the augmented run.
run_log = history2.history
acc, val_acc = run_log['categorical_accuracy'], run_log['val_categorical_accuracy']
loss, val_loss = run_log['loss'], run_log['val_loss']

epochs = range(len(acc))  # one x position per recorded epoch

# Accuracy: training curve first, validation curve second.
for curve in (acc, val_acc):
    plt.plot(epochs, curve)
plt.title('Training and validation accuracy')

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()
for curve in (loss, val_loss):
    plt.plot(epochs, curve)
plt.title('Training and validation loss')

## Applying Transfer Learning.

### 1.InceptionV3 model

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

# ImageNet-pretrained InceptionV3 without its classification top, for 150x150 RGB input.
# NOTE(review): the generators feed pixels rescaled by 1/255; confirm this
# matches the input scaling the ImageNet weights expect.
pre_trained_model1 = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

# Freeze every pretrained layer so that only the new head is trained.
for frozen_layer in pre_trained_model1.layers:
    frozen_layer.trainable = False

# Output tensor of the base model, on which the classification head is built.
last_output1 = pre_trained_model1.output

In [None]:

# Flatten the output layer to 1 dimension
x1 = layers.Flatten()(last_output1)

# Add a fully connected layer with 1,024 hidden units and ReLU activation
x1 = layers.Dense(1024, activation='relu')(x1)

# Add a dropout rate of 0.2
x1 = layers.Dropout(0.2)(x1)

# Add a fully connected layer with 512 hidden units and ReLU activation
x1 = layers.Dense(512, activation='relu')(x1)

# Add a dropout rate of 0.2
x1 = layers.Dropout(0.2)(x1)

# Add a final 6-way softmax layer for classification
x1 = layers.Dense(6, activation='softmax')(x1)


model3 = Model(pre_trained_model1.input, x1)

# Use a fresh optimizer: `opt` has already been used to train model1, so
# reusing it would carry that state over (recent Keras versions refuse it).
model3.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
              loss = 'categorical_crossentropy',
              metrics = ['categorical_accuracy'])

In [None]:
# Print the layer-by-layer summary of the InceptionV3-based model.
model3.summary()

In [None]:
# 50 epochs of 100 training batches, each followed by 50 validation batches.
history3 = model3.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)


In [None]:
# Per-epoch training and validation curves recorded for the InceptionV3 run.
run_log = history3.history
acc, val_acc = run_log['categorical_accuracy'], run_log['val_categorical_accuracy']
loss, val_loss = run_log['loss'], run_log['val_loss']

epochs = range(len(acc))  # one x position per recorded epoch

# Accuracy: training curve first, validation curve second.
for curve in (acc, val_acc):
    plt.plot(epochs, curve)
plt.title('Training and validation accuracy')

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()
for curve in (loss, val_loss):
    plt.plot(epochs, curve)
plt.title('Training and validation loss')

### 2.VGG16 Model

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

# ImageNet-pretrained VGG16 without its fully connected top, for 150x150 RGB input.
pretrained_model2 = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

# Freeze every pretrained layer so that only the new head is trained.
for frozen_layer in pretrained_model2.layers:
    frozen_layer.trainable = False

In [None]:
# Classification head stacked on the frozen VGG16 features, applied in order:
# flatten -> Dense(1024, relu) -> Dropout(0.2) -> Dense(512, relu)
# -> Dropout(0.2) -> 6-way softmax.
head_layers = (
    layers.Flatten(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(6, activation='softmax'),
)

x = pretrained_model2.output
for head_layer in head_layers:
    x = head_layer(x)

model4 = tf.keras.models.Model(inputs=pretrained_model2.input, outputs=x)

In [None]:
# Print the layer-by-layer summary of the VGG16-based model.
model4.summary()

In [None]:
# Use a fresh optimizer: `opt` has already been used with earlier models, so
# reusing it would carry that state over (recent Keras versions refuse it).
model4.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
               loss = 'categorical_crossentropy',
               metrics = ['categorical_accuracy'])

In [None]:
# 50 epochs of 100 training batches, each followed by 50 validation batches.
history4 = model4.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)


In [None]:
# Per-epoch training and validation curves recorded for the VGG16 run.
run_log = history4.history
acc, val_acc = run_log['categorical_accuracy'], run_log['val_categorical_accuracy']
loss, val_loss = run_log['loss'], run_log['val_loss']

epochs = range(len(acc))  # one x position per recorded epoch

# Accuracy: training curve first, validation curve second.
for curve in (acc, val_acc):
    plt.plot(epochs, curve)
plt.title('Training and validation accuracy')

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()
for curve in (loss, val_loss):
    plt.plot(epochs, curve)
plt.title('Training and validation loss')

## I'm going to train a few more pretrained models.

### Creating Submission CSV

In [None]:
# Use the tf.keras preprocessing module, consistent with the ImageDataGenerator
# import at the top of the notebook, instead of the standalone keras package.
from tensorflow.keras.preprocessing import image

prediction = []
Image = []
# Invert class_indices (folder name -> index) to map a predicted index back
# to its class folder name.
labels = {index: name for name, index in train_generator.class_indices.items()}

# Predict in chunks: one predict() call per batch instead of one per image.
PREDICT_BATCH = 64
test_files = os.listdir(test_dir)
for start in range(0, len(test_files), PREDICT_BATCH):
    chunk = test_files[start:start + PREDICT_BATCH]
    # Same preprocessing as the generators: resize to 150x150, scale by 1/255.
    # target_size is (height, width); the channel count is not part of it.
    batch = np.stack([
        image.img_to_array(
            image.load_img(os.path.join(test_dir, name), target_size=(150, 150))
        ) / 255
        for name in chunk
    ])
    pred = model4.predict(batch, verbose=0)

    Image.extend(chunk)
    prediction.extend(labels[index] for index in np.argmax(pred, axis=1))

In [None]:
# Two-column submission table: file name and predicted class folder name.
Submission = pd.DataFrame({'Image': Image, 'Class': prediction})


In [None]:
# Number of test images predicted per class.
Submission['Class'].value_counts()

In [None]:
# Translate the lower-case folder names back to the class names used in train.csv.
# replace() leaves values without a matching key untouched, so re-running the
# cell keeps the already-renamed classes instead of turning them into NaN
# (which .map() would do).
Submission['Class'] = Submission['Class'].replace({
'airplane':'Airplane',
'candle':'Candle',
'christmas_tree':'Christmas_Tree',
'jacket':'Jacket',
'miscellaneous':'Miscellaneous',
'snowman':'Snowman'
})

In [None]:
# Write the predictions (made with model4, the VGG16-based model) without the index column.
Submission.to_csv('VGG16.csv',index=False)

## Scores On Submission

### Model 1(Simple CNN) gives score of 60.96496
### Model 2(Simple CNN with Data Augmentation) gives score of 64.43637
### Model 3(InceptionV3) gives score of 83.27723
### Model 4(VGG16) gives score of 76.27789
## ...