 **Dog Breed Classification**

In [None]:
import os
import warnings
import random
from shutil import copyfile
import numpy as np
import pandas as pd
import itertools
#data visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pathlib
#deep learning libraries
import tensorflow as tf
from keras.optimizers import Adam
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from keras.models import Sequential
from mpl_toolkits.axes_grid1 import ImageGrid
from keras.preprocessing.image import ImageDataGenerator
#importing ResNet50 model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
# Folders holding the training and the test images.
dir_train = '../input/dog-breed-identification/train'
dir_test = '../input/dog-breed-identification/test'

# labels.csv pairs each training image id with its breed;
# sample_submission.csv carries the ids of the test images.
# Every column is read as str.
df_train = pd.read_csv(
    '../input/dog-breed-identification/labels.csv',
    dtype=str,
)
df_test = pd.read_csv(
    '../input/dog-breed-identification/sample_submission.csv',
    dtype=str,
)

In [None]:
#images are in jpg format, so '.jpg' is appended to the ids to match the file names on disk
def append_ext(fn):
    """Return ``fn`` with a ``.jpg`` extension.

    The extension is added only when it is not already present, so applying
    the function twice (e.g. by re-running the cell that maps it over the id
    columns) does not produce names such as ``<id>.jpg.jpg``.

    Parameters
    ----------
    fn : str
        Image id or file name.

    Returns
    -------
    str
        ``fn`` ending in ``.jpg``.
    """
    return fn if fn.endswith(".jpg") else fn + ".jpg"

In [None]:
# Turn the ids of both frames into image file names.
for frame in (df_train, df_test):
    frame["id"] = frame["id"].apply(append_ext)

***Training Data***

In [None]:
#As instructed, only the suggested breeds are kept; rows of every other breed are removed
inc_breed=['beagle','chihuahua','doberman','french_bulldog','golden_retriever','malamute','pug','saint_bernard', 'scottish_deerhound',
'tibetan_mastiff']
# Vectorised filter on the "breed" column: one pass instead of dropping rows one
# at a time, and no positional lookup (breed[1]) on a label-indexed row.
# The surviving rows keep their original index labels.
df_train = df_train[df_train["breed"].isin(inc_breed)]

len(df_train)

***Training set and Test set***

In [None]:
# First rows of each frame: the training frame is printed as plain text,
# the test frame is the cell's displayed value.
train_preview = df_train.head()
print(train_preview)
df_test.head()

***Training Images***

In [None]:
#display some training images with the help of matplotlib
source_path = "../input/dog-breed-identification/train"
sub_class = os.listdir(source_path)  # file names in the training folder

fig = plt.figure(figsize=(10,5))
# Show up to eight files starting at position 7 of the listing. Slicing (rather
# than indexing with i+7 under a bound taken from the first eight entries)
# cannot run past the end of the list when the folder holds fewer than 15 files.
for i, file_name in enumerate(sub_class[7:15]):
    plt.subplot(2,4,i+1)
    imag = plt.imread(os.path.join(source_path, file_name))
    plt.imshow(imag, cmap=plt.get_cmap('gray'))
    plt.axis('off')

***Data Preprocessing***

In [None]:
# Augmenting image generator: pixel values are multiplied by 1/255 and the
# images get random rotations, shifts, brightness changes and horizontal flips.
# validation_split sets aside 10% of the rows for the "validation" subset.
# NOTE(review): ResNet50's ImageNet weights are normally paired with
# `preprocess_input` (imported at the top) rather than 1/255 rescaling --
# confirm which preprocessing is intended.
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.1,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.2, 1.0],
    horizontal_flip=True,
)

In [None]:
# Training batches: file names come from the "id" column, one-hot labels from
# the "breed" column; images are resized to 224x224 and shuffled with a fixed seed.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=dir_train,
    x_col="id",
    y_col="breed",
    subset="training",
    class_mode="categorical",
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
    seed=42,
)

***Validation Data***

In [None]:
#Generating batches of tensor image data for the validation set.
# Validation images must not be augmented, otherwise the validation metrics are
# measured on randomly distorted pictures. A second generator that only rescales
# is used for that reason. Its validation_split has to stay equal to the one of
# train_datagen (0.1) so that the "validation" subset selects the rows the
# "training" subset leaves out (Keras takes the subsets by row position in the
# dataframe -- verify against the installed Keras version).
validation_datagen = ImageDataGenerator(rescale=1./255., validation_split=0.1)
validation_generator = validation_datagen.flow_from_dataframe(
    directory=dir_train,
    dataframe=df_train,
    x_col="id",
    y_col="breed",
    subset="validation",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(224,224)
)

***Test Data***

In [None]:
# The test images are only rescaled by 1/255 (no augmentation).
test_datagen = ImageDataGenerator(rescale=1./255.)

# Unlabelled batches in the row order of df_test (shuffle is off), resized to
# 224x224, so the predictions line up with the rows of the submission file.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=dir_test,
    x_col="id",
    y_col=None,
    class_mode=None,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=42,
)

In [None]:
# Number of breeds kept for classification (length of inc_breed).
classes=len(inc_breed)
classes

***Using Pretrained Model : ResNet50***

In [None]:
#ResNet50 convolutional base with ImageNet weights and without its classification head
pretrained_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
# Freeze the base so its weights are not updated during training; only the
# layers stacked on top of it are trained. This has to happen before compile().
pretrained_model.trainable = False

In [None]:
#defining the model: the ResNet50 base followed by a dense classification head
model = Sequential([
        pretrained_model,
        Flatten(),
        Dense(2048, activation='relu'),
        Dropout(0.5),

        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.3),

        # one softmax unit per selected breed; `classes` is len(inc_breed), so the
        # output width follows the breed list instead of a hard-coded 10
        Dense(classes, activation='softmax')
    ])

In [None]:
#as mentioned f1score, recall, precision are defined as our judging criteria for model
from keras import backend as bd

def recall_m(y_true, y_pred):
    """Recall over the given tensors: rounded true positives divided by rounded
    actual positives, with epsilon added to the denominator."""
    hits = bd.round(bd.clip(y_true * y_pred, 0, 1))
    actual = bd.round(bd.clip(y_true, 0, 1))
    return bd.sum(hits) / (bd.sum(actual) + bd.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: rounded true positives divided by
    rounded predicted positives, with epsilon added to the denominator."""
    hits = bd.round(bd.clip(y_true * y_pred, 0, 1))
    predicted = bd.round(bd.clip(y_pred, 0, 1))
    return bd.sum(hits) / (bd.sum(predicted) + bd.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: twice the product of precision_m and recall_m divided by their
    sum, with epsilon added to the denominator."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + bd.epsilon())
    return 2 * ratio

In [None]:
#Adam is used as optimizer and categorical_crossentropy as the loss for multiclass classification.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
# rejected by recent Keras releases.
opt = Adam(learning_rate=1e-4)
# Accuracy plus the custom F1 / precision / recall functions are tracked as metrics.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc', f1_m, precision_m, recall_m])

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

***Model Fitting***

In [None]:
# Train for 50 epochs. Both step counts use floor division of the sample count
# by the batch size, so a trailing partial batch is not counted.
step_size_ = train_generator.n // train_generator.batch_size
valid_step_size_ = validation_generator.n // validation_generator.batch_size
history = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=step_size_,
    validation_data=validation_generator,
    validation_steps=valid_step_size_,
)

***Plotting Curves***

In [None]:
# Per-epoch training / validation accuracy and loss recorded by model.fit.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)

epochs = range(len(acc))

# Accuracy curves: training in red, validation in blue.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(epochs, acc, 'r', label='training Accuracy')
ax.plot(epochs, val_acc, 'b', label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('accuracy')
ax.set_title(' training vs validation accuracy')
ax.legend(loc='lower right')
plt.show()


In [None]:
#plot of epoch vs loss for training set and epoch vs validation_loss for validation set
# The curves show loss values, so legend, axis label and title say "loss"
# (they previously said "accuracy").
fig2=plt.figure(figsize=(14,7))
plt.plot(epochs,loss,'r', label='Training Loss')
plt.plot(epochs,val_loss,'b', label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('loss')
plt.title('training vs validation loss')
plt.legend(loc='upper right')
plt.show()


In [None]:
#judging criteria of our model, evaluated on the validation generator
# The generator already yields batches, so no batch_size is passed to evaluate().
# NOTE: `loss` is rebound here from the per-epoch list used by the loss plot to a
# single number; re-run the cell that reads history.history before re-plotting.
loss, accuracy, f1_score, precision, recall = model.evaluate(validation_generator)

print("Loss:", loss)
print("Accuracy:", accuracy)
print("F1 Score:", f1_score)
print("Precision:", precision)
print("Recall:", recall)

***Saving the model for future use***

In [None]:
# Save the trained model to an .h5 file in the working directory for later reuse.
# NOTE(review): the model was compiled with custom metric functions (f1_m,
# precision_m, recall_m); loading it back presumably needs them supplied via
# custom_objects or compile=False -- confirm.
model.save("DogClassificationByResNet2.h5")

***Model prediction on test data (sample_submission.csv)***

In [None]:
# Predict on the test generator. It was built with shuffle=False, so the rows of
# `pred` follow the row order of df_test; each row is the softmax output over the breeds.
pred=model.predict(test_generator)

In [None]:
#submission template: the id column plus one probability column per selected breed
# Read through the same relative path as the other input files, so this cell
# works wherever the earlier cells do (not only under /kaggle).
df_submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv', usecols= inc_breed+['id'])
df_submission.head()

***Updating the probability values***

In [None]:
#write the predicted probabilities into the submission frame
# Column j of `pred` belongs to the breed whose index is j in
# train_generator.class_indices. Assigning by breed name keeps every probability
# under the right header instead of relying on the CSV column order happening to
# match the generator's class order.
breed_columns = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
df_submission[breed_columns] = pred
df_submission.head()

In [None]:
# (rows, columns) of the submission frame: one row per test image, the id column plus the breed columns.
df_submission.shape