## Dog Breed Identification

In this competition, given an image of a dog, we are asked to predict a probability for each of the 120 different dog breeds.

So let's get started.

In [None]:
# Print the installed TensorFlow version.
import tensorflow as tf
print(tf.__version__)

In [None]:
!pip install livelossplot

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from tensorflow import keras

import cv2
import PIL
import os
from IPython.display import Image, display

# Plotly for the interactive viewer (see last section)

import plotly.graph_objs as go
import plotly.graph_objects as go
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model,load_model
from tensorflow.keras.applications import xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten,BatchNormalization,Activation
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import gc
import skimage.io
from livelossplot import PlotLossesKeras

I'm going to use the JPEG files for training and testing.

In [None]:
# Locations of the competition's image folders (trailing slash included so
# file names can be appended directly).
train_dir = '../input/dog-breed-identification/train/'
test_dir = '../input/dog-breed-identification/test/'

# Tabular inputs: the breed label of every training image, and the sample
# submission file that is later filled with predictions.
train_labels = pd.read_csv('../input/dog-breed-identification/labels.csv')
submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')

In [None]:
# Count the directory entries in the train and test image folders.
train_size, test_size = (
    len(os.listdir(folder)) for folder in (train_dir, test_dir)
)

# Show the file counts, then the shapes of the label and submission tables.
print(train_size, test_size)
print(train_labels.shape, submission.shape, sep='\n')

In [None]:
# Integer-encode the breed names. With sort=True the unique breeds are sorted,
# so `dog_breeds[code]` gives the breed name for each integer code.
target, dog_breeds = train_labels['breed'].factorize(sort=True)
train_labels['target'] = target

In [None]:
# Preview the first five rows of the label table and of the submission template.
display(train_labels.head(n=5))
display(submission.head(n=5))

In [None]:
# Number of training images per breed.
train_labels['breed'].value_counts()

In [None]:
# Bar chart of the number of training images per breed.
breed_counts = train_labels['breed'].value_counts()
plt.figure(figsize=(13, 6))
breed_counts.plot.bar()
plt.show()

## Preparing the Datasets

In [None]:
# Build the training frame: one row per labelled image, holding the full path
# to its JPEG file and its integer-encoded breed.
# The paths are built with a single vectorized string concatenation over the
# whole `id` column instead of a row-by-row `.iloc` loop that appended to
# temporary Python lists, so no intermediate lists need to be deleted afterwards.
df = pd.DataFrame({
    'images': (train_dir + train_labels['id'] + '.jpg').to_numpy(),
    'target': train_labels['target'].to_numpy(),
})
print(df.shape)
display(df.head())

In [None]:
# Build the test frame: one row per id in the sample submission, holding the
# full path to the corresponding JPEG file. Row order follows `submission`.
# The paths are built with a single vectorized string concatenation instead of
# a row-by-row `.iloc` loop that appended to a temporary Python list.
df_test = pd.DataFrame({
    'images': (test_dir + submission['id'] + '.jpg').to_numpy(),
})
print(df_test.shape)
display(df_test.head())

In [None]:
# Hold out 20% of the labelled images for validation. The split is stratified
# on the encoded breed and uses a fixed seed so it is repeatable.
# Splitting the two-column frame directly keeps each image path paired with its
# target, so the halves do not have to be reassembled afterwards.
train, validation = train_test_split(
    df[['images', 'target']],
    stratify=df['target'],
    test_size=0.2,
    random_state=1234,
)

print(train.shape)
display(train.head())
print(validation.shape)
display(validation.head())

## Global Variables

In [None]:
# Number of epochs passed to model.fit, and the batch size used by every generator.
N_EPOCHS, BATCH_SIZE = 50, 32

# Images are resized to a square of this side length by the generators.
IMG_HEIGHT = IMG_WIDTH = 299

## I'll do some very basic preprocessing, like:
* normalizing (applied inside the model via `xception.preprocess_input`)
* resizing every image to 299x299
* augmentation (only for the training data)

In [None]:
# Random augmentation (rotation, shifts, zoom, horizontal flip) is applied to
# the training images only; the validation images pass through unchanged.
# Neither generator rescales pixel values.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_datagen = ImageDataGenerator()

# Settings shared by both flows: read file paths from the `images` column,
# take the integer label from `target` as-is (class_mode='raw'), and resize
# every image to IMG_HEIGHT x IMG_WIDTH.
_flow_kwargs = dict(
    x_col='images',
    y_col='target',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='raw',
)

# Training batches are reshuffled; validation batches keep the frame's order.
train_generator = train_datagen.flow_from_dataframe(
    train, shuffle=True, **_flow_kwargs)
validation_generator = val_datagen.flow_from_dataframe(
    validation, shuffle=False, **_flow_kwargs)

## Modelling

In [None]:
# Derive the network's input shape and output width from the notebook's own
# settings instead of repeating the literals 299 and 120, so the model stays
# in sync with the generators' target size and with the encoded breed list.
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)
N_CLASSES = len(dog_breeds)

# ImageNet-pretrained Xception without its classification head.
base_model = xception.Xception(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
# display(base_model.summary())

# Train only the new top layers (which are randomly initialized),
# i.e. freeze all convolutional Xception layers.
base_model.trainable = False

inputs = Input(shape=INPUT_SHAPE)
# Xception's input preprocessing is part of the model graph, so the data
# generators can feed unscaled pixel values.
x = xception.preprocess_input(inputs)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here by passing `training=False`.
x = base_model(x, training=False)

x = GlobalAveragePooling2D()(x)

# NOTE: an optional deeper head (Dense(1024, relu) -> Dropout(.7) -> Dense(512, relu))
# was tried here and is left disabled.

x = Dropout(.5)(x)
# One softmax probability per breed.
outputs = Dense(N_CLASSES, activation='softmax')(x)
model = Model(inputs, outputs)

# summary() prints the table itself and returns None, so it is not wrapped in
# display(), which would show a stray "None" underneath.
model.summary()

In [None]:
# Targets are integer class ids rather than one-hot vectors, hence the
# *sparse* categorical cross-entropy loss.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Batches per epoch for each split. The Keras data iterators also yield the
# final, partially filled batch, so the count is ceil(n_samples / BATCH_SIZE);
# plain floor division would under-report it whenever the split size is not a
# multiple of the batch size.
n_train_steps = (train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
n_val_steps = (validation.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
print(f"Number of training and validation steps: {n_train_steps} and {n_val_steps}")

In [None]:
# Stop training when the monitored quantity (left at the Keras default) has not
# improved by at least 0.001 for 10 consecutive epochs, and roll the model back
# to the best weights seen.
EarlyStop_callback = EarlyStopping(
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# Alternative callbacks, currently disabled:
# # DECREASE LEARNING RATE EACH EPOCH
# annealer = LearningRateScheduler(lambda epoch: 1e-5 * 0.95 ** epoch, verbose=1)

# cb=[PlotLossesKeras(), annealer]

In [None]:
# Train the classification head for up to N_EPOCHS epochs, evaluating on the
# validation generator after each epoch; early stopping may end the run sooner.
# Optional data-loading settings left disabled:
#     use_multiprocessing = True, workers = 4, max_queue_size = 32
model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=N_EPOCHS,
    callbacks=[EarlyStop_callback],
)

In [None]:
# Training is finished; drop the references to the train/validation generators.
del train_generator, validation_generator

## Prediction for Test Dataset

In [None]:
# Test images get no augmentation and carry no labels (class_mode=None).
# shuffle=False keeps the batches in the row order of `df_test`.
test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    x_col='images',
    class_mode=None,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Run the trained model over every test batch, showing a progress bar.
predictions = model.predict(x=test_generator, verbose=1)

In [None]:
# Show the shape of the prediction array, then its contents.
print(predictions.shape, predictions, sep='\n')

## Submission

In [None]:
# Write the predicted probabilities into the breed columns of the submission frame.
# `dog_breeds` holds the sorted breed names returned by pd.factorize, so column j of
# `predictions` is written to the column named after breed code j.
# NOTE(review): assumes the rows of `predictions` are in the same order as the rows
# of `submission` (the test generator is built with shuffle=False) — confirm.
submission.loc[:, dog_breeds] = predictions
submission.head()

In [None]:
# Save the filled-in submission to the working directory, without the row index.
submission.to_csv('submission.csv', index=False)