In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Modules

In [None]:
import numpy as np
import pandas as pd
import os
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
from shutil import move
from tensorflow.keras.utils import plot_model
%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import shutil

# Understanding Dataset

In [None]:
len(os.listdir('../input/dog-breed-identification/test'))

In [None]:
df = pd.read_csv('../input/dog-breed-identification/labels.csv')

An initial look at the dataset shows that each image in the test folder is named after its id, and that the file labels.csv maps each training image id to its class (breed).
We also see that the dataset is unbalanced.

# Making Directory Flowchart

In [None]:
breeds = df.groupby('breed')

In [None]:
# Create the working tree that will hold the train/validation split.
# os.makedirs(..., exist_ok=True) also creates the 'dogbreeds_in' parent and
# keeps the cell re-runnable (os.mkdir raises FileExistsError on a second run).
for split_dir in ('dogbreeds_in/train', 'dogbreeds_in/validation'):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
split_size = 0.7

In [None]:
# Copy every labelled image into <split>/<breed>/<id>.jpg so that
# flow_from_directory can infer the class of each image from its folder.
# The per-breed folders must live under 'dogbreeds_in/', which is the tree the
# generators read from; a 'dogbreeds/' root does not exist at this point.
source_dir = '../input/dog-breed-identification/train'
for breed, group in breeds:
    files = list(group['id'])
    train_p = os.path.join('dogbreeds_in', 'train', breed)
    val_p = os.path.join('dogbreeds_in', 'validation', breed)
    os.makedirs(train_p, exist_ok=True)
    os.makedirs(val_p, exist_ok=True)
    # Shuffle so the train/validation assignment within a breed is random.
    random.shuffle(files)
    # The first split_size fraction of the shuffled ids goes to train.
    train_cutoff = split_size * len(files)
    for ind, fn in enumerate(files):
        image_name = fn + '.jpg'
        dest_dir = train_p if ind < train_cutoff else val_p
        copyfile(os.path.join(source_dir, image_name),
                 os.path.join(dest_dir, image_name))

In this subsection we divide the training data into train and validation sets. We also build a directory tree (one folder per breed) so that the ImageDataGenerator can use it to assign each image to its class.

# Generating Databatches for Training

In [None]:
train_dir = './dogbreeds_in/train'

# Random transformations applied to the training images as they are loaded.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Shuffled batches of 64 images resized to 331x331 with one-hot labels;
# the classes are taken from the sub-folders of train_dir.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    shuffle=True,
    class_mode='categorical',
    batch_size=64,
    target_size=(331, 331),
)

In [None]:
val_dir = './dogbreeds_in/validation'

# No augmentation for validation: images are only loaded, resized and batched.
val_datagen = ImageDataGenerator()

validation_flow_options = dict(
    target_size=(331, 331),
    batch_size=64,
    class_mode='categorical',
)
val_generator = val_datagen.flow_from_directory(val_dir, **validation_flow_options)

We generate data in batches of 64 and apply image augmentation to the training data only; the validation data is not augmented.

# Training Using Pretrained Models

## Single Trained DeepNN

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inceptpp

In [None]:
# ImageNet-pretrained InceptionV3 without its classification head
# (include_top=False), built for 331x331 RGB inputs.
dnn_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(331, 331, 3))

# Freeze every backbone layer so that only the layers added on top are trained.
for backbone_layer in dnn_model.layers:
    backbone_layer.trainable = False

In [None]:
# Classifier: Inception preprocessing -> frozen backbone -> global average
# pooling -> 120-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Lambda(inceptpp, input_shape=(331, 331, 3)))
model.add(dnn_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(120, activation='softmax'))

In [None]:
model.summary()

In [None]:
plot_model(model,show_shapes=True)

In [None]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [None]:
# Take the number of full batches per epoch from the generators themselves
# rather than from hard-coded image counts, which silently go stale whenever
# the split or the batch size changes. max(1, ...) guards against a split
# smaller than one batch.
train_steps = max(1, train_generator.samples // train_generator.batch_size)
val_steps = max(1, val_generator.samples // val_generator.batch_size)

history = model.fit(
    train_generator,
    epochs = 10,
    steps_per_epoch = train_steps,
    validation_data = val_generator,
    validation_steps = val_steps
)

In [None]:

%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # Get number of epochs

plt.plot(epochs, acc, 'r', "Training Accuracy")
plt.plot(epochs, val_acc, 'b', "Validation Accuracy")
plt.title('Training and validation accuracy')
plt.figure()

plt.plot(epochs, loss, 'r', "Training Loss")
plt.plot(epochs, val_loss, 'b', "Validation Loss")


plt.title('Training and validation loss')

## Ensembling 5 Trained Deep NN

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inceptpp
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as vggpp
from tensorflow.keras.applications.resnet import ResNet101
from tensorflow.keras.applications.resnet import preprocess_input as respp
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as xceptpp
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inceptrespp

In [None]:
def pretrained_model_prediction(MODEL,Pre,data):
  """Return the pooled features of a frozen ImageNet backbone applied to `data`.

  Args:
    MODEL: a `tf.keras.applications` model constructor (e.g. `InceptionV3`).
    Pre: the `preprocess_input` function that belongs to `MODEL`.
    data: symbolic image tensor (batch dimension first); its remaining
      dimensions are used as the backbone's input shape.

  Returns:
    The backbone output (built with `include_top=False`) after global
    average pooling.
  """
  # Build the backbone for the shape of the tensor it is actually fed, so the
  # size cannot drift away from the Input layer defined by the caller.
  dnn_model = MODEL(
      input_shape=tuple(data.shape[1:]),
      include_top = False,
      weights = 'imagenet'
  )
  # Freeze the backbone: it is used as a fixed feature extractor.
  for layer in dnn_model.layers:
    layer.trainable = False
  x = data
  x = tf.keras.layers.Lambda(Pre)(x)
  x = dnn_model(x)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  return x

In [None]:
# Shared image input: every backbone branch reads from this tensor.
# NOTE(review): the generators in this notebook are created with
# target_size=(331,331), while this input is 300x300 - confirm the sizes
# agree before training on them.
inputs = tf.keras.layers.Input(shape=(300,300,3))

# One pooled feature vector per backbone.
inception_prediction = pretrained_model_prediction(MODEL=InceptionV3, Pre=inceptpp, data=inputs)
vgg_prediction = pretrained_model_prediction(MODEL=VGG19, Pre=vggpp, data=inputs)
xception_prediction = pretrained_model_prediction(MODEL=Xception, Pre=xceptpp, data=inputs)
resnet_prediction = pretrained_model_prediction(MODEL=ResNet101, Pre=respp, data=inputs)
inceptionres_prediction = pretrained_model_prediction(MODEL=InceptionResNetV2, Pre=inceptrespp, data=inputs)

# Join the five feature vectors into one and wrap the result as a model.
branch_features = [
    inception_prediction,
    vgg_prediction,
    xception_prediction,
    resnet_prediction,
    inceptionres_prediction,
]
new_features = tf.keras.layers.Concatenate()(branch_features)
concatenated_pretrained_model = tf.keras.Model(inputs=inputs, outputs=new_features)


In [None]:
concatenated_pretrained_model.summary()

In [None]:
plot_model(concatenated_pretrained_model,show_shapes=True)

In [None]:
# Classifier on top of the concatenated backbone features: 120-way softmax.
model = tf.keras.models.Sequential()
model.add(concatenated_pretrained_model)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(120, activation='softmax'))

In [None]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
plot_model(model,show_shapes=True)

In [None]:
# Number of full batches per epoch, read from the generators instead of
# hard-coded image counts that go stale when the split or batch size changes.
# max(1, ...) guards against a split smaller than one batch.
train_steps = max(1, train_generator.samples // train_generator.batch_size)
val_steps = max(1, val_generator.samples // val_generator.batch_size)

history = model.fit(
    train_generator,
    epochs = 10,
    steps_per_epoch = train_steps,
    validation_data = val_generator,
    validation_steps = val_steps
)

In [None]:
# Per-epoch metrics recorded by model.fit.
acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # Get number of epochs

# The curve name has to be passed as label=...; as a 4th positional argument
# plot() parses it as additional data and no legend entry is created.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.figure()  # second figure for the loss curves

plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# Ensembling 9 Models

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inceptpp
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as vggpp
from tensorflow.keras.applications.resnet import ResNet101
from tensorflow.keras.applications.resnet import preprocess_input as respp
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as xceptpp
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inceptrespp
from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.applications.nasnet import preprocess_input as naspp
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input as densepp
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input as effpp
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobpp

In [None]:
def pretrained_model_prediction(MODEL,Pre,data):
  """Run `data` through a frozen ImageNet backbone and pool the result.

  MODEL is the backbone constructor, Pre its matching preprocessing function
  and data the symbolic input tensor. The backbone is built headless
  (include_top=False) for 331x331 RGB inputs, and its output is returned
  after global average pooling.
  """
  backbone = MODEL(weights='imagenet', include_top=False, input_shape=(331,331,3))
  # Freeze all backbone layers; it only serves as a feature extractor.
  for frozen_layer in backbone.layers:
    frozen_layer.trainable = False
  preprocessed = tf.keras.layers.Lambda(Pre)(data)
  feature_maps = backbone(preprocessed)
  return tf.keras.layers.GlobalAveragePooling2D()(feature_maps)

In [None]:
# Shared 331x331 RGB input feeding all nine backbone branches.
inputs = tf.keras.layers.Input(shape=(331,331,3))

# One pooled feature vector per backbone.
inception_prediction = pretrained_model_prediction(MODEL=InceptionV3, Pre=inceptpp, data=inputs)
vgg_prediction = pretrained_model_prediction(MODEL=VGG19, Pre=vggpp, data=inputs)
xception_prediction = pretrained_model_prediction(MODEL=Xception, Pre=xceptpp, data=inputs)
resnet_prediction = pretrained_model_prediction(MODEL=ResNet101, Pre=respp, data=inputs)
inceptionres_prediction = pretrained_model_prediction(MODEL=InceptionResNetV2, Pre=inceptrespp, data=inputs)
nasnet_prediction = pretrained_model_prediction(MODEL=NASNetLarge, Pre=naspp, data=inputs)
densenet_prediction = pretrained_model_prediction(MODEL=DenseNet201, Pre=densepp, data=inputs)
efficient_prediction = pretrained_model_prediction(MODEL=EfficientNetB7, Pre=effpp, data=inputs)
mobile_prediction = pretrained_model_prediction(MODEL=MobileNetV2, Pre=mobpp, data=inputs)

# Concatenate the nine feature vectors and expose the result as a model.
branch_features = [
    inception_prediction,
    vgg_prediction,
    xception_prediction,
    resnet_prediction,
    inceptionres_prediction,
    nasnet_prediction,
    densenet_prediction,
    efficient_prediction,
    mobile_prediction,
]
new_features = tf.keras.layers.Concatenate()(branch_features)
concatenated_pretrained_model = tf.keras.Model(inputs=inputs, outputs=new_features)


In [None]:
concatenated_pretrained_model.summary()

In [None]:
plot_model(concatenated_pretrained_model,show_shapes=True)

In [None]:
# 120-way softmax classifier on top of the concatenated backbone features.
model = tf.keras.Sequential()
model.add(concatenated_pretrained_model)
model.add(tf.keras.layers.Dense(120, activation='softmax'))

In [None]:
model.summary()

In [None]:
plot_model(model,show_shapes = True)

In [None]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [None]:
# Number of full batches per epoch, read from the generators instead of
# hard-coded image counts that go stale when the split or batch size changes.
# max(1, ...) guards against a split smaller than one batch.
train_steps = max(1, train_generator.samples // train_generator.batch_size)
val_steps = max(1, val_generator.samples // val_generator.batch_size)

history = model.fit(
    train_generator,
    epochs = 10,
    steps_per_epoch = train_steps,
    validation_data = val_generator,
    validation_steps = val_steps
)

# Adding More Dataset for Training

In [None]:
# Print the path of every file under /kaggle/input whose size is 0 bytes.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        if os.path.getsize(full_path) == 0:
            print(full_path)

In [None]:
fldr = os.listdir('../input/stanford-dogs-dataset/images/Images')

In [None]:
fldr[0][10:]

In [None]:
df = pd.read_csv('../input/dog-breed-identification/labels.csv')

In [None]:
breeds = df.groupby('breed')

In [None]:
# Creates 'dogbreeds' and 'dogbreeds/everything' in one call; exist_ok=True
# keeps the cell re-runnable (os.mkdir raises FileExistsError on a second run).
os.makedirs('dogbreeds/everything', exist_ok=True)

In [None]:
# Copy every labelled competition image into dogbreeds/everything/<breed>/.
source_dir = '../input/dog-breed-identification/train'
for breed, group in breeds:
    train_p = os.path.join('dogbreeds', 'everything', breed)
    # exist_ok=True lets the cell be re-run without a FileExistsError.
    os.makedirs(train_p, exist_ok=True)
    for fn in group['id']:
        image_name = fn + '.jpg'
        copyfile(os.path.join(source_dir, image_name),
                 os.path.join(train_p, image_name))

In [None]:
os.mkdir('./dogbreeds/train')

In [None]:
# Mirror every Stanford Dogs folder into ./dogbreeds/train.
stanford_root = '../input/stanford-dogs-dataset/images/Images'
for bre in fldr:
    # Target folder name: source folder name without its first 10 characters,
    # lower-cased.
    newfldr = os.path.join('./dogbreeds/train', bre[10:].lower())
    os.mkdir(newfldr)
    oldpth = os.path.join(stanford_root, bre)
    for f in os.listdir(oldpth):
        copyfile(os.path.join(oldpth, f), os.path.join(newfldr, f))

In [None]:
# Number of files in each breed folder of the train tree
# (len_dir[i] belongs to all_breeds[i]).
all_breeds = os.listdir('./dogbreeds/train')
len_dir = [
    len(os.listdir(os.path.join('./dogbreeds/train', dog_dir)))
    for dog_dir in all_breeds
]

In [None]:
len_dir = np.array(len_dir)

In [None]:
max_at = np.argmax(len_dir)
print("max number of files is for : ",all_breeds[max_at],len_dir[max_at])

In [None]:
# Breed folder with the fewest files. The message now says "min": it reports
# the argmin, not the max.
min_at = np.argmin(len_dir)
print("min number of files is for : ",all_breeds[min_at],len_dir[min_at])

In [None]:
print("Previous data we had for :",all_breeds[min_at],"is",len(os.listdir(os.path.join('./dogbreeds/everything',all_breeds[min_at]))))

In [None]:
# Number of files in each breed folder of the 'everything' tree
# (len_dir[i] belongs to all_breeds[i]).
all_breeds = os.listdir('./dogbreeds/everything')
len_dir = [
    len(os.listdir(os.path.join('./dogbreeds/everything', dog_dir)))
    for dog_dir in all_breeds
]

In [None]:
# Breed folder with the fewest files. The message now says "min": it reports
# the argmin, not the max.
len_dir = np.array(len_dir)
min_at = np.argmin(len_dir)
print("min number of files is for : ",all_breeds[min_at],len_dir[min_at])

In [None]:
print("Previous data we had for :",all_breeds[min_at],"is",len(os.listdir(os.path.join('./dogbreeds/train',all_breeds[min_at]))))

In [None]:
target_train_size = 210

In [None]:
os.mkdir('./dogbreeds/validation')

In [None]:
# For every breed, top its train folder up to target_train_size with the
# competition images and send the remaining images to validation.
# NOTE(review): if the two source datasets share images, validation images may
# also be present in train - confirm.
for breed in os.listdir('./dogbreeds/everything'):
    dog_train = os.path.join('./dogbreeds/train', breed)
    dog_val = os.path.join('./dogbreeds/validation', breed)
    os.mkdir(dog_val)
    dog_every = os.path.join('./dogbreeds/everything', breed)
    # Running counter: starts at the number of files already in the train
    # folder and grows by one for every image handled below.
    initial_length = len(os.listdir(dog_train))
    for dogs in os.listdir(dog_every):
        dest_dir = dog_train if initial_length < target_train_size else dog_val
        copyfile(os.path.join(dog_every, dogs), os.path.join(dest_dir, dogs))
        initial_length += 1

In [None]:
# Number of files in each breed folder of the train tree
# (len_dir[i] belongs to all_breeds[i]).
all_breeds = os.listdir('./dogbreeds/train')
len_dir = [
    len(os.listdir(os.path.join('./dogbreeds/train', dog_dir)))
    for dog_dir in all_breeds
]

In [None]:
print(max(len_dir))

In [None]:
print(sum(len_dir))

In [None]:
# Histogram: folder size -> number of breed folders with that many files.
train_breed = {}
for n_images in len_dir:
    train_breed[n_images] = train_breed.get(n_images, 0) + 1
print(train_breed)

In [None]:
# Number of files in each breed folder of the validation tree
# (len_dir[i] belongs to all_breeds[i]).
all_breeds = os.listdir('./dogbreeds/validation')
len_dir = [
    len(os.listdir(os.path.join('./dogbreeds/validation', dog_dir)))
    for dog_dir in all_breeds
]

In [None]:
print(max(len_dir))

In [None]:
print(sum(len_dir))

# Datagenerator

In [None]:
train_dir = './dogbreeds/train'

# No augmentation here: images are only loaded, resized and batched.
train_datagen = ImageDataGenerator()

# Shuffled batches of 64 images resized to 331x331 with one-hot labels.
train_flow_options = dict(
    target_size=(331, 331),
    batch_size=64,
    class_mode='categorical',
    shuffle=True,
)
train_generator = train_datagen.flow_from_directory(train_dir, **train_flow_options)

In [None]:
val_dir = './dogbreeds/validation'

# Validation images are only loaded, resized to 331x331 and batched by 64.
val_datagen = ImageDataGenerator()

validation_flow_options = dict(
    target_size=(331, 331),
    batch_size=64,
    class_mode='categorical',
)
val_generator = val_datagen.flow_from_directory(val_dir, **validation_flow_options)

# Using our Ensembled Model (5 of the 9 backbones enabled)

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inceptpp
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as vggpp
from tensorflow.keras.applications.resnet import ResNet101
from tensorflow.keras.applications.resnet import preprocess_input as respp
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as xceptpp
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inceptrespp
from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.applications.nasnet import preprocess_input as naspp
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input as densepp
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input as effpp
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobpp

In [None]:
def pretrained_model_prediction(MODEL,Pre,data):
  """Pooled output of a frozen, headless ImageNet backbone for `data`.

  Args:
    MODEL: backbone constructor from `tf.keras.applications`.
    Pre: preprocessing function matching `MODEL`.
    data: symbolic input tensor passed through preprocessing and backbone.

  Returns:
    The backbone output after global average pooling.
  """
  backbone_options = {
      'input_shape': (331,331,3),
      'include_top': False,
      'weights': 'imagenet',
  }
  extractor = MODEL(**backbone_options)
  # The backbone is not trained: mark each of its layers as non-trainable.
  for sub_layer in extractor.layers:
    sub_layer.trainable = False
  features = extractor(tf.keras.layers.Lambda(Pre)(data))
  pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
  return pooled

In [None]:
# Shared 331x331 RGB input feeding every enabled backbone branch.
inputs = tf.keras.layers.Input(shape=(331,331,3))

# Enabled branches. The ResNet101, DenseNet201, EfficientNetB7 and MobileNetV2
# branches are switched off in this run.
inception_prediction = pretrained_model_prediction(MODEL=InceptionV3, Pre=inceptpp, data=inputs)
vgg_prediction = pretrained_model_prediction(MODEL=VGG19, Pre=vggpp, data=inputs)
xception_prediction = pretrained_model_prediction(MODEL=Xception, Pre=xceptpp, data=inputs)
inceptionres_prediction = pretrained_model_prediction(MODEL=InceptionResNetV2, Pre=inceptrespp, data=inputs)
nasnet_prediction = pretrained_model_prediction(MODEL=NASNetLarge, Pre=naspp, data=inputs)

# Concatenate the five feature vectors and expose the result as a model.
branch_features = [
    inception_prediction,
    vgg_prediction,
    xception_prediction,
    inceptionres_prediction,
    nasnet_prediction,
]
new_features = tf.keras.layers.Concatenate()(branch_features)
concatenated_pretrained_model = tf.keras.Model(inputs=inputs, outputs=new_features)


In [None]:
concatenated_pretrained_model.summary()

In [None]:
plot_model(concatenated_pretrained_model,show_shapes=True)

In [None]:
# 120-way softmax classifier on top of the concatenated backbone features.
model = tf.keras.Sequential()
model.add(concatenated_pretrained_model)
model.add(tf.keras.layers.Dense(120, activation='softmax'))

In [None]:
model.summary()

In [None]:
plot_model(model,show_shapes = True)

In [None]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [None]:
# Number of full batches per epoch, read from the generators instead of
# hard-coded image counts that go stale when the split or batch size changes.
# max(1, ...) guards against a split smaller than one batch.
train_steps = max(1, train_generator.samples // train_generator.batch_size)
val_steps = max(1, val_generator.samples // val_generator.batch_size)

history = model.fit(
    train_generator,
    epochs = 10,
    steps_per_epoch = train_steps,
    validation_data = val_generator,
    validation_steps = val_steps
)

In [None]:
train_generator.class_indices

The class indices follow the sorted (alphabetical) order of the class names.

In [None]:
test_df = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')

In [None]:
test_df.head()

In [None]:

test_df['file'] = test_df['id'] + '.jpg'

In [None]:
test_datagen = ImageDataGenerator()

# Unlabelled test images, read from the 'file' column of test_df.
# shuffle=False is essential: flow_from_dataframe shuffles by default, which
# would return the predictions in a random order that no longer lines up with
# test_df['id'] when the submission is assembled.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col = 'file',
    y_col = None,
    directory = '../input/dog-breed-identification/test',
    class_mode = None,
    target_size = (331,331),
    shuffle = False
)


In [None]:
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(test_generator)

In [None]:
pred.shape

In [None]:
cols = list(train_generator.class_indices)

In [None]:
fin_df = pd.DataFrame(data=pred,columns=cols)

In [None]:
ids = list(test_df['id'])

In [None]:
ids

In [None]:
fin_df.insert(loc=0,column='id',value=ids)

In [None]:
fin_df.head()

In [None]:
# index=False: without it the row index is written as an extra unnamed first
# column in front of 'id'.
fin_df.to_csv('./finallyyy.csv', index=False)