In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# The statements below point at the dog-breed-identification image folders there and load the label and sample-submission CSV files
 
import os
# Folders holding the training images and the test images.
directory = '../input/dog-breed-identification/train'
directory_test = '../input/dog-breed-identification/test'

# dtype=str keeps every CSV column as text.
traindf = pd.read_csv(
    "../input/dog-breed-identification/labels.csv",
    dtype=str,
)
testdf = pd.read_csv(
    "../input/dog-breed-identification/sample_submission.csv",
    dtype=str,
)
 
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Count the entries directly inside the training directory that are not
# sub-directories (the same file/dir classification os.walk uses for its
# top level). Scanning the directory directly avoids binding `_`, which
# would shadow IPython's "last output" variable, and raises a clear
# FileNotFoundError instead of StopIteration when the directory is missing.
with os.scandir(directory) as entries:
    files = [entry.name for entry in entries if not entry.is_dir()]
file_count = len(files)
file_count  #number of files

In [None]:
import matplotlib.pyplot as plt
# Number of labelled images per breed, largest class first, drawn as a bar chart.
images_per_breed = traindf.groupby('breed')['id'].count()
sorted_data = images_per_breed.sort_values(ascending=False)
sorted_data.plot.bar(figsize=(20, 10))
plt.show()

The number of images per breed is not uniform across the data set. We can use data augmentation to generate additional training images.

In [None]:
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from keras.models import Sequential
from tensorflow.keras import optimizers

In [None]:
# Image generator for the labelled data: scales pixel values by 1/255,
# reserves 25% of the rows as a validation subset and applies random
# flips and horizontal/vertical shifts of up to 20%.
# NOTE(review): vertical_flip produces upside-down images -- confirm this
# augmentation is intended for this data set.
datagen = ImageDataGenerator(
    rescale=1./255.,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    validation_split=0.25,
)

In [None]:
#add jpg extension to the image name in pandas dataset
def append_ext(fn):
    """Return ``fn`` with a ``.jpg`` extension, adding it only when missing.

    The check makes the function idempotent: applying it again to a name
    that already ends in ``.jpg`` (for example when the notebook cell is
    re-run) leaves the name unchanged instead of producing ``.jpg.jpg``.

    Args:
        fn: File name as a string, with or without the ``.jpg`` suffix.

    Returns:
        The file name ending in exactly one ``.jpg`` suffix.
    """
    return fn if fn.endswith(".jpg") else fn + ".jpg"
# Rewrite the "id" column of both tables through append_ext so the values
# carry the file extension.
for frame in (traindf, testdf):
    frame["id"] = frame["id"].apply(append_ext)

In [None]:
# Preview only the first rows instead of rendering the whole table.
traindf.head()

In [None]:
# Training batches: drawn from the augmenting generator `datagen`.
train_generator = datagen.flow_from_dataframe(
    dataframe=traindf, directory=directory, x_col="id", y_col="breed",
    subset="training", batch_size=32, seed=42, shuffle=True,
    class_mode="categorical", target_size=(224, 224))

# Validation batches: use a rescale-only generator so the validation images
# are NOT randomly flipped/shifted, which would distort the validation
# metrics. validation_split must stay equal to the value given to `datagen`
# so the "training" and "validation" subsets remain complementary.
valid_datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=traindf, directory=directory, x_col="id", y_col="breed",
    subset="validation", batch_size=32, seed=42, shuffle=True,
    class_mode="categorical", target_size=(224, 224))

# Test batches: unlabelled (class_mode=None) and unshuffled so predictions
# stay in the same order as the generator's file list.
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf, directory=directory_test, x_col="id", y_col=None,
    batch_size=32, seed=42, shuffle=False, class_mode=None,
    target_size=(224, 224))

In [None]:
 
import tensorflow as tf
# Convolutional classifier for 224x224 RGB inputs: two convolution stages
# (32 then 64 filters), each ending in max-pooling and dropout, followed by
# a 512-unit dense layer and a 120-way softmax output.
model = Sequential([
    # Stage 1: 32-filter 3x3 convolutions
    Conv2D(32, (3, 3), padding='same', input_shape=(224, 224, 3)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: 64-filter 3x3 convolutions
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Dense head
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(120, activation='softmax'),
])

# NOTE(review): the `decay` argument may not be accepted by every Keras
# optimizer version -- confirm against the installed release.
model.compile(
    optimizers.Adam(learning_rate=0.001, decay=1e-6),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Whole batches per pass over the training / validation subsets
# (floor division: a trailing partial batch is skipped).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
# Round UP for the test set: every test image needs a prediction, so the
# final partial batch must be included. With floor division the prediction
# array can end up shorter than test_generator.filenames.
STEP_SIZE_TEST = -(-test_generator.n // test_generator.batch_size)

# Train for 30 epochs, validating on the validation generator after each one.
model.fit(train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          epochs=30)

In [None]:
# Score the model on the validation generator. The step count has to be the
# validation one: the test-set step count belongs to a different generator
# and does not match the number of validation batches.
model.evaluate(valid_generator,
               steps=STEP_SIZE_VALID)

In [None]:
# Rewind the test generator so prediction starts at its first batch.
test_generator.reset()
# len(test_generator) is the generator's own batch count, so the last
# partial batch is predicted too and `pred` gets one row per test image.
pred = model.predict(test_generator, steps=len(test_generator), verbose=1)

In [None]:
# For each prediction row, take the column index holding the largest value.
predicted_class_indices = pred.argmax(axis=1)

In [None]:
# Invert the training generator's class-name -> index mapping, then use it
# to translate every predicted index back into its class name.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = [labels[index] for index in predicted_class_indices]

In [None]:
# Pair each test file name with its predicted label and save the table as
# results.csv without the index column.
filenames = test_generator.filenames
results = pd.DataFrame({"Filename": filenames}).assign(Predictions=predictions)
results.to_csv("results.csv", index=False)