In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

from os.path import join

# Directory that holds the competition's training images.
image_dir = '../input/dog-breed-identification/train/'

# Full paths to four specific training images, built by joining each file
# name onto image_dir.
img_paths = list(map(
    lambda fname: join(image_dir, fname),
    (
        '0c8fe33bd89646b678f6b2891df8a1c6.jpg',
        '0c3b282ecbed1ca9eb17de4cb1b6e326.jpg',
        '04fb4d719e9fe2b6ffe32d9ae7be8a22.jpg',
        '0e79be614f12deb4f7cae18614b7391b.jpg',
    ),
))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from PIL import Image
import keras as keras
import tensorflow as tf
import tensorflow_datasets as tfds
from keras.preprocessing import image

In [None]:
# Sequential CNN: three Conv2D -> MaxPool2D -> Dropout stages followed by a
# small dense head ending in a 120-way softmax.
model = keras.Sequential()

# Stage 1: 32 filters; this first layer also declares the 224x224 RGB input.
model.add(keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu',
                              input_shape=(224, 224, 3)))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(keras.layers.Dropout(rate=0.5))

# Stage 2: 64 filters.
model.add(keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(keras.layers.Dropout(rate=0.5))

# Stage 3: 64 filters.
model.add(keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(keras.layers.Dropout(rate=0.5))

# Classifier head: flatten, one hidden dense layer, dropout, softmax output.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(120, activation='softmax'))

# Categorical cross-entropy pairs with the softmax output and the one-hot
# labels produced by class_mode="categorical" generators.
model.compile(
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Mini-batch size and square image edge length (in pixels) used by the
# data generators.
batch_size, img_size = 32, 224

In [None]:
# Image folders for the labelled (train) and unlabelled (test) splits.
train_dir, test_dir = (
    "../input/dog-breed-identification/train",
    "../input/dog-breed-identification/test",
)

In [None]:
# Label table for the training images; its `id` and `breed` columns feed the
# train/validation generators. The path is relative, matching the other
# `../input/...` paths in this notebook.
train_labels = pd.read_csv('../input/dog-breed-identification/labels.csv')

# The sample submission doubles as the list of test image ids (`id` column).
test_labels = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
def append_ext(fn):
    """Return ``fn`` with the ``.jpg`` extension appended.

    Args:
        fn: File name (string) without an extension.

    Returns:
        The concatenation of ``fn`` and ``'.jpg'``.
    """
    extension = '.jpg'
    return fn + extension

# Turn the bare ids into image file names, since the generators use the
# `id` column as the file name to load.
train_labels['id'] = train_labels['id'].map(append_ext)
test_labels['id'] = test_labels['id'].map(append_ext)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Generator settings for the labelled images: multiply pixel values by 1/255,
# randomly mirror images horizontally, and reserve 10% of the rows for the
# "validation" subset.
train_datagen = ImageDataGenerator(
    validation_split=0.1,
    horizontal_flip=True,
    rescale=1.0 / 255,
)

In [None]:
# Shuffled batches of (image, one-hot breed) pairs drawn from the "training"
# subset of train_labels; file names come from `id`, classes from `breed`.
train_generator = train_datagen.flow_from_dataframe(
    train_labels,
    train_dir,
    x_col="id",
    y_col="breed",
    target_size=(img_size,) * 2,
    color_mode="rgb",
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=42,
    subset="training",
)

In [None]:
# Batches from the held-out "validation" subset of train_labels.
#
# shuffle is deliberately False: predictions made from this generator are
# later compared position-by-position with `val_generator.classes`, which is
# stored in dataframe order. With shuffling on, the predicted rows and the
# reference labels would not line up and the reported F1 scores would be
# meaningless. Validation loss/accuracy during fit() do not depend on order.
#
# NOTE(review): this subset still goes through train_datagen, so its random
# horizontal flips are applied to validation images too; consider a separate
# non-augmenting generator if fully deterministic validation is needed.
val_generator = train_datagen.flow_from_dataframe(
    dataframe = train_labels,
    directory = train_dir,
    x_col = "id",
    y_col = "breed",
    subset = "validation",
    batch_size = batch_size,
    seed = 42,
    shuffle = False,
    class_mode = "categorical",
    target_size = (img_size, img_size),
    color_mode = "rgb"
)

In [None]:
# Test images only get the same 1/255 rescaling; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Unlabelled, unshuffled batches of test images (class_mode=None yields
# images only), so prediction rows follow the generator's file order.
test_generator = test_datagen.flow_from_dataframe(
    test_labels,
    test_dir,
    x_col="id",
    y_col=None,
    target_size=(img_size,) * 2,
    color_mode="rgb",
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
    seed=42,
)

In [None]:
# Stop training once validation loss has gone 5 epochs without improving.
# restore_best_weights=True rolls the model back to the weights of the best
# val_loss epoch; without it the model keeps the weights from the final
# (already degraded) epoch, and those are what later predictions would use.
early = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 5,
    restore_best_weights = True,
)

In [None]:
# Number of whole batches per pass over each generator; floor division means
# a trailing partial batch is not counted.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VAL = val_generator.n // val_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Train for at most 20 epochs, validating after each one; the early-stopping
# callback may end the run sooner.
model.fit(
    x=train_generator,
    epochs=20,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VAL,
    callbacks=[early],
)

In [None]:
# Class-probability rows predicted for the validation generator.
# NOTE(review): comparing these against `val_generator.classes` assumes the
# generator yields images in dataframe order (i.e. without shuffling) - confirm.
Y_pred = model.predict(x=val_generator)

# Collapse each probability row to the index of its highest-scoring class.
y_pred = Y_pred.argmax(axis=1)

In [None]:
from sklearn.metrics import f1_score

# Report F1 on the validation subset under each averaging scheme, comparing
# the generator's integer class labels with the predicted class indices.
for label, averaging in (
    ("Micro F1: ", 'micro'),
    ("Macro F1: ", 'macro'),
    ("Weighted F1: ", 'weighted'),
):
    print(label, f1_score(val_generator.classes, y_pred, average=averaging))

In [None]:
# Run inference over the test generator; `pred` is later written into the
# probability columns of the submission frame.
pred = model.predict(test_generator)

In [None]:
# Load the sample submission as the template for the final output: its
# columns define the layout that the predictions are written into. The path
# is relative, matching the other `../input/...` paths in this notebook.
df_submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
df_submission.head()

In [None]:
import re

# File names in the order the test generator lists them.
file_list = test_generator.filenames

# Recover the bare image id from each file name: drop any directory prefix
# and the file extension. os.path is used rather than regex substitution
# because an unescaped pattern such as '.jpg' matches any character followed
# by "jpg" anywhere in the name, not just a literal trailing extension.
id_list = [
    os.path.splitext(os.path.basename(name))[0]
    for name in file_list
]

In [None]:
# Replace the template ids with the ids derived from the test generator's
# file names, so each row is labelled with the image it was predicted from.
df_submission['id'] = id_list
# Every column after the first receives the predicted class probabilities.
# NOTE(review): assumes the template's breed columns are in the same order as
# the class indices the model was trained with - confirm.
df_submission.iloc[:,1:] = pred
df_submission.head()

In [None]:
# Make `id` the index so it is written as the leading column of the CSV
# instead of an extra integer row index.
final_sub = df_submission.set_index('id')
# Write the submission file to the current working directory.
final_sub.to_csv('Submission.csv')