In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Installation

## Import Necessary Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras
tfds.disable_progress_bar()

from keras.preprocessing import image

## Dataset

In [None]:
# Training labels; the `id` and `breed` columns are consumed by the generators below.
train_labels = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
# Despite its name, this frame is the sample submission file; its `id` column
# is used below to enumerate the test images.
test_labels = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
# Show the column names of the training labels.
train_labels.columns

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Image directories passed as `directory=` to the data generators below.
train_dir = "../input/dog-breed-identification/train"
test_dir = "../input/dog-breed-identification/test"

In [None]:
def append_ext(fn):
    """Return ``fn`` with a ``.jpg`` extension, adding it only when missing.

    The function is idempotent, so re-running the cell that applies it to the
    ``id`` columns does not produce names such as ``abc.jpg.jpg``.

    Parameters
    ----------
    fn : str
        Image id or file name.

    Returns
    -------
    str
        ``fn`` unchanged if it already ends with ``.jpg``, else ``fn + '.jpg'``.
    """
    return fn if fn.endswith('.jpg') else fn + '.jpg'

# Turn the bare ids into file names so they match the files inside the image directories.
train_labels['id'] = train_labels['id'].map(append_ext)
test_labels['id'] = test_labels['id'].map(append_ext)

## Examples

In [None]:
import os

src_path = "../input/dog-breed-identification/train"
sub_class = os.listdir(src_path)

# Preview the first eight files listed in the training directory on a 2x4 grid.
fig = plt.figure(figsize=(10, 5))
for position, file_name in enumerate(sub_class[:8], start=1):
    plt.subplot(2, 4, position)
    sample = plt.imread(os.path.join(src_path, file_name))
    plt.imshow(sample, cmap=plt.get_cmap('gray'))

## Data Preparation

In [None]:
# Number of images per batch requested from every generator below.
batch_size = 32
# Side length in pixels; the generators resize images to (img_size, img_size).
img_size = 224

In [None]:
# Shared pipeline for the training and validation subsets: multiply pixel
# values by 1/255, randomly mirror images horizontally, and hold out 10% of
# the rows as the "validation" subset.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    validation_split=0.1,
)

In [None]:
# Shuffled, one-hot-labelled batches for the "training" subset of the label frame.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_labels,
    directory=train_dir,
    x_col="id",
    y_col="breed",
    subset="training",
    target_size=(img_size, img_size),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

### Plot Augmented Images

In [None]:
# Draw one batch from the training generator: `x` holds the images, `y` the labels.
x, y = next(train_generator)

In [None]:
# Inspect the container type of the image batch and the shapes of images and labels.
print(type(x))
print(x.shape)
print(y.shape)

In [None]:
from mpl_toolkits.axes_grid1 import ImageGrid

def show_grid(image_list, nrows, ncols, figsize = (10,10), showaxis='off'):
    """Display images on an ``nrows`` x ``ncols`` grid.

    Parameters
    ----------
    image_list : list or array
        Either a list of images, or a 4-D batch whose last axis has 1 entry
        (single channel) or 3 entries (RGB); such a batch is split into a
        list of per-image arrays first.
    nrows, ncols : int
        Grid dimensions.
    figsize : tuple
        Figure size handed to ``plt.figure``.
    showaxis : str or bool
        Passed to ``Axes.axis`` for every cell that holds an image
        (default ``'off'``). Previously this argument was ignored.

    Notes
    -----
    If fewer than ``nrows * ncols`` images are supplied (for example a short
    final batch), the remaining cells are hidden instead of raising
    ``IndexError``.
    """
    if not isinstance(image_list, list):
        n_channels = image_list.shape[-1]
        if n_channels == 1:
            # Drop the singleton channel axis so imshow receives 2-D arrays.
            image_list = [image_list[i, :, :, 0] for i in range(image_list.shape[0])]
        elif n_channels == 3:
            image_list = [image_list[i, :, :, :] for i in range(image_list.shape[0])]

    fig = plt.figure(None, figsize, frameon=False)
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(nrows, ncols),
                     axes_pad=0.3,  # pad between axes in inch.
                     share_all=True,
                     )

    n_images = len(image_list)
    for i in range(nrows * ncols):
        ax = grid[i]  # The grid object works as a list of axes.
        if i < n_images:
            ax.imshow(image_list[i], cmap='Greys_r')
            ax.axis(showaxis)
        else:
            # No image for this cell: hide the empty axes.
            ax.axis('off')

In [None]:
# Plot the batch drawn above on a 4x8 grid (32 cells, matching batch_size = 32).
show_grid(x, 4, 8, figsize=(25, 25))

### Validation Data

In [None]:
# Batches for the "validation" subset of the label frame.
# shuffle is False on purpose: later cells compare model.predict(val_generator)
# with val_generator.classes, which is only valid when batches are yielded in
# the same fixed order as `classes`.
# Note: this subset shares `train_datagen`, so its images also receive the
# random horizontal flip configured there.
val_generator = train_datagen.flow_from_dataframe(
    dataframe = train_labels,
    directory = train_dir,
    x_col = "id",
    y_col = "breed",
    subset = "validation",
    batch_size = batch_size,
    seed = 42,
    shuffle = False,
    class_mode = "categorical",
    target_size = (img_size, img_size),
    color_mode = "rgb"
)

### Test Data

In [None]:
# Test-time pipeline: only the 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Unlabelled, unshuffled batches (class_mode=None, shuffle=False) over the
# ids listed in `test_labels`.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_labels,
    directory=test_dir,
    x_col="id",
    y_col=None,
    target_size=(img_size, img_size),
    color_mode="rgb",
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
    seed=42,
)

## Training the CNN

In [None]:
# Output width follows the label encoding built by the training generator
# instead of a hard-coded 120.
num_classes = len(train_generator.class_indices)

# Baseline network: average-pool the input (window 6, stride 3), flatten,
# one hidden dense layer with dropout, softmax over the breeds.
# The input shape is tied to `img_size` so it always matches the
# generators' target_size.
model = keras.Sequential([
    keras.layers.AveragePooling2D(6, 3, input_shape=(img_size, img_size, 3)),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(num_classes, activation='softmax')
])

# Categorical cross-entropy matches the one-hot labels produced by
# class_mode="categorical".
model.compile(optimizer=keras.optimizers.SGD(learning_rate = 0.01),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

model.summary()

In [None]:
# Early-stopping callback watching `val_loss` with a patience of 5 epochs.
early = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience=5)

In [None]:
# Whole batches available per pass over each generator; the integer division
# discards any final partial batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VAL = val_generator.n // val_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Train for up to 20 epochs, validating after each one with the early-stopping callback.
model.fit(
    train_generator,
    epochs=20,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VAL,
    callbacks=[early],
)

In [None]:
import time

# Time one full evaluation pass over the validation generator.
start = time.time()
# The generator already yields batches, so `batch_size` is not passed here
# (Keras documents that it must not be specified for generator input).
score = model.evaluate(val_generator)
end = time.time()

# score[0] is the loss, score[1] the accuracy metric set in model.compile.
print("Accuracy: {:.2f}%".format(score[1] * 100))
print("Loss: ",score[0])
# Use the generator's own sample count rather than a hard-coded 1022.
print("Time per test instance: ", (end-start)/val_generator.n)      #no. of val images

In [None]:
# NOTE(review): predictions come back in the generator's iteration order, so
# they can only be compared with `val_generator.classes` when the generator
# does not shuffle — confirm the generator's `shuffle` setting.
# Per-class scores for every validation image.
Y_pred = model.predict(val_generator)
# Index of the highest-scoring class in each row.
y_pred = np.argmax(Y_pred, axis=1)

In [None]:
from sklearn.metrics import f1_score

# Report F1 on the validation labels under the three averaging schemes.
for label, averaging in (("Micro", "micro"), ("Macro", "macro"), ("Weighted", "weighted")):
    print(label + " F1: ", f1_score(val_generator.classes, y_pred, average=averaging))

### For Submission

In [None]:
# Per-class scores for every test image, in the (unshuffled) order of test_generator.
pred = model.predict(test_generator)

In [None]:
# Reload the sample submission as the template for the submission frame.
df_submission = pd.read_csv('/kaggle/input/dog-breed-identification/sample_submission.csv')
df_submission.head()

In [None]:
import re

# Recover the bare image ids (no directory, no extension) in generator order.
# os.path treats the name literally; the earlier regex '.jpg' used '.' as a
# wildcard and was unanchored, so it could strip matching text from inside an id.
file_list = test_generator.filenames
id_list = [os.path.splitext(os.path.basename(name))[0] for name in file_list]

In [None]:
# Replace the id column with the ids recovered from the generator's file order.
df_submission['id'] = id_list
# Fill every column after `id` with the model's predicted scores.
# NOTE(review): assumes the template's breed columns are in the same order as
# the class indices used during training — confirm.
df_submission.iloc[:,1:] = pred
df_submission.head()

In [None]:
# Index by `id` so the CSV's first column is the id rather than a numeric row index.
final_sub = df_submission.set_index('id')
final_sub.to_csv('Submission.csv')

## Hyperparameter Tuning

In [None]:
pip install -U keras-tuner

In [None]:
from kerastuner.tuners import RandomSearch

def build_model(hp):
    """Build and compile one candidate network for the tuner.

    The architecture is fixed (average pooling, flatten, one hidden dense
    layer with dropout, 120-way softmax); three values are sampled from
    ``hp``: the hidden layer width, the dropout rate and the SGD learning rate.

    Parameters
    ----------
    hp
        Hyperparameter object supplied by the tuner; only ``hp.Choice`` is used.

    Returns
    -------
    A compiled ``keras.Sequential`` model.
    """
    # hp.Choice lets the tuner pick one of the listed values for each trial.
    hidden_units = hp.Choice("Dense layer", [64, 128, 256, 512, 1024])
    dropout_rate = hp.Choice("Dropout", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    hp_lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    candidate = keras.Sequential([
        keras.layers.AveragePooling2D(6, 3, input_shape=(224, 224, 3)),
        keras.layers.Flatten(),
        keras.layers.Dense(hidden_units, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(120, activation='softmax'),
    ])

    candidate.compile(
        optimizer=keras.optimizers.SGD(learning_rate=hp_lr),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return candidate

In [None]:
# Random search over the choices declared in build_model: up to 32 trials,
# ranked by validation accuracy, with results written under ./multi_conv.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=32,
    directory='./multi_conv',
)

In [None]:
# Rebinds `early` for the tuning run: same monitor (`val_loss`) but a patience of 3.
early = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience=3)

In [None]:
# Whole batches available per pass over each generator; the integer division
# discards any final partial batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VAL = val_generator.n // val_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Run the search; each trial trains for up to 20 epochs with early stopping.
tuner.search(
    train_generator,
    epochs=20,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VAL,
    callbacks=[early],
)

In [None]:
# Take the first model from the list the tuner returns as its best models.
best_model = tuner.get_best_models()[0]

In [None]:
# `X_test` / `y_test` are never defined in this notebook; the validation
# generator is the only labelled held-out data, so evaluate on it.
best_model.evaluate(val_generator)

In [None]:
# Print the layer-by-layer summary of the selected model.
best_model.summary()

In [None]:
# Print the tuner's summary of the search results.
tuner.results_summary()

## Save & Load Models

In [None]:
# Persist the selected model under ./best_model in the working directory.
best_model.save('./best_model')

In [None]:
# Reload the model from the path it was saved to above.
loaded_model = keras.models.load_model('./best_model')

In [None]:
# `X_test` / `y_test` are never defined in this notebook; evaluate the
# reloaded model on the labelled validation generator instead.
loaded_model.evaluate(val_generator)

## Plot Image from Numpy Array

In [None]:
#RGB image
# Take one batch from the training generator and show its first image.
# The earlier version read from `ds_train` and `train_images`, neither of
# which is defined in this notebook, and rebound the name `image`, which
# would shadow the keras `image` module imported at the top.
rgb_images = next(train_generator)[0]
rgb_image = rgb_images[0]

plt.imshow(rgb_image)
rgb_image.shape

#Greyscale image
# image = train_images[0].reshape(300, 300)

# plt.imshow(train_images[0], cmap='Greys_r')

## Convert PNG/JPG Images to Numpy Format

In [None]:
import imageio

# Read a real file: the original empty path ('') cannot be opened.
# The first entry (sorted, for determinism) of the training directory is used.
sample_path = os.path.join(train_dir, sorted(os.listdir(train_dir))[0])
im = imageio.imread(sample_path)

print(type(im))

# View the loaded image as a plain NumPy array.
im_np = np.asarray(im)

print(im_np.shape)

In [None]:
# import glob

# # First of all we will extract the detail of all the data and save all of them in terms of dataframe with foldername, imagename, objectname and labels
# detail = sorted(glob.glob("../input/dog-breed-identification/train/*"))
# Folder_Name = [str(i.split("in/")[0]) + "in" for i in detail]
# Image_Name = [str(i.split("/")[4]) for i in detail]
# Train_Labels = np.array((pd.read_csv('../input/dog-breed-identification/labels.csv'))["breed"])

# # Defining dataframe and saving all the extracted information in that dataframe
# train_detail = pd.DataFrame() 
# train_detail["Folder Name"] = Folder_Name
# train_detail["Image Name"] = Image_Name
# train_detail["Train Labels"] = Train_Labels


# # Analying the train data detail
# print("\nNumber of images in training set = "+str(len(detail)))
# print(train_detail.columns)
# train_detail.head()