In [None]:
import pandas as pd
import numpy as np
import os
from os.path import join

import PIL.Image
from PIL import ImageOps
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input

import itertools
from sklearn.metrics import classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

# 1 - Dataset

In [None]:
# Directories holding the training and test images
train_dir = '../input/dog-breed-identification/train'
test_dir = '../input/dog-breed-identification/test'

In [None]:
# Number of image files found in the training and test directories
train_img_fpaths = [join(train_dir, fname) for fname in os.listdir(train_dir)]
test_img_fpaths = [join(test_dir, fname) for fname in os.listdir(test_dir)]

train_size, test_size = len(train_img_fpaths), len(test_img_fpaths)

train_size, test_size

## 1.1 Treino

In [None]:
# Training dataframe loaded from labels.csv; preview the first rows
df_train = pd.read_csv('../input/dog-breed-identification/labels.csv')
df_train.head()

In [None]:
# Number of distinct breeds (dropna=False counts a missing value as its own entry, like unique())
df_train["breed"].nunique(dropna = False)

In [None]:
# Reorganize the dataframe: rename the class column to "label" and turn each id
# into a file name by adding the ".jpg" extension.
df_train = df_train.rename(columns = {"breed": "label"})
# Append the extension only where it is missing, so that re-running this cell
# does not produce names such as "<id>.jpg.jpg".
id_has_ext = df_train["id"].str.endswith(".jpg")
df_train["id"] = df_train["id"].where(id_has_ext, df_train["id"] + ".jpg")
df_train.head()

## 1.2 Teste

In [None]:
# Test dataframe: one row per file in test_dir. Every row receives the same
# constant label "boston_bull" (a placeholder — presumably because the test
# images come without ground-truth labels; confirm against the dataset).
test_files = os.listdir(test_dir)
df_test = pd.DataFrame({"id": test_files}).assign(label = "boston_bull")
df_test.head()

In [None]:
# Number of rows in the training and test dataframes
train_size, test_size = df_train.shape[0], df_test.shape[0]

train_size, test_size

# 2 - Image Generator Treino e Validação

In [None]:
# Training data: pixel values are multiplied by 1/255 and the 'training' part
# of the 80/20 split of df_train is served in shuffled batches of 32.
train_data_gen = ImageDataGenerator(rescale = 1./255, validation_split = 0.2)
train_generator = train_data_gen.flow_from_dataframe(
    dataframe = df_train,
    directory = train_dir,
    x_col = 'id',
    y_col = 'label',
    subset = 'training',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = True,
    seed = 123,
)

In [None]:
# Validation data: same rescaling and split fraction as the training generator,
# but serving the 'validation' part of df_train.
val_data_generator = ImageDataGenerator(rescale = 1./255, validation_split = 0.2)
validation_generator = val_data_generator.flow_from_dataframe(
    dataframe = df_train,
    directory = train_dir,
    x_col = 'id',
    y_col = 'label',
    subset = 'validation',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = True,
    seed = 123,
)

# 3 - Processando imagens

In [None]:
# Helper that turns an image file into a fixed-size array
def image_preocessing(path):
    """Load an image file and return it as a 299x299 RGB NumPy array.

    Args:
        path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        ``numpy.ndarray`` holding the image resized and centre-cropped to
        299x299 pixels with three colour channels.
    """
    size = (299, 299)
    # The context manager closes the underlying file once the pixels are read.
    with PIL.Image.open(path) as raw_image:
        # Force three channels so grayscale/CMYK/palette files yield the same
        # array shape as ordinary RGB images.
        rgb_image = raw_image.convert('RGB')
        # ImageOps.fit returns a resized and cropped version of the image with
        # the requested aspect ratio and size. LANCZOS is the same filter that
        # the ANTIALIAS alias (removed in Pillow 10) used to name.
        fitted = ImageOps.fit(rgb_image, size, PIL.Image.LANCZOS)

    return np.asarray(fitted)

In [None]:
# Copy of df_train with an extra column holding the path of each image file
df_train_aux = df_train.assign(img_path = train_dir + '/' + df_train['id'])
df_train_aux

In [None]:
# Convert every training image into an array, keeping its label alongside
dogs = []
label_names = []

# Columns are addressed by name rather than by position, so the loop keeps
# working if df_train_aux gains or reorders columns.
for label, filename in zip(df_train_aux['label'], df_train_aux['img_path']):
    # Append directly instead of binding a module-level `image` variable, which
    # would shadow the keras `image` module imported at the top of the notebook.
    dogs.append(image_preocessing(filename))
    label_names.append(label)

print(np.shape(label_names))
print(np.shape(dogs))

In [None]:
# Preview the first images (up to nine) in a 3x3 grid
plt.figure(figsize = (15,15))
# min() guards against fewer than nine loaded images; `dogs` is indexed directly
# so the imported keras `image` module is not rebound to the whole list.
for i in range(min(9, len(dogs))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(dogs[i])
plt.show()

# 4 - CNN

In [None]:
# Small CNN built from scratch: three convolutions (two followed by max-pooling)
# feeding two dense layers with dropout and a softmax output with one unit per
# class known to the training generator.
num_classes = len(train_generator.class_indices)

model = Sequential([
    Conv2D(32, kernel_size = (3, 3), activation = 'selu', input_shape = (299, 299, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(128, kernel_size = (3, 3), activation = 'selu'),
    Flatten(),
    Dense(40, activation = 'relu'),
    Dropout(0.2),
    Dense(20, activation = 'relu'),
    Dropout(0.2),
    Dense(num_classes, activation = 'softmax'),
])

model.summary()

In [None]:
# Compile the model: Adam with default settings, categorical cross-entropy loss,
# accuracy reported as the metric
model.compile(
    optimizer = Adam(),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Keep the best model on disk and stop training when val_loss stalls
# Writes my_model.h5 only when val_loss improves
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath = 'my_model.h5', monitor = 'val_loss', save_best_only = True, verbose = 1
)
# Stops after 10 epochs without a val_loss improvement
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor = 'val_loss', patience = 10, verbose = 1
)
callbacks_list = [checkpoint_cb, early_stopping_cb]

In [None]:
# Train the CNN for up to 20 epochs; step counts use whole batches of 32 only
fit_options = dict(
    epochs = 20,
    steps_per_epoch = train_generator.samples // 32,
    validation_data = validation_generator,
    validation_steps = validation_generator.samples // 32,
    callbacks = callbacks_list,
    verbose = 1,
)
history = model.fit(train_generator, **fit_options)

## 4.1 Avaliando o modelo

In [None]:
# Plot the loss and accuracy recorded per epoch for training and validation
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']

epochs_x = range(1, len(loss_values) + 1)

fig, (ax_loss, ax_acc) = plt.subplots(2, 1, figsize = (10, 10))

# Top panel: loss curves
ax_loss.plot(epochs_x, loss_values, 'bo', label = 'Training loss')
ax_loss.plot(epochs_x, val_loss_values, 'b', label = 'Validation loss')
ax_loss.set_title('Training and validation Loss and Acc')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

# Bottom panel: accuracy curves
ax_acc.plot(epochs_x, acc_values, 'bo', label = 'Training acc')
ax_acc.plot(epochs_x, val_acc_values, 'b', label = 'Validation acc')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Acc')
ax_acc.legend()

plt.show()

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only on val_loss),
# replacing the in-memory model left by the last training epoch.
model = load_model('my_model.h5')

In [None]:
# Evaluate the reloaded model on the validation subset.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated
# and has been removed from recent Keras releases.
score = model.evaluate(validation_generator)
print('Val loss:', score[0])
print('Val accuracy:', score[1])

## 4.2 Predição

In [None]:
# Test data: same 1/255 rescaling as training; shuffle is off so the batches
# follow the order of test_generator.filenames.
test_data_gen = ImageDataGenerator(rescale = 1./255)
test_generator = test_data_gen.flow_from_dataframe(
    dataframe = df_test,
    directory = test_dir,
    x_col = 'id',
    y_col = 'label',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = False,
    seed = 123,
)

In [None]:
# Evaluate on the test generator.
# NOTE: df_test assigns the placeholder label "boston_bull" to every test image,
# so the loss/accuracy printed here do not measure real test performance.
# Model.evaluate replaces the deprecated evaluate_generator.
score = model.evaluate(test_generator)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Class name -> index mapping used by the training generator, and its inverse
label_map = train_generator.class_indices
label_map_inv = dict((index, name) for name, index in label_map.items())

In [None]:
# Class probabilities for every test image, in test_generator.filenames order
# (the generator was created with shuffle=False). Model.predict accepts
# generators directly and replaces the deprecated predict_generator.
ypreds = model.predict(test_generator, steps = len(test_generator), verbose = 1)
ypreds

In [None]:
# Index of the most probable class for each test image
ypred = np.argmax(ypreds, axis = -1)
ypred

In [None]:
# Load the sample submission; its "id" column is used below to order the predictions
submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
submission.head()

In [None]:
# Three listings of the test images: ids expected by the submission file, file
# names in the order served by the generator, and file names found on disk
submission_files = submission["id"]
test_gen_files = test_generator.filenames
test_dir_files = os.listdir(test_dir)
submission_files.shape[0]

In [None]:
# Display the submission ids (without ".jpg"; the extension is appended when looking up predictions)
submission_files

In [None]:
# Map each generator file name to its row of predicted class probabilities
m = {fname: ypreds[row] for row, fname in enumerate(test_gen_files)}

In [None]:
# Class names in the key order of train_generator.class_indices
labels = list(train_generator.class_indices)

In [None]:
# Reorder the predictions so they follow the order of the submission ids
ypreds_sync = [m[f"{file_id}.jpg"] for file_id in submission_files]

In [None]:
# One row per submission id, one column per class name taken from train_generator.class_indices
test_df = pd.DataFrame(data = ypreds_sync, columns = labels)
test_df.head()

In [None]:
# Attach the submission ids and place the "id" column first.
test_df["id"] = submission_files

# Select "id" by name instead of rotating the last column to the front: the
# rotation is not idempotent, so re-running the cell would move a breed column
# ahead of "id".
cols = ["id"] + [col for col in test_df.columns if col != "id"]
test_df = test_df[cols]
test_df.head()

# 5 - Transfer Learning from a Deep Model

## Image Generator Treino e Validação (transfer learning)

In [None]:
# Training data for transfer learning: images go through the InceptionResNetV2
# preprocess_input function instead of the 1/255 rescaling used for the CNN.
train_data_gen = ImageDataGenerator(preprocessing_function = preprocess_input, validation_split = 0.2)
train_generator = train_data_gen.flow_from_dataframe(
    dataframe = df_train,
    directory = train_dir,
    x_col = 'id',
    y_col = 'label',
    subset = 'training',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = True,
    seed = 123,
)

In [None]:
# Validation data for transfer learning: same preprocessing and split fraction
# as the training generator, serving the 'validation' part of df_train.
val_data_generator = ImageDataGenerator(preprocessing_function = preprocess_input, validation_split = 0.2)
validation_generator = val_data_generator.flow_from_dataframe(
    dataframe = df_train,
    directory = train_dir,
    x_col = 'id',
    y_col = 'label',
    subset = 'validation',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = True,
    seed = 123,
)

In [None]:
# Transfer-learning model: ImageNet-pretrained InceptionResNetV2 without its
# classification top, followed by a new dense head.
base_model = InceptionResNetV2(weights = 'imagenet', include_top = False, input_shape = (299, 299, 3))

# Freeze the pretrained layers so that only the new head is trained
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# New head: flatten -> 100-unit ReLU layer -> softmax with one unit per class
num_classes = len(train_generator.class_indices)
flat_features = Flatten()(base_model.output)
hidden = Dense(100, activation = 'relu')(flat_features)
predictions = Dense(num_classes, activation = 'softmax',
                    kernel_initializer = 'random_uniform')(hidden)

model = Model(inputs = base_model.input, outputs = predictions)

model.compile(optimizer = Adam(), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Keep the best model on disk and stop training when val_loss stalls.
# The checkpoint path is the same 'my_model.h5' used for the CNN, so that file
# is overwritten by this training run.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath = 'my_model.h5', monitor = 'val_loss', save_best_only = True, verbose = 1
)
# Stops after 10 epochs without a val_loss improvement
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor = 'val_loss', patience = 10, verbose = 1
)
callbacks_list = [checkpoint_cb, early_stopping_cb]

In [None]:
# Train the transfer-learning model for up to 50 epochs; step counts use whole
# batches of 32 only
fit_options = dict(
    epochs = 50,
    steps_per_epoch = train_generator.samples // 32,
    validation_data = validation_generator,
    validation_steps = validation_generator.samples // 32,
    callbacks = callbacks_list,
    verbose = 1,
)
history = model.fit(train_generator, **fit_options)

## 5.1 Avaliando o modelo

In [None]:
# Plot the loss and accuracy recorded per epoch for training and validation
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']

epochs_x = range(1, len(loss_values) + 1)

fig, (ax_loss, ax_acc) = plt.subplots(2, 1, figsize = (10, 10))

# Top panel: loss curves
ax_loss.plot(epochs_x, loss_values, 'bo', label = 'Training loss')
ax_loss.plot(epochs_x, val_loss_values, 'b', label = 'Validation loss')
ax_loss.set_title('Training and validation Loss and Acc')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

# Bottom panel: accuracy curves
ax_acc.plot(epochs_x, acc_values, 'bo', label = 'Training acc')
ax_acc.plot(epochs_x, val_acc_values, 'b', label = 'Validation acc')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Acc')
ax_acc.legend()

plt.show()

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only on val_loss),
# replacing the in-memory model left by the last training epoch.
model = load_model('my_model.h5')

In [None]:
# Evaluate the reloaded model on the validation subset.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated
# and has been removed from recent Keras releases.
score = model.evaluate(validation_generator)
print('Val loss:', score[0])
print('Val accuracy:', score[1])

## 5.2 Predição

In [None]:
# Test data: same preprocess_input function as training; shuffle is off so the
# batches follow the order of test_generator.filenames.
test_data_gen = ImageDataGenerator(preprocessing_function = preprocess_input)
test_generator = test_data_gen.flow_from_dataframe(
    dataframe = df_test,
    directory = test_dir,
    x_col = 'id',
    y_col = 'label',
    color_mode = 'rgb',
    class_mode = 'categorical',
    target_size = (299, 299),
    batch_size = 32,
    shuffle = False,
    seed = 123,
)

In [None]:
# Evaluate on the test generator.
# NOTE: df_test assigns the placeholder label "boston_bull" to every test image,
# so the loss/accuracy printed here do not measure real test performance.
# Model.evaluate replaces the deprecated evaluate_generator.
score = model.evaluate(test_generator)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Class name -> index mapping used by the training generator, and its inverse
label_map = train_generator.class_indices
label_map_inv = dict((index, name) for name, index in label_map.items())

In [None]:
# Class probabilities for every test image, in test_generator.filenames order
# (the generator was created with shuffle=False). Model.predict accepts
# generators directly and replaces the deprecated predict_generator.
ypreds = model.predict(test_generator, steps = len(test_generator), verbose = 1)
ypreds

In [None]:
# Index of the most probable class for each test image
ypred = np.argmax(ypreds, axis = -1)
ypred

In [None]:
# Load the sample submission; its "id" column is used below to order the predictions
submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
submission.head()

In [None]:
# Three listings of the test images: ids expected by the submission file, file
# names in the order served by the generator, and file names found on disk
submission_files = submission["id"]
test_gen_files = test_generator.filenames
test_dir_files = os.listdir(test_dir)
submission_files.shape[0]

In [None]:
# Display the submission ids (without ".jpg"; the extension is appended when looking up predictions)
submission_files

In [None]:
# Map each generator file name to its row of predicted class probabilities
m = {fname: ypreds[row] for row, fname in enumerate(test_gen_files)}

In [None]:
# Class names in the key order of train_generator.class_indices
labels = list(train_generator.class_indices)

In [None]:
# Reorder the predictions so they follow the order of the submission ids
ypreds_sync = [m[f"{file_id}.jpg"] for file_id in submission_files]

In [None]:
# One row per submission id, one column per class name taken from train_generator.class_indices
test_df = pd.DataFrame(data = ypreds_sync, columns = labels)
test_df.head()

In [None]:
# Attach the submission ids and place the "id" column first.
test_df["id"] = submission_files

# Select "id" by name instead of rotating the last column to the front: the
# rotation is not idempotent, so re-running the cell would move a breed column
# ahead of "id".
cols = ["id"] + [col for col in test_df.columns if col != "id"]
test_df = test_df[cols]
test_df.head()

# Resultado

### *O modelo desenvolvido manualmente apresentou um comportamento estranho durante a predição dos dados de teste. Além disso, apresentou acurácia baixa para os dados de validação e acurácia 0 para os dados de teste (valor não informativo, pois o conjunto de teste não possui rótulos reais: todas as imagens receberam o rótulo fictício "boston_bull"). Já o modelo baseado em arquiteturas clássicas e transfer learning apresentou um resultado mais razoável, com acurácia de 84%. No entanto, ambos os modelos apresentaram overfitting.*