In [None]:
# General Libs
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline
tf.__version__

# Helper functions

## walk_on_dir

In [None]:
import os

def walk_on_dir(dir):
    """Print a summary of the directory tree rooted at ``dir``.

    The root path is printed first. Then, for every directory visited by
    ``os.walk``:

    * if it has sub-directories, the number of sub-directories and their
      names are printed;
    * otherwise (a leaf directory), its path, its own name and the list of
      files it contains are printed.

    The number of leaf directories found is printed last.

    Args:
        dir: Path of the root directory to walk. The name shadows the
            built-in ``dir``; it is kept so keyword callers keep working.
    """
    print(dir)
    leaf_count = 0
    for path, dirs, files in os.walk(dir):
        if dirs:
            print(len(dirs))
            print(dirs)
        else:
            print(path)
            # Take the name from the leaf's own path instead of indexing the
            # most recently seen `dirs` list: that list does not exist when
            # the root itself is a leaf, and the running index goes out of
            # range as soon as the tree is nested more than one level deep.
            print(os.path.basename(os.path.normpath(path)))
            print(files)
            leaf_count += 1
    print(leaf_count)

## Train model

In [None]:
MAX_EPOCHS = 30  # upper bound on the number of training epochs
PATIENCE = 4     # epochs without `val_loss` improvement tolerated before stopping

def train_model(model_id, model, train_generator, val_generator):
    """Fit ``model`` with best-model checkpointing and early stopping.

    Args:
        model_id: File path passed to ``ModelCheckpoint``; the model with the
            lowest ``val_loss`` seen so far is saved there.
        model: A compiled Keras model.
        train_generator: Batch iterator used for training. It must support
            ``len()`` (number of batches per epoch), as Keras directory
            iterators do.
        val_generator: Batch iterator used for validation; same requirement.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Note:
        ``EarlyStopping`` is created without ``restore_best_weights``, so the
        in-memory ``model`` keeps the weights of the last epoch that ran; the
        best weights are the ones saved at ``model_id``.
    """

    # Save the best model (lowest validation loss) seen during training.
    cb_save_best_model = keras.callbacks.ModelCheckpoint(filepath=model_id,
                                                         monitor='val_loss',
                                                         save_best_only=True,
                                                         verbose=1)

    # Stop training early when the validation loss stops improving.
    cb_early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=PATIENCE,
                                                  verbose=1)

    # Take the number of steps from the generators themselves instead of
    # `samples // BATCH_SIZE`: this removes the dependency on a global that may
    # not match the generators' batch size, and it includes the final partial
    # batch so every validation sample contributes to `val_loss`.
    history = model.fit(
            train_generator,
            steps_per_epoch=len(train_generator),
            epochs=MAX_EPOCHS,
            callbacks=[cb_save_best_model, cb_early_stop],
            validation_data=val_generator,
            verbose=1,
            validation_steps=len(val_generator))

    return history

## print_metrics

In [None]:
def print_metrics(model, dataset_generator):
    """Evaluate ``model`` on ``dataset_generator`` and print the results.

    Args:
        model: Object exposing ``evaluate(data)`` that returns a sequence
            whose first three items are printed as loss, accuracy and
            ROC AUC, in that order (i.e. a model compiled with the metrics
            ``['accuracy', 'AUC']``).
        dataset_generator: The data to evaluate on.
    """
    # Evaluate the generator that was passed in, not the notebook-level
    # `test_generator` global.
    score = model.evaluate(dataset_generator)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('Test ROC AUC:', score[2])

## plot_training_curves

In [None]:
def plot_training_curves(history):
    """Draw loss and accuracy curves for training and validation.

    Two stacked panels are shown in one 10x10 figure: loss on top and
    accuracy below, both plotted against the 1-based epoch number.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys ``'loss'``, ``'val_loss'``, ``'accuracy'`` and
            ``'val_accuracy'``, each holding one value per epoch.
    """
    import matplotlib.pyplot as plt

    logs = history.history
    train_loss = logs['loss']
    val_loss = logs['val_loss']
    epoch_numbers = range(1, len(train_loss) + 1)

    def _plot_pair(train_series, val_series, train_label, val_label):
        # Training values as dots, validation values as a solid line.
        plt.plot(epoch_numbers, train_series, 'bo', label=train_label)
        plt.plot(epoch_numbers, val_series, 'b', label=val_label)

    def _finish_panel(y_label):
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.figure(figsize=(10, 10))

    # Top panel: loss. Its title doubles as the title of the whole figure.
    plt.subplot(2, 1, 1)
    _plot_pair(train_loss, val_loss, 'Training loss', 'Validation loss')
    plt.title('Training and validation Loss and Accuracy')
    _finish_panel('Loss')

    # Bottom panel: accuracy.
    plt.subplot(2, 1, 2)
    _plot_pair(logs['accuracy'], logs['val_accuracy'], 'Training acc', 'Validation acc')
    _finish_panel('Acc')

    plt.show()

## plot_confusion_matrix_and_classification_report

In [None]:
import itertools

#Plot the confusion matrix. Set Normalize = True/False
def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as an annotated heat map.

    Args:
        cm: 2-D NumPy array; rows correspond to true labels and columns to
            predicted labels.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: When True, each row is divided by its sum (rounded to two
            decimals) before plotting, so cells show per-class fractions.
            Rows that sum to zero are shown as 0.
        title: Title of the figure.
        cmap: Matplotlib colormap used for the heat map.
    """
    # Normalise BEFORE drawing, so the colours, the printed numbers and the
    # text-colour threshold all refer to the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)  # empty rows stay 0 instead of NaN
        cm = np.around(cm, decimals=2)

    plt.figure(figsize=(15,15))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # White text on dark cells, black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the axis labels are taken into account by the layout.
    plt.tight_layout()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

def plot_confusion_matrix_and_classification_report(model, dataset_generator, classes):
    """Plot the confusion matrix and print the classification report.

    Args:
        model: Model whose ``predict`` returns one score vector per sample;
            the predicted class is the arg-max of each vector.
        dataset_generator: Data to predict on. Its ``classes`` attribute is
            used as the ground-truth labels.
        classes: Class names, used as tick labels and report row names.

    Note:
        Predictions are compared position by position with
        ``dataset_generator.classes``, so the generator should not shuffle
        its samples (NOTE(review): confirm callers pass ``shuffle=False``).
    """
    Y_pred = model.predict(dataset_generator)
    y_pred = np.argmax(Y_pred, axis=1)

    # Ground truth must come from the generator that was predicted on, not
    # from the notebook-level `test_generator` global.
    y_true = dataset_generator.classes

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix')

    # Classification report
    print('Classification Report')
    print(classification_report(y_true, y_pred, target_names=classes))

# Dataset 100 Bird Species
* Fonte: https://www.kaggle.com/gpiosenka/100-bird-species


Data set of 300 bird species: 42622 training images, 1500 test images (5 images per species) and 1500 validation images (5 images per species).
All images are 224 X 224 X 3 color images in jpg format. The data set includes a train set, a test set and a validation set. Each set contains 300 sub-directories, one for each bird species. The data structure is convenient if you use the Keras `ImageDataGenerator.flow_from_directory` to create the train, test and valid data generators. The data set also includes a file, Bird Species.csv. This csv file contains three columns. The filepaths column contains the file path to an image file. The labels column contains the class name associated with the image file. The Bird Species.csv file, if read in using `df = pandas.read_csv("Bird Species.csv")`, will create a pandas dataframe which can then be split into traindf, testdf and validdf dataframes to create your own partitioning of the data into train, test and valid data sets.
NOTE: The test and validation images in the data set were hand selected to be the "best" images so your model will probably get the highest accuracy score using those data sets versus creating your own test and validation sets. However the latter case is more accurate in terms of model performance on unseen images.
Images were gathered from internet searches by species name. Once the image files for a species were downloaded, they were checked for duplicate images using a Python duplicate image detector program I developed. All duplicates detected were deleted in order to prevent there being images common to the training, test and validation sets.
After that the images were cropped so that the bird occupies at least 50% of the pixels in the image. Then the images were resized to 224 X 224 X 3 in jpg format. The cropping ensures that when processed by a CNN there is adequate information in the images to create a highly accurate classifier. Even a moderately robust model should achieve training, validation and test accuracies in the high 90% range. All files were also numbered sequentially starting from one for each species. So test images are named 1.jpg to 5.jpg. Similarly for validation images. Training images are also numbered sequentially with "zeros" padding. For example 001.jpg, 002.jpg … 010.jpg, 011.jpg … 099.jpg, 100.jpg, 101.jpg etc. The zero padding preserves the file order when used with Python file functions and Keras flow from directory.
The training set is not balanced, having a varying number of files per species. However, each species has at least 120 training image files. This imbalance did not affect my kernel classifier, as it achieved over 98% accuracy on the test set.
One significant imbalance in the data set is the ratio of male species images to female species images. About 85% of the images are of the male and 15% of the female. Males are typically far more diversely colored, while the females of a species are typically bland. Consequently, male and female images may look entirely different. Almost all test and validation images are taken from the male of the species. Consequently, the classifier may not perform as well on female species images.

# Exploratory Analysis

In [None]:
# Image size fed to the network. The dataset images are 224 x 224 and that is
# also ResNet50's native ImageNet input size; the previous value (244, 244)
# forced every image to be up-sampled on load.
IMG_SHAPE = (224, 224)
INPUT_SHAPE = IMG_SHAPE + (3,)  # height, width, RGB channels

# Dataset locations (Kaggle input layout).
TRAIN_DIR = '../input/100-bird-species/train'
TEST_DIR = '../input/100-bird-species/test'
VAL_DIR = '../input/100-bird-species/valid'

# Number of images per batch yielded by the data generators.
BATCH_SIZE = 16

# Seed shared by the class sub-sampling and the data generators.
RANDOM_SEED = 33

# Number of classes kept for the experiment.
MAX_CLASSES = 30

In [None]:
# Every entry of the training directory is taken as one class name.
classes = list(os.listdir(TRAIN_DIR))
num_classes = len(classes)

# Display how many classes were found.
num_classes

In [None]:
# For experimentation, randomly pick MAX_CLASSES classes.

import random

# Start from a freshly listed, sorted copy of the class names:
# * os.listdir returns entries in arbitrary order, so the seeded shuffle is
#   only reproducible if its input order is fixed first;
# * working on a separate list keeps this cell idempotent; shuffling and
#   truncating `classes` in place gave a different result on every re-run.
candidate_classes = sorted(os.listdir(TRAIN_DIR))
random.Random(RANDOM_SEED).shuffle(candidate_classes)

classes = candidate_classes[:MAX_CLASSES]
num_classes = len(classes)

classes

# Pre-processing

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

In [None]:
# Keyword arguments passed to every ImageDataGenerator.
generator_args = dict(
    preprocessing_function=preprocess_input,
)

# Keyword arguments passed to every flow_from_directory call.
flow_args = dict(
    target_size=IMG_SHAPE,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    classes=classes,
)

In [None]:
# Plain generator: only the shared arguments, no augmentation.
img_generator = ImageDataGenerator(**generator_args)

# Augmenting generator: the shared arguments plus random geometric transforms.
augmented_img_generator = ImageDataGenerator(
    **generator_args,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [None]:
# Training data: augmented and shuffled.
train_generator = augmented_img_generator.flow_from_directory(
    directory=TRAIN_DIR, shuffle=True, **flow_args)

# Test and validation data: not augmented, not shuffled.
test_generator = img_generator.flow_from_directory(
    directory=TEST_DIR, shuffle=False, **flow_args)
val_generator = img_generator.flow_from_directory(
    directory=VAL_DIR, shuffle=False, **flow_args)

In [None]:
# Visualizing some examples
#
# The generator applies ResNet50's `preprocess_input`, which converts RGB to
# BGR and subtracts the ImageNet per-channel means (no scaling). That is undone
# here before display; the previous `* 255` belonged to a 1/255-rescaling
# pipeline and produced garbage colours.
IMAGENET_BGR_MEANS = np.array([103.939, 116.779, 123.68])

plt.figure(figsize=(15,15))
for i in range(9):
    # 3 x 3 grid of sub-figures
    plt.subplot(3, 3, i + 1)
    batch_images, _ = next(train_generator)
    # First image of a fresh batch; named `sample` so the imported
    # `tensorflow.keras.preprocessing.image` module is not shadowed.
    sample = batch_images[0] + IMAGENET_BGR_MEANS  # restore the channel means
    sample = sample[..., ::-1]                     # BGR -> RGB
    plt.imshow(np.clip(sample, 0, 255).astype('uint8'))
plt.show()

# Resultado da Versão 2:
```
Test loss: 1.8166121244430542
Test accuracy: 0.4866666793823242
Test ROC AUC: 0.9163340926170349
```
```
Classification Report
                        precision    recall  f1-score   support

       RED HEADED DUCK       0.57      0.80      0.67         5
            PINK ROBIN       0.62      1.00      0.77         5
   RED FACED CORMORANT       0.33      0.40      0.36         5
       BARRED PUFFBIRD       1.00      0.20      0.33         5
              WHIMBREL       0.50      0.40      0.44         5
          WALL CREAPER       0.33      0.40      0.36         5
      EVENING GROSBEAK       0.50      0.80      0.62         5
         MOURNING DOVE       0.33      0.20      0.25         5
     NORTHERN CARDINAL       1.00      0.20      0.33         5
 RED BEARDED BEE EATER       1.00      0.60      0.75         5
       RUDY KINGFISHER       0.83      1.00      0.91         5
  NORTHERN MOCKINGBIRD       0.25      0.20      0.22         5
               QUETZAL       0.11      0.20      0.14         5
BLACK THROATED WARBLER       0.60      0.60      0.60         5
  BLACKBURNIAM WARBLER       0.50      0.40      0.44         5
                PUFFIN       0.44      0.80      0.57         5
      TURQUOISE MOTMOT       0.67      0.80      0.73         5
      NORTHERN FLICKER       1.00      0.40      0.57         5
      EASTERN BLUEBIRD       0.50      0.40      0.44         5
       SCARLET TANAGER       0.44      0.80      0.57         5
         CEDAR WAXWING       0.29      0.40      0.33         5
                CANARY       0.67      0.40      0.50         5
 AFRICAN CROWNED CRANE       0.50      0.20      0.29         5
          MAGPIE GOOSE       0.75      0.60      0.67         5
   VERMILION FLYCATHER       0.60      0.60      0.60         5
                 ROBIN       0.50      0.20      0.29         5
       COMMON POORWILL       1.00      0.20      0.33         5
      BANDED BROADBILL       0.29      0.40      0.33         5
        HAWAIIAN GOOSE       0.57      0.80      0.67         5
  RED WINGED BLACKBIRD       0.14      0.20      0.17         5

              accuracy                           0.49       150
             macro avg       0.56      0.49      0.48       150
          weighted avg       0.56      0.49      0.48       150
```

# Resultados da Versão 4: image augmentation
```
Test loss: 0.3912387192249298
Test accuracy: 0.846666693687439
Test ROC AUC: 0.9973816871643066
```
```
Classification Report
                        precision    recall  f1-score   support

       RED HEADED DUCK       0.83      1.00      0.91         5
            PINK ROBIN       1.00      1.00      1.00         5
   RED FACED CORMORANT       1.00      0.40      0.57         5
       BARRED PUFFBIRD       1.00      1.00      1.00         5
              WHIMBREL       1.00      0.80      0.89         5
          WALL CREAPER       0.83      1.00      0.91         5
      EVENING GROSBEAK       0.80      0.80      0.80         5
         MOURNING DOVE       0.80      0.80      0.80         5
     NORTHERN CARDINAL       0.67      0.40      0.50         5
 RED BEARDED BEE EATER       1.00      0.80      0.89         5
       RUDY KINGFISHER       0.83      1.00      0.91         5
  NORTHERN MOCKINGBIRD       0.50      0.40      0.44         5
               QUETZAL       1.00      1.00      1.00         5
BLACK THROATED WARBLER       1.00      1.00      1.00         5
  BLACKBURNIAM WARBLER       1.00      0.60      0.75         5
                PUFFIN       1.00      1.00      1.00         5
      TURQUOISE MOTMOT       1.00      1.00      1.00         5
      NORTHERN FLICKER       0.71      1.00      0.83         5
      EASTERN BLUEBIRD       0.80      0.80      0.80         5
       SCARLET TANAGER       1.00      0.80      0.89         5
         CEDAR WAXWING       0.83      1.00      0.91         5
                CANARY       0.83      1.00      0.91         5
 AFRICAN CROWNED CRANE       0.67      0.80      0.73         5
          MAGPIE GOOSE       0.80      0.80      0.80         5
   VERMILION FLYCATHER       0.62      1.00      0.77         5
                 ROBIN       1.00      1.00      1.00         5
       COMMON POORWILL       0.83      1.00      0.91         5
      BANDED BROADBILL       1.00      1.00      1.00         5
        HAWAIIAN GOOSE       0.75      0.60      0.67         5
  RED WINGED BLACKBIRD       0.60      0.60      0.60         5

              accuracy                           0.85       150
             macro avg       0.86      0.85      0.84       150
          weighted avg       0.86      0.85      0.84       150
```

# Resultado da Versão 5: ResNet50 pre_processing
```
Test loss: 0.31490498781204224
Test accuracy: 0.8999999761581421
Test ROC AUC: 0.9982674717903137
```
```
Classification Report
                        precision    recall  f1-score   support

       RED HEADED DUCK       1.00      1.00      1.00         5
            PINK ROBIN       0.83      1.00      0.91         5
   RED FACED CORMORANT       0.75      0.60      0.67         5
       BARRED PUFFBIRD       1.00      1.00      1.00         5
              WHIMBREL       1.00      1.00      1.00         5
          WALL CREAPER       0.80      0.80      0.80         5
      EVENING GROSBEAK       0.80      0.80      0.80         5
         MOURNING DOVE       1.00      0.60      0.75         5
     NORTHERN CARDINAL       0.75      0.60      0.67         5
 RED BEARDED BEE EATER       1.00      1.00      1.00         5
       RUDY KINGFISHER       1.00      0.80      0.89         5
  NORTHERN MOCKINGBIRD       1.00      0.60      0.75         5
               QUETZAL       1.00      1.00      1.00         5
BLACK THROATED WARBLER       0.71      1.00      0.83         5
  BLACKBURNIAM WARBLER       1.00      0.80      0.89         5
                PUFFIN       1.00      1.00      1.00         5
      TURQUOISE MOTMOT       1.00      1.00      1.00         5
      NORTHERN FLICKER       1.00      1.00      1.00         5
      EASTERN BLUEBIRD       1.00      1.00      1.00         5
       SCARLET TANAGER       0.71      1.00      0.83         5
         CEDAR WAXWING       0.71      1.00      0.83         5
                CANARY       0.83      1.00      0.91         5
 AFRICAN CROWNED CRANE       1.00      1.00      1.00         5
          MAGPIE GOOSE       1.00      1.00      1.00         5
   VERMILION FLYCATHER       1.00      0.60      0.75         5
                 ROBIN       0.71      1.00      0.83         5
       COMMON POORWILL       1.00      1.00      1.00         5
      BANDED BROADBILL       1.00      1.00      1.00         5
        HAWAIIAN GOOSE       1.00      1.00      1.00         5
  RED WINGED BLACKBIRD       0.80      0.80      0.80         5

              accuracy                           0.90       150
             macro avg       0.91      0.90      0.90       150
          weighted avg       0.91      0.90      0.90       150
```

# Transfer learning Model & ResNet50

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification head.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)

# New classification head on top of the backbone's feature maps.
x = base_model.output
x = Flatten()(x)
x = Dense(120, activation='sigmoid')(x)
x = Dropout(0.1)(x)
predictions = Dense(num_classes, activation='softmax', kernel_initializer='random_uniform')(x)

model = Model(inputs=base_model.input,
              outputs=predictions)

# Freezing pretrained layers
for layer in base_model.layers:
    layer.trainable=False

model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy', "AUC"])

# Printed after freezing so the trainable / non-trainable parameter counts
# describe the model that is actually trained.
model.summary()

In [None]:
# File path handed to train_model as `model_id`; its ModelCheckpoint callback
# saves the best model there.
MODEL_ID = 'resnet50.model.bruno.h5'

# Train on the augmented training stream, validating on the validation stream.
history = train_model(MODEL_ID, model, train_generator, val_generator)

In [None]:
# Plot the loss and accuracy curves recorded during training.
plot_training_curves(history)

In [None]:
# Print loss, accuracy and ROC AUC on the test set.
# NOTE(review): this uses the in-memory `model`, not the checkpoint saved at
# MODEL_ID; confirm that evaluating the last-epoch weights is intended.
print_metrics(model, test_generator)

In [None]:
# Confusion matrix and per-class precision / recall / F1 on the test set.
plot_confusion_matrix_and_classification_report(model, test_generator, classes)

# Resultados com ResNet50
```
Test loss: 0.1234726831316948
Test accuracy: 0.9866666793823242
Test ROC AUC: 0.9999072551727295
```
```
Classification Report
                        precision    recall  f1-score   support

       RED HEADED DUCK       1.00      1.00      1.00         5
            PINK ROBIN       1.00      1.00      1.00         5
   RED FACED CORMORANT       1.00      1.00      1.00         5
       BARRED PUFFBIRD       0.83      1.00      0.91         5
              WHIMBREL       1.00      1.00      1.00         5
          WALL CREAPER       1.00      1.00      1.00         5
      EVENING GROSBEAK       1.00      1.00      1.00         5
         MOURNING DOVE       1.00      1.00      1.00         5
     NORTHERN CARDINAL       1.00      1.00      1.00         5
 RED BEARDED BEE EATER       1.00      1.00      1.00         5
       RUDY KINGFISHER       1.00      1.00      1.00         5
  NORTHERN MOCKINGBIRD       1.00      1.00      1.00         5
               QUETZAL       1.00      1.00      1.00         5
BLACK THROATED WARBLER       1.00      1.00      1.00         5
  BLACKBURNIAM WARBLER       1.00      0.80      0.89         5
                PUFFIN       1.00      1.00      1.00         5
      TURQUOISE MOTMOT       1.00      1.00      1.00         5
      NORTHERN FLICKER       1.00      1.00      1.00         5
      EASTERN BLUEBIRD       1.00      1.00      1.00         5
       SCARLET TANAGER       1.00      1.00      1.00         5
         CEDAR WAXWING       1.00      1.00      1.00         5
                CANARY       0.83      1.00      0.91         5
 AFRICAN CROWNED CRANE       1.00      1.00      1.00         5
          MAGPIE GOOSE       1.00      1.00      1.00         5
   VERMILION FLYCATHER       1.00      1.00      1.00         5
                 ROBIN       1.00      1.00      1.00         5
       COMMON POORWILL       1.00      1.00      1.00         5
      BANDED BROADBILL       1.00      1.00      1.00         5
        HAWAIIAN GOOSE       1.00      1.00      1.00         5
  RED WINGED BLACKBIRD       1.00      0.80      0.89         5

              accuracy                           0.99       150
             macro avg       0.99      0.99      0.99       150
          weighted avg       0.99      0.99      0.99       150
```