In [None]:
# Inspired by https://www.kaggle.com/kritidoneria/responsible-ai-model-explainability
# Inspired by https://www.kaggle.com/databeru/fruit-and-vegetable-classification

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
import tensorflow as tf

# Root directory of each dataset split
train_dir = Path('../input/100-bird-species/train')
val_dir = Path('../input/100-bird-species/valid')
test_dir = Path('../input/100-bird-species/test')

# Recursively collect every .jpg file below each split directory
train_filepaths, val_filepaths, test_filepaths = (
    list(split_dir.glob(r'**/*.jpg'))
    for split_dir in (train_dir, val_dir, test_dir)
)

In [None]:
def proc_img(filepath, random_state=None):
    """Build a shuffled DataFrame of image paths and their class labels.

    The label of an image is the name of the directory that directly
    contains it (``.../<label>/<file>.jpg``).

    Parameters
    ----------
    filepath : sequence of str or pathlib path objects
        Paths of the image files.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample`` so the shuffle can be
        reproduced. ``None`` (the default) leaves the shuffle unseeded.

    Returns
    -------
    pandas.DataFrame
        Columns ``Filepath`` (paths converted to ``str``) and ``Label``,
        with rows shuffled and the index reset to 0..n-1.
    """
    # Local import: only `Path` is imported at the top of the notebook.
    from pathlib import PurePath

    paths = list(filepath)

    # Take the parent directory name rather than splitting on '/': the split
    # fails for Windows-style separators and for paths without a directory.
    labels = [
        (p if isinstance(p, PurePath) else PurePath(p)).parent.name
        for p in paths
    ]

    filepath = pd.Series(paths, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    df = pd.concat([filepath, labels], axis=1)

    # Shuffle the DataFrame and reset index
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    return df

# One shuffled (Filepath, Label) table per dataset split
train_df, val_df, test_df = map(
    proc_img, (train_filepaths, val_filepaths, test_filepaths)
)

In [None]:
# Preview the first five rows of the training table
train_df.head()

In [None]:
# Create a DataFrame with one row per label (the first occurrence of each)
df_unique = train_df.drop_duplicates(subset=["Label"]).reset_index()

# Display some pictures of the dataset: one example image per class
fig, axes = plt.subplots(nrows=6, ncols=6, figsize=(12, 12),
                        subplot_kw={'xticks': [], 'yticks': []})

# The grid has 36 cells; never index past the number of available classes
n_shown = min(axes.size, len(df_unique))

for i, ax in enumerate(axes.flat):
    if i >= n_shown:
        # Fewer classes than grid cells: leave the spare cell blank
        ax.axis('off')
        continue
    ax.imshow(plt.imread(df_unique.Filepath[i]))
    # NOTE(review): white titles are only readable on a dark figure
    # background - confirm the notebook theme.
    ax.set_title(df_unique.Label[i], fontsize = 12, color = 'white')
plt.tight_layout(pad=0.5)
plt.show()

In [None]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

In [None]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

val_images = train_generator.flow_from_dataframe(
    dataframe=val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)


In [None]:
# Load the pretrained MobileNetV2 backbone: ImageNet weights, no
# classification head, global average pooling on the output

from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

pretrained_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

# Freeze the backbone so that its weights are not updated during training
pretrained_model.trainable = False

In [None]:
inputs = pretrained_model.input

# Transfer learning: a small dense head on top of the pretrained features
x = tf.keras.layers.Dense(128, activation='relu')(pretrained_model.output)

# One softmax unit per class known to the training iterator, instead of a
# hard-coded count that breaks whenever the dataset gains or loses classes
num_classes = len(train_images.class_indices)
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Import plot_model from the public tf.keras API: `keras.utils.vis_utils` is
# a private module path that is not available in newer Keras releases, and
# the model itself is built with tf.keras.
from tensorflow.keras.utils import plot_model

# Multi-class classification with one-hot labels (class_mode='categorical')
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Optional inspection helpers:
# model.summary()
# plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Train the model. The batch size is already fixed by the data iterators, so
# it must not be passed to fit() again.
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=6,
    callbacks=[
        # Stop once val_loss has not improved for 2 epochs and roll back to
        # the best weights seen so far
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True
        )
    ]
)

In [None]:
# Training vs. validation accuracy per epoch
accuracy_curves = pd.DataFrame(history.history).loc[:, ['accuracy', 'val_accuracy']]
accuracy_curves.plot(title="Accuracy")
plt.show()

In [None]:
# Training vs. validation loss per epoch
loss_curves = pd.DataFrame(history.history).loc[:, ['loss', 'val_loss']]
loss_curves.plot(title="Loss")
plt.show()

# Test

In [None]:
# Predict class probabilities for the test images and keep, for each image,
# the index of the highest-scoring class
pred = model.predict(test_images)
pred = np.argmax(pred, axis=1)

# The model was trained on train_images, so its output indices follow the
# training iterator's label -> index mapping; invert it to decode them
labels = {index: name for name, index in train_images.class_indices.items()}
pred = [labels[k] for k in pred]

# test_images.classes is encoded with the test iterator's own mapping, so
# decode the ground truth with that mapping rather than the training one
test_labels = {index: name for name, index in test_images.class_indices.items()}
y_test = [test_labels[k] for k in test_images.classes]

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test images whose predicted label equals the true label
acc = accuracy_score(y_true=y_test, y_pred=pred)
print(f'Accuracy on the test set: {100*acc:.2f}%')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# The matrix has one row/column per label occurring in y_test OR pred, so the
# tick labels must be built from the same union; labelling with the true
# labels alone misaligns the axes when a class is only ever predicted.
matrix_labels = sorted(set(y_test) | set(pred))

# normalize='true' scales each row (true class) to sum to 1
cf_matrix = confusion_matrix(y_test, pred, labels=matrix_labels, normalize='true')
plt.figure(figsize = (15,10))
sns.heatmap(cf_matrix,
            annot=False,
            xticklabels = matrix_labels,
            yticklabels = matrix_labels,
            )
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Normalized Confusion Matrix')
plt.show()

In [None]:
# Show the first nine test images in a 3x3 grid, each titled with its true
# label and the label predicted by the model
fig, axes = plt.subplots(3, 3, figsize=(8, 8),
                         subplot_kw=dict(xticks=[], yticks=[]))

for i, ax in enumerate(axes.ravel()):
    picture = plt.imread(test_df.Filepath.iloc[i])
    caption = f"True: {test_df.Label.iloc[i]}\nPredicted: {pred[i]}"
    ax.imshow(picture)
    ax.set_title(caption, color='white')

plt.tight_layout()
plt.show()

In [None]:
# https://www.kaggle.com/kritidoneria/responsible-ai-model-explainability
# Import from tensorflow.keras so these helpers come from the same Keras
# implementation as the model (built with tf.keras above).
from tensorflow.keras.applications.mobilenet_v2 import decode_predictions, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Single test image used for the explanation below
path = '../input/100-bird-species/test/FLAMINGO/1.jpg'

# target_size is a (height, width) pair; the channel count is not part of it
image_raw = load_img(path, target_size=(224, 224))
image_raw

In [None]:
# Convert to a numpy array and add a leading batch axis:
# (height, width, channels) -> (1, height, width, channels)
image = np.expand_dims(img_to_array(image_raw), axis=0)

# Apply the MobileNetV2 preprocessing, then cast to float64
# NOTE(review): the double cast is presumably needed by the LIME step that
# consumes `image` later - confirm before removing it.
image = preprocess_input(image).astype('double')

predictions = model.predict(image)

print(predictions.shape)

# Indices of the five highest-scoring classes, best first. Reuse the
# predictions computed above instead of running the model a second time.
predictions.argsort()[0, -5:][::-1]

In [None]:
from lime.lime_image import LimeImageExplainer
from skimage.segmentation import mark_boundaries

explainer = LimeImageExplainer()

# Explain the model's prediction for the single preprocessed image
explanation = explainer.explain_instance(image[0],
                                         model.predict,
                                         top_labels=2,
                                         num_samples=100,
                                         random_seed=42
                                        )

# Only the `top_labels` highest-scoring classes are explained; requesting any
# other label raises KeyError. Use the best-scoring explained label instead
# of a hard-coded class index.
top_label = explanation.top_labels[0]

temp, mask = explanation.get_image_and_mask(top_label,
                                            positive_only=True,
                                            num_features=5,
                                            hide_rest=True)

# `image` was scaled to [-1, 1] by preprocess_input; map it back to [0, 1]
# so imshow renders it without clipping (hidden regions, stored as 0, then
# appear mid-grey).
# plot image and mask together
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))