# Analysis of Model flowerclass-efficientnetv2-2 2: with Image Visualizations

### Goals

* Analysis of the top 8 worst performing classes
* Leverage simple image visualizations to gain insight into the algorithm's behavior


In [None]:
import math, re, os
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
print(tf.__version__)
print(tfa.__version__)

from flowerclass_read_tf_ds import get_datasets, display_batch_by_class, display_batch_of_images #, load_dataset, display_batch_of_images, batch_to_numpy_images_and_labels, display_one_flower
import tensorflow_hub as hub
import pandas as pd
import math
import plotly_express as px
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [None]:
# Ask TensorFlow for the name of a GPU device; the returned string is shown as the cell output.
tf.test.gpu_device_name()

# I. Data Loading

In [None]:
# Side length (in pixels) of the square model input, and the batch size passed
# to the dataset loader and used by the prediction loop.
image_size = 224
batch_size = 64

In [None]:
# Flower class names; the position in this list is the class index.
# The trailing comments give the index range covered by each row.
class_names = ['pink primrose',    'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea',     'wild geranium',     'tiger lily',           'moon orchid',              'bird of paradise', 'monkshood',        'globe thistle',         # 00 - 09
           'snapdragon',       "colt's foot",               'king protea',      'spear thistle', 'yellow iris',       'globe-flower',         'purple coneflower',        'peruvian lily',    'balloon flower',   'giant white arum lily', # 10 - 19
           'fire lily',        'pincushion flower',         'fritillary',       'red ginger',    'grape hyacinth',    'corn poppy',           'prince of wales feathers', 'stemless gentian', 'artichoke',        'sweet william',         # 20 - 29
           'carnation',        'garden phlox',              'love in the mist', 'cosmos',        'alpine sea holly',  'ruby-lipped cattleya', 'cape flower',              'great masterwort', 'siam tulip',       'lenten rose',           # 30 - 39
           'barberton daisy',  'daffodil',                  'sword lily',       'poinsettia',    'bolero deep blue',  'wallflower',           'marigold',                 'buttercup',        'daisy',            'common dandelion',      # 40 - 49
           'petunia',          'wild pansy',                'primula',          'sunflower',     'lilac hibiscus',    'bishop of llandaff',   'gaura',                    'geranium',         'orange dahlia',    'pink-yellow dahlia',    # 50 - 59
           'cautleya spicata', 'japanese anemone',          'black-eyed susan', 'silverbush',    'californian poppy', 'osteospermum',         'spring crocus',            'iris',             'windflower',       'tree poppy',            # 60 - 69
           'gazania',          'azalea',                    'water lily',       'rose',          'thorn apple',       'morning glory',        'passion flower',           'lotus',            'toad lily',        'anthurium',             # 70 - 79
           'frangipani',       'clematis',                  'hibiscus',         'columbine',     'desert-rose',       'tree mallow',          'magnolia',                 'cyclamen ',        'watercress',       'canna lily',            # 80 - 89
           'hippeastrum ',     'bee balm',                  'pink quill',       'foxglove',      'bougainvillea',     'camellia',             'mallow',                   'mexican petunia',  'bromelia',         'blanket flower',        # 90 - 99
           'trumpet creeper',  'blackberry lily',           'common tulip',     'wild rose']                                                                                                                                               # 100 - 103
# Display the number of classes as the cell output.
len(class_names)

# II. Model Loading and Predictions: EfficientNetV2

In [None]:
# TF Hub handle of the EfficientNetV2 feature-vector module that is wrapped in a
# hub.KerasLayer when the model is assembled.
effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2"

In [None]:
    effnet2_tfhub = tf.keras.Sequential([
    # Explicitly define the input shape so the model can be properly
    # loaded by the TFLiteConverter
    tf.keras.layers.InputLayer(input_shape=(image_size, image_size,3)),
    hub.KerasLayer(effnet2_base, trainable=False),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(104, activation='softmax')
])
effnet2_tfhub.build((None, image_size, image_size,3,)) #This is to be used for subclassed models, which do not know at instantiation time what their inputs look like.


effnet2_tfhub.summary()

In [None]:
# Restore the weights saved for the selected training phase.
# Checkpoint files are named "cp-" + zero-padded 4-digit phase number + ".ckpt".
best_phase = 12
checkpoint_dir = "../input/flowerclass-efficientnetv2-2/training/"
checkpoint_file = f"cp-{best_phase:04d}.ckpt"
effnet2_tfhub.load_weights(checkpoint_dir + checkpoint_file)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

Ensure that the validation data loader returns its elements in a fixed order.

In [None]:
# Load the datasets. With with_id=True the loop below unpacks each validation
# batch as an (images, labels, ids) triple.
ds_train, ds_valid, ds_test = get_datasets(BATCH_SIZE=batch_size, IMAGE_SIZE=(image_size, image_size),
                                           RESIZE=None, tpu=False, with_id=True)

# Per-batch accumulators, kept separate from the final arrays so that no name
# changes type between list and ndarray.
pred_batches = []
label_batches = []
id_batches = []
for imgs, label, imgs_id in tqdm(ds_valid):
    # predict_on_batch runs a single forward pass on an already-batched input.
    # Calling Model.predict inside a Python loop repeats its per-call setup and
    # prints one progress bar per batch on top of tqdm's.
    pred_batches.append(effnet2_tfhub.predict_on_batch(imgs))
    label_batches.append(label.numpy())
    id_batches.append(imgs_id.numpy())

# Class scores and one-hot labels are reduced to class indices via argmax over axis 1.
img_preds = np.concatenate(pred_batches).argmax(1)
img_labels = np.concatenate(label_batches).argmax(1)
img_ids = np.concatenate(id_batches)


In [None]:
# One row per validation image: predicted index, true index and the image id.
# The ids arrive as bytes and are decoded to str while the frame is built.
val_results = pd.DataFrame({
    'pred': img_preds,
    "label": img_labels,
    "id": [raw_id.decode() for raw_id in img_ids],
})

In [None]:
# Preview the first rows of the per-image validation results.
val_results.head()

# III. Analysis of low-performant classes

In [None]:
# Names of the classes singled out for closer inspection in this section.
worst_class_names = [
    'globe-flower',
    'clematis',
    'canterbury bells',
    'mexican petunia',
    'black-eyed susan',
    'peruvian lily',
]
worst_classes = pd.DataFrame({'class': worst_class_names})

In [None]:
# Lookup table: class name -> class index (its position in class_names).
class_names_mapping = {name: idx for idx, name in enumerate(class_names)}

In [None]:
# Attach the numeric class index to every selected class name, then display the table.
worst_classes['idx'] = worst_classes['class'].map(class_names_mapping)
worst_classes

In [None]:
# Confusion matrix over the validation set; true labels are passed first, predictions second.
conf_matrix = confusion_matrix(val_results['label'], val_results['pred'])

In [None]:
# Keep every validation row in which one of the selected classes appears,
# either as the prediction or as the true label.
worst_idx = worst_classes['idx']
predicted_as_worst = val_results['pred'].isin(worst_idx)
labelled_as_worst = val_results['label'].isin(worst_idx)
val_results_classes = val_results[predicted_as_worst | labelled_as_worst]
val_results_classes.shape

In [None]:
# Preview the rows that involve at least one of the selected classes.
val_results_classes.head()

# IIIa). globe-flower

In [None]:
# Class examined in this subsection; the following cells look it up in class_names_mapping.
class_name = 'globe-flower'

In [None]:
# Rows in which the class under analysis is either predicted or the true label.
# .copy() gives an independent frame, so adding columns below is safe.
class_idx = class_names_mapping[class_name]
involves_class = (val_results['pred'] == class_idx) | (val_results['label'] == class_idx)
val_results_class = val_results[involves_class].copy()

# Reverse lookup (class index -> class name) used to add readable name columns.
class_names_mapping_inv = {idx: name for name, idx in class_names_mapping.items()}
for el in ['pred', 'label']:
    val_results_class[f"{el}_class"] = val_results_class[el].map(class_names_mapping_inv)

In [None]:
# Display all rows involving the class under analysis, including the readable name columns.
val_results_class

In [None]:
# Locate the 224x224 JPEG tfrecord files of the validation and training splits.
data_root = "../input/tpu-getting-started"
data_path = f"{data_root}/tfrecords-jpeg-224x224"

val_224 = tf.io.gfile.glob(f"{data_path}/val/*.tfrec")
train_224 = tf.io.gfile.glob(f"{data_path}/train/*.tfrec")

# Show validation images of the class under analysis.
display_batch_by_class(val_224, name=class_name, top_n=10)

In [None]:
# Restrict the per-class results to the two images that are inspected in detail below.
ids_to_inspect = ['ed3a59a35', '4a6f8b3ad']
vis_imgs = val_results_class[val_results_class['id'].isin(ids_to_inspect)]
vis_imgs

In [None]:
def get_images_by_ids(image_ids_search):
    """Collect the validation images whose ids appear in ``image_ids_search``.

    The datasets are re-loaded through ``get_datasets`` (using the notebook-level
    ``batch_size`` and ``image_size``) and the validation split is scanned
    batch by batch.

    Args:
        image_ids_search: iterable of image ids, given as ``str`` or ``bytes``.

    Returns:
        A tuple ``((images, labels), image_ids_found)``:
        ``images`` is the matching images stacked along a new first axis,
        ``labels`` is a 1-D int64 tensor holding the argmax of each matching
        label vector, and ``image_ids_found`` is the list of matching id
        tensors. All three follow the iteration order of the validation
        dataset, not the order of ``image_ids_search``.

    Raises:
        ValueError: if none of the requested ids occurs in the validation set.
    """
    # Normalise the requested ids to a set of str: membership becomes an exact,
    # O(1) string comparison instead of relying on implicit bytes/str coercion
    # between a TensorFlow tensor and a NumPy array.
    wanted_ids = {
        wanted.decode() if isinstance(wanted, bytes) else str(wanted)
        for wanted in image_ids_search
    }

    ds_train, ds_valid, ds_test = get_datasets(BATCH_SIZE=batch_size, IMAGE_SIZE=(image_size, image_size),
                                               RESIZE=None, tpu=False, with_id=True)

    imgs_found = []
    image_ids_found = []
    labels_found = []
    for imgs, labels, imgs_id in tqdm(ds_valid):
        for img, img_id, label in zip(imgs, imgs_id, labels):
            # Ids are stored as byte strings in the dataset; decode before comparing.
            if img_id.numpy().decode() in wanted_ids:
                image_ids_found.append(img_id)
                imgs_found.append(img)
                labels_found.append(tf.argmax(label))

    if not imgs_found:
        raise ValueError("None of the requested image ids were found in the validation set.")

    # tf.argmax over a 1-D label vector yields a scalar. tf.stack builds a 1-D
    # tensor from scalars, whereas tf.concat needs inputs of rank >= 1.
    labels_tensor = tf.cast(tf.stack(labels_found, 0), tf.int64)
    return (tf.stack(imgs_found, 0), labels_tensor), image_ids_found

In [None]:
# Fetch the image and label tensors for the ids selected in vis_imgs.
batch_found,  imgage_ids_found= get_images_by_ids(vis_imgs['id'].values)

In [None]:
# Show the selected images together with the model's predictions and their ids.
# NOTE(review): predictions and image_ids are taken from vis_imgs, while batch_found
# follows the iteration order of the validation dataset; they line up only if the
# validation loader returns elements in the same order on every pass -- confirm.
display_batch_of_images(batch_found, predictions=vis_imgs['pred'].values, FIGSIZE=16, image_ids= vis_imgs['id'].values)

> * ed3a59a35 image: The flower is shot from the side and seems not to have opened yet. No image of this type exists in the validation set. Does the training set contain one?
> * 4a6f8b3ad image: The flower closely resembles the other globe-flower images, in terms of both the flower and the stem leaves. Is buttercup very similar?

In [None]:
# Show training images of the class under analysis.
# top_n presumably caps the number of images shown -- confirm in flowerclass_read_tf_ds.
display_batch_by_class(train_224, name = class_name, top_n= 10)

> ed3a59a35 image: The training set does not include such an image either. What is the network focusing on?

In [None]:
# Show training images of the "lotus" class for visual comparison with image ed3a59a35.
display_batch_by_class(train_224, name = "lotus", top_n= 25)

> Given the resemblance between the shape of the flower in image ed3a59a35 and some of the lotus flowers, it is reasonable for the network to assume it belongs to the lotus class.

In [None]:
# Show training images of the "buttercup" class for visual comparison with image 4a6f8b3ad.
display_batch_by_class(train_224, name = "buttercup", top_n= 25)

> In their closed form, buttercup flowers resemble the flower in image 4a6f8b3ad.