#### Run the first box so that the notebook gets 100% of your window

Hopefully, you do not need to run the rest of the boxes, assuming that you see the box outputs.

Then, see the [Instructions](#Instructions) and scroll down to the [ViT-3B](#ViT-3B) and [GreedySoups](#GreedySoups) sections.

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import base64
from enum import Enum
from io import BytesIO
import json
import os
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import tensorflow_datasets as tfds
import tqdm

from src import datasets, utils
from src.evaluation import ErrorClsTypes, EvalManager

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

pd.set_option('display.max_colwidth', None)

%load_ext autoreload
%autoreload 2

2023-11-03 12:12:51.692441: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-03 12:12:51.721766: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-03 12:12:51.722238: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Helpers to visualize images in the dataframes

In [3]:
imagenet_val_root = datasets.get_dataset_root(dataset='imagenet', split="val")
# df['image'] = df['img_rel_path'].map(lambda f: load_image(f))

def load_image(path):
    if not path.startswith(imagenet_val_root):
        path = os.path.join(imagenet_val_root, path)
    i = Image.open(path)
    i.thumbnail((500, 500), Image.Resampling.LANCZOS)
    return i

def image_base64(im):
    if isinstance(im, str):
        im = get_thumbnail(im)
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        return base64.b64encode(buffer.getvalue()).decode()

def image_formatter(im):
    return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'

def img_rel_path_formatter(img_rel_path):
    im = load_image(os.path.join(imagenet_val_root, img_rel_path))
    return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'

def visualize_df(df_to_vis):
    print('Number of samples:', df_to_vis.shape[0])
    # print(df_to_vis.columns)
    show_columns = [
        'wnet_id', 'file_name', 'img_rel_path',
        'target', 'target_desc', 'multi_label', 'multi_desc', 'top1', 'top1_desc',
        'error_types', 'clip_top10_train_files', 'clip_top10_supercls',
        # 'DB_category', 'DB_severity'
    ]
    if 'DB_category' in df_to_vis.columns:
        assert 'DB_severity' in df_to_vis.columns
        show_columns += ['DB_category', 'DB_severity']
    return HTML(df_to_vis[show_columns].to_html(escape=False, formatters=dict(img_rel_path=img_rel_path_formatter)))

# Inspect DoughBagel errors

In [4]:
eval_manager = EvalManager(dataset='imagenet')

[32m2023-11-03 12:13:00.910[0m | [1mINFO    [0m | [36msrc.evaluation[0m:[36m__init__[0m:[36m78[0m - [1mInitialize EvalManager[0m
[32m2023-11-03 12:13:01.157[0m | [1mINFO    [0m | [36msrc.utils[0m:[36madd_multi_labels_to_df[0m:[36m114[0m - [1mLoad multilabels from imagenet2012_multilabel[0m
2023-11-03 12:13:01.168851: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-03 12:13:01.169034: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-

[32m2023-11-03 12:13:44.401[0m | [34m[1mDEBUG   [0m | [36msrc.utils[0m:[36mcommon_co_occurrences[0m:[36m278[0m - [34m[1mAppearing more than once: 1019 pairs[0m
[32m2023-11-03 12:13:44.466[0m | [1mINFO    [0m | [36msrc.evaluation[0m:[36m__init__[0m:[36m175[0m - [1mDONE: 2038 pairs in total. (a,b) and (b,a) both counted[0m
[32m2023-11-03 12:13:44.467[0m | [1mINFO    [0m | [36msrc.evaluation[0m:[36m__init__[0m:[36m178[0m - [1m=== DoughBagel non-prototypical samples: 36 in total ===[0m


In [5]:
mistakes_file_name = {
    'vit3b': 'vit3b_mistakes.json',
    'greedysoups': 'greedy_soups_mistakes.json'
}

our_mistake_types = [
    ErrorClsTypes.same_superclass,
    ErrorClsTypes.OOV_detected_by_clip,
    ErrorClsTypes.non_prototypical,
    ErrorClsTypes.common_co_occurrences,
    ErrorClsTypes.not_classified
]
our_mistake_types_text = [
    'same supercls (fine-gr)',
    'fine-gr OOV (CLIP)',
    'non-proto',
    'common co occ (spur corr)',
    'uncls'
]

def print_cnt(cnt, dough_bagel_mistake_types):
    num_mistakes = sum(sum(row) for row in cnt)
    print('Number of mistakes:', num_mistakes)
    print(f'{"DoughBagel":>36}' + ' '.join(f'{t:>24}' for t in our_mistake_types_text + ['Total (row)']))
    for i, row in enumerate(cnt):
        print(f'{dough_bagel_mistake_types[i]:>36}' + ' '.join(f'{t:>24}' for t in row + [sum(row)]))
    col_sums = [sum(cnt[i][j] for i in range(len(dough_bagel_mistake_types))) for j in range(len(our_mistake_types))]
    print(f'{"Total (col)":>36}' + ' '.join(f'{t:>24}' for t in col_sums + [num_mistakes]))


def error_classifications_match(row) -> bool:
    if row.DB_category in ['Fine-grained error', 'Fine-grained with Multilabel Options']:
        return ErrorClsTypes.same_superclass in row.error_types
    elif row.DB_category in ['Fine-grained error with OOV', 'Fine-grained with OOV']:
        return ErrorClsTypes.OOV_detected_by_clip in row.error_types
    elif row.DB_category == 'Non-prototypical':
        return ErrorClsTypes.non_prototypical in row.error_types
    elif row.DB_category == 'Spurious Correlation':
        return ErrorClsTypes.common_co_occurrences in row.error_types
    else:
        raise ValueError(f'Unconsidered DoughBagel error category: {row.DB_category}')


def filter_by_matching_dough_bagel(df_mistakes: pd.DataFrame, match_dough_bagel: bool) -> pd.DataFrame:
    return df_mistakes[df_mistakes.apply(
        lambda row: error_classifications_match(row) == match_dough_bagel,
        axis=1
    )]


def filter_by_severity(df_mistakes: pd.DataFrame, severity: str) -> pd.DataFrame:
    assert severity in ['all', 'major', 'minor']
    if severity == 'all':
        return df_mistakes
    elif severity == 'major':
        return df_mistakes[df_mistakes['DB_severity'] == 'Major']
    else:
        assert severity == 'minor'
        return df_mistakes[df_mistakes['DB_severity'] != 'Major']


def filter_mistakes(
        df_mistakes: pd.DataFrame,
        classified_by_us: bool = True,
        match_dough_bagel: bool = False,
        severity: str = 'all'
) -> pd.DataFrame:
    # classified_by_us == False AND match_dough_bagel == True is invalid combination
    assert classified_by_us or not match_dough_bagel
    
    # DoughBagel have 3 severity levels: Minor + Borderline (both considered minor), and Major
    assert severity in ['all', 'major', 'minor']
    
    df = df_mistakes
    if classified_by_us:
        df = df[~ErrorClsTypes.filter_error_type(df_mistakes, ErrorClsTypes.not_classified)]
    else:
        # i.e., consider only those that are *not* classified by us
        df = df[ErrorClsTypes.filter_error_type(df_mistakes, ErrorClsTypes.not_classified)]
    
    df = filter_by_matching_dough_bagel(df, match_dough_bagel)
    df = filter_by_severity(df, severity)
    
    return df


def get_mistakes_df(model_name: str) -> pd.DataFrame:
    imagenet_mistakes_metadata_path = utils.get_root_path() / 'imagenet-mistakes' / 'metadata'
    with open(imagenet_mistakes_metadata_path / mistakes_file_name[model_name], 'r') as json_file:
        dough_bagel_mistakes = json.load(json_file)
#     print(dough_bagel_mistakes)
    
    dough_bagel_mistake_types = sorted(set(
        m['failure category'] for _, m in dough_bagel_mistakes.items()
    ))
    print(dough_bagel_mistake_types)

    df = eval_manager.prepare_and_eval_model(model_name)
    
    df = df[~df['is_problematic']]
    df = df[~ErrorClsTypes.filter_error_type(df, ErrorClsTypes.correct)]

    df_non_mistakes = df[df['file_name'].map(lambda file_name: file_name not in dough_bagel_mistakes)]
    assert (
        ErrorClsTypes.filter_error_type(df_non_mistakes, ErrorClsTypes.correct_collapsed_mapping) | \
        ErrorClsTypes.filter_error_type(df_non_mistakes, ErrorClsTypes.correct_multi_label)
    ).all()

    df_mistakes = df[df['file_name'].isin(dough_bagel_mistakes)]
    assert df_mistakes.shape[0] == len(dough_bagel_mistakes)
    
    cnt_combined = [[0] * len(our_mistake_types) for _ in range(len(dough_bagel_mistake_types))]
    cnt_major = [[0] * len(our_mistake_types) for _ in range(len(dough_bagel_mistake_types))]
    cnt_minor = [[0] * len(our_mistake_types) for _ in range(len(dough_bagel_mistake_types))]
    
    for _, row in df_mistakes.iterrows():
        file_name = row.file_name
        assert dough_bagel_mistakes[file_name]['top_prediction'] == row.top1
        
        failure_category = dough_bagel_mistakes[file_name]['failure category']
        failure_severity = dough_bagel_mistakes[file_name]['failure severity']
        
        row_idx = dough_bagel_mistake_types.index(failure_category)
        col_idx = -1
        for i, our_mistake_type in enumerate(our_mistake_types):
            if our_mistake_type in row.error_types:
                assert col_idx == -1
                col_idx = i
        
        cnt_combined[row_idx][col_idx] += 1
        if failure_severity == 'Major':
            cnt_major[row_idx][col_idx] += 1
        else:
            cnt_minor[row_idx][col_idx] += 1

    print('Model name:', model_name)
    print('All errors (Major + Minor) counts:')
    print_cnt(cnt_combined, dough_bagel_mistake_types)
    print()
    print('Minor errors counts:')
    print_cnt(cnt_minor, dough_bagel_mistake_types)
    print()
    print('Major errors counts:')
    print_cnt(cnt_major, dough_bagel_mistake_types)
    
    df_mistakes['DB_category'] = df_mistakes['file_name'].map(
        lambda file_name: dough_bagel_mistakes[file_name]['failure category']
    )
    df_mistakes['DB_severity'] = df_mistakes['file_name'].map(
        lambda file_name: dough_bagel_mistakes[file_name]['failure severity']
    )
    
    num_unclassified_all = ErrorClsTypes.filter_error_type(df_mistakes, ErrorClsTypes.not_classified).sum()
    
    for severity in ['all', 'minor', 'major']:
        df_mistakes_severity = filter_by_severity(df_mistakes, severity)
        
        df_classified_matching = filter_mistakes(df_mistakes_severity, classified_by_us=True, match_dough_bagel=True, severity=severity)
        df_classified_not_matching = filter_mistakes(df_mistakes_severity, classified_by_us=True, match_dough_bagel=False, severity=severity)
        df_unclassified = filter_mistakes(df_mistakes_severity, classified_by_us=False, match_dough_bagel=False, severity=severity)
        
        num_classified = df_classified_matching.shape[0] + df_classified_not_matching.shape[0]
        assert df_mistakes_severity.shape[0] == num_classified + df_unclassified.shape[0]
        
        print(f'Severity = {severity}, total: {df_mistakes_severity.shape[0]}')
        print('classified + matching DoughBagel: {} ({:.2f}% of all with that severity, {:.2f}% of the classified with that severity)'.format(
            df_classified_matching.shape[0],
            100 * df_classified_matching.shape[0] / df_mistakes_severity.shape[0],
            100 * df_classified_matching.shape[0] / num_classified
        ))
        print('classified + not matching DoughBagel: {} ({:.2f}% of all with that severity, {:.2f}% of the classified with that severity)'.format(
            df_classified_not_matching.shape[0],
            100 * df_classified_not_matching.shape[0] / df_mistakes_severity.shape[0],
            100 * df_classified_not_matching.shape[0] / num_classified
        ))
        print('unclassified (thus not matching DoughBagel): {} ({:.2f}% of all with that severity, {:.2f}% of all unclassified)'.format(
            df_unclassified.shape[0],
            100 * df_unclassified.shape[0] / df_mistakes_severity.shape[0],
            100 * df_unclassified.shape[0] / num_unclassified_all
        ))
        print()
    
    return df_mistakes

# Instructions

The first cell of each of the ViT-3B and GreedySoups sections present summary statistics (similar to Table 1 in the paper) for the two models.\
The second cell in each section shows the mistakes that the models make and which we classify, but our automated error classification does not match DoughBagel's manual categorization.

Description of the output DataFrame columns:
* `wnet_id` - WordNet class of the sample.
* `file_name` - file name of the sample image.
* `img_rel_path` = `wnet_id/file_name` and can be found in the `val` ImageNet folder. In this notebook we show the image directly.
* `target` - the ground-truth ImageNet (numerical) class id.
* `target_desc` - description of the ImageNet ground truth.
* `multi_label` - multi labels from Shankar's [ImageNet-multilabel](https://www.tensorflow.org/datasets/catalog/imagenet2012_multilabel) dataset.
* `multi_desc` - descriptions of each of the corresponding multi-labels. Each `(...)` in the list presents the description of a single label from the multi-label list.
* `top1` - the top-1 model prediction.
* `top1_desc` - description of the top-1 model prediction.
* `error_types` - the output of _our_ error classification pipeline with a short explanation. E.g., for fine-grained errors we also list the common superclasses, for fine-grained OOV - the top-5 proposals and their CLIP probabilities, for spurious correlations - the classes which commonly co-occur with the model's top-1 prediction.
* `clip_top10_train_files` - the images from the _training_ set that are the most similar to the evaluated sample, according to CLIP's embeddings.
* `clip_top10_supercls` - superclases of the ground-truth targets of the training images in the `clip_top10_train_files` list.
* `DB_category` - DoughBagel's manual error categorization.
* `DB_severity` - severity of the mistake, according to DoughBagel.

Tips & hints:
- You can enlarge the sample images (and make them smaller again) by double-clicking on them.
- You can automatically show single images and visualize ImageNet classes using the `src/show_images.ipynb` notebook. For it to work, you need to have the ImageNet dataset downloaded and the environment variables set up correctly following the instructions in the `README`.
- You can find our superclasses (together with WordNet ids and descriptions for each individual class) in the `artefacts/superclasses.txt` file.

For each sample, the question we answer is:\
`Do you find our error classification equally (or more) good, informative and descriptive, compared to DoughBagel's manual categorization?`

# ViT-3B

In [6]:
vit3b_mistakes_df = get_mistakes_df('vit3b')
vit3b_classified_not_matching = filter_mistakes(vit3b_mistakes_df, classified_by_us=True, match_dough_bagel=False)
vit3b_unclassified = filter_mistakes(vit3b_mistakes_df, classified_by_us=False, match_dough_bagel=False)

['Fine-grained error', 'Fine-grained error with OOV', 'Fine-grained with Multilabel Options', 'Non-prototypical', 'Spurious Correlation']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_category'] = df_mistakes['file_name'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_severity'] = df_mistakes['file_name'].map(


Model name: vit3b
All errors (Major + Minor) counts:
Number of mistakes: 378
                          DoughBagel same supercls (fine-gr)       fine-gr OOV (CLIP)                non-proto common co occ (spur corr)                    uncls              Total (row)
                  Fine-grained error                     191                       15                        0                       10                       25                      241
         Fine-grained error with OOV                       9                       20                        0                       11                       14                       54
Fine-grained with Multilabel Options                       1                        0                        0                        0                        0                        1
                    Non-prototypical                      13                        2                       12                        3                        0                  

### Classified by us but not matching DoughBagel categorization

In [7]:
visualize_df(vit3b_classified_not_matching)

Number of samples: 85


Unnamed: 0,wnet_id,file_name,img_rel_path,target,target_desc,multi_label,multi_desc,top1,top1_desc,error_types,clip_top10_train_files,clip_top10_supercls,DB_category,DB_severity
1302,n01630670,ILSVRC2012_val_00003189.JPEG,,26,"common newt, Triturus vulgaris",[26],"[(common newt, Triturus vulgaris)]",29,"axolotl, mud puppy, Ambystoma mexicanum","[same superclass (fine-grained), [salamander_lizard], 26]","[n01632777/n01632777_18909.JPEG, n01632777/n01632777_16078.JPEG, n01632777/n01632777_11522.JPEG, n01632777/n01632777_9036.JPEG, n01632777/n01632777_11826.JPEG, n01632777/n01632777_19277.JPEG, n01632777/n01632777_18497.JPEG, n01632777/n01632777_13343.JPEG, n01632458/n01632458_2473.JPEG, n01632777/n01632777_11336.JPEG]","[salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard]",Non-prototypical,Borderline
1305,n01630670,ILSVRC2012_val_00005868.JPEG,,26,"common newt, Triturus vulgaris",[26],"[(common newt, Triturus vulgaris)]",28,"spotted salamander, Ambystoma maculatum","[same superclass (fine-grained), [salamander_lizard], 26]","[n01632458/n01632458_2039.JPEG, n01632458/n01632458_2.JPEG, n01630670/n01630670_5942.JPEG, n01631663/n01631663_7307.JPEG, n01632458/n01632458_5909.JPEG, n01630670/n01630670_1768.JPEG, n01632458/n01632458_174.JPEG, n01630670/n01630670_6021.JPEG, n01632458/n01632458_4852.JPEG, n01632458/n01632458_872.JPEG]","[salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard]",Fine-grained error with OOV,Minor
1505,n01641577,ILSVRC2012_val_00006409.JPEG,,30,"bullfrog, Rana catesbeiana",[30],"[(bullfrog, Rana catesbeiana)]",32,"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui","[same superclass (fine-grained), [frog], 30]","[n01641577/n01641577_7674.JPEG, n01644900/n01644900_20856.JPEG, n01641577/n01641577_22931.JPEG, n01644900/n01644900_19465.JPEG, n01641577/n01641577_239.JPEG, n01641577/n01641577_14528.JPEG, n01641577/n01641577_11410.JPEG, n01641577/n01641577_3910.JPEG, n01644900/n01644900_22049.JPEG, n01644900/n01644900_11754.JPEG]","[frog, frog, frog, frog, frog, frog, frog, frog, frog, frog]",Non-prototypical,Minor
1814,n01667778,ILSVRC2012_val_00013301.JPEG,,36,terrapin,"[36, 769]","[(terrapin), (rule, ruler)]",37,"box turtle, box tortoise","[same superclass (fine-grained), [turtle], 36]","[n01667114/n01667114_12219.JPEG, n01667114/n01667114_17254.JPEG, n01667114/n01667114_11596.JPEG, n01667114/n01667114_4268.JPEG, n01667114/n01667114_82.JPEG, n01667778/n01667778_5171.JPEG, n01669191/n01669191_5385.JPEG, n01667114/n01667114_14044.JPEG, n01669191/n01669191_973.JPEG, n01669191/n01669191_11029.JPEG]","[turtle, turtle, turtle, turtle, turtle, turtle, turtle, turtle, turtle, turtle]",Fine-grained error with OOV,Borderline
1956,n01677366,ILSVRC2012_val_00004201.JPEG,,39,"common iguana, iguana, Iguana iguana",[39],"[(common iguana, iguana, Iguana iguana)]",40,"American chameleon, anole, Anolis carolinensis","[same superclass (fine-grained), [green_snake_and_lizards,salamander_lizard], 39]","[n01682714/n01682714_2587.JPEG, n01693334/n01693334_10362.JPEG, n01693334/n01693334_13787.JPEG, n01693334/n01693334_2762.JPEG, n01693334/n01693334_11301.JPEG, n01693334/n01693334_10641.JPEG, n01682714/n01682714_5375.JPEG, n01693334/n01693334_3257.JPEG, n01693334/n01693334_3430.JPEG, n01682714/n01682714_15617.JPEG]","[green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard]",Fine-grained error with OOV,Borderline
2305,n01693334,ILSVRC2012_val_00004102.JPEG,,46,"green lizard, Lacerta viridis",[46],"[(green lizard, Lacerta viridis)]",41,"whiptail, whiptail lizard","[same superclass (fine-grained), [salamander_lizard], 46]","[n01687978/n01687978_2981.JPEG, n01687978/n01687978_4000.JPEG, n01693334/n01693334_397.JPEG, n01685808/n01685808_4040.JPEG, n01685808/n01685808_5276.JPEG, n01685808/n01685808_11745.JPEG, n01685808/n01685808_1469.JPEG, n01687978/n01687978_4250.JPEG, n01685808/n01685808_3712.JPEG, n01693334/n01693334_6885.JPEG]","[salamander_lizard, salamander_lizard, green_snake_and_lizards, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, green_snake_and_lizards, salamander_lizard]",Non-prototypical,Minor
5214,n01877812,ILSVRC2012_val_00008736.JPEG,,104,"wallaby, brush kangaroo",[104],"[(wallaby, brush kangaroo)]",331,hare,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2323902, 'leporid mammal', '45.91%'), (1886756, 'eutherian mammal', '24.19%'), (1886756, 'eutherian', '10.40%'), (2323449, 'lagomorph', '6.21%'), (2325366, 'wood rabbit', '3.38%')] | out of 17]","[n01877812/n01877812_2393.JPEG, n01877812/n01877812_10137.JPEG, n02326432/n02326432_12525.JPEG, n01877812/n01877812_81.JPEG, n02326432/n02326432_122.JPEG, n01877812/n01877812_213.JPEG, n03042490/n03042490_5543.JPEG, n01877812/n01877812_9709.JPEG, n01877812/n01877812_554.JPEG, n01877812/n01877812_8675.JPEG]","[bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, rabbit, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, rabbit, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, n03042490, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian]",Fine-grained error,Minor
8779,n02091635,ILSVRC2012_val_00022982.JPEG,,175,"otterhound, otter hound",[175],"[(otterhound, otter hound)]",226,briard,"[common co-occurrences (spurious correlations), 175]","[n02105251/n02105251_6355.JPEG, n02106382/n02106382_6173.JPEG, n02091635/n02091635_3901.JPEG, n02105251/n02105251_4262.JPEG, n02105641/n02105641_5527.JPEG, n02091635/n02091635_11279.JPEG, n02105251/n02105251_4402.JPEG, n02097474/n02097474_17389.JPEG, n02105251/n02105251_1549.JPEG, n02105251/n02105251_3860.JPEG]","[hairy_shepherd_dog, hairy_shepherd_dog, skinny_hound_rest, terrier, hairy_shepherd_dog, hairy_shepherd_dog, skinny_hound_rest, terrier, hairy_shepherd_dog, toy_dog, hairy_shepherd_dog, terrier, hairy_shepherd_dog, hairy_shepherd_dog]",Fine-grained error,Borderline
9606,n02096177,ILSVRC2012_val_00007255.JPEG,,192,"cairn, cairn terrier","[192, 608]","[(cairn, cairn terrier), (jean, blue jean, denim)]",254,"pug, pug-dog","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2112497, 'brussels griffon', '23.86%'), (2110958, 'pug-dog', '11.27%'), (2084732, 'bow-wow', '9.49%'), (2110341, 'coach dog', '7.16%'), (2111626, 'spitz', '5.75%')] | out of 45]","[n02092339/n02092339_292.JPEG, n07742313/n07742313_6567.JPEG, n02096177/n02096177_5338.JPEG, n02110063/n02110063_6658.JPEG, n07742313/n07742313_4683.JPEG, n02091831/n02091831_9028.JPEG, n02110958/n02110958_8703.JPEG, n02091032/n02091032_6499.JPEG, n02110185/n02110185_8311.JPEG, n02097130/n02097130_4434.JPEG]","[beagle_hound_pointer_spaniel_setter, reproductive_structure, toy_dog, terrier, husky, reproductive_structure, skinny_hound_rest, pug_boxer_bulldog, skinny_hound_rest, husky, terrier]",Fine-grained error,Borderline
10287,n02099267,ILSVRC2012_val_00035881.JPEG,,205,flat-coated retriever,[205],[(flat-coated retriever)],197,giant schnauzer,"[common co-occurrences (spurious correlations), 205]","[n02088632/n02088632_7257.JPEG, n02088632/n02088632_8103.JPEG, n02099712/n02099712_169.JPEG, n02112706/n02112706_3713.JPEG, n02110627/n02110627_11135.JPEG, n02088632/n02088632_8145.JPEG, n02090721/n02090721_1087.JPEG, n02097209/n02097209_3470.JPEG, n02094114/n02094114_2353.JPEG, n02097298/n02097298_8858.JPEG]","[beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, retriever_like, toy_dog, pug_boxer_bulldog, toy_dog, hairy_shepherd_dog, terrier, beagle_hound_pointer_spaniel_setter, skinny_hound_rest, terrier, terrier, toy_dog, terrier, terrier]",Non-prototypical,Minor


# GreedySoups

In [8]:
greedysoups_mistakes_df = get_mistakes_df('greedysoups')
greedysoups_classified_not_matching = filter_mistakes(greedysoups_mistakes_df, classified_by_us=True, match_dough_bagel=False)
greedysoups_unclassified = filter_mistakes(greedysoups_mistakes_df, classified_by_us=False, match_dough_bagel=False)

['Fine-grained error', 'Fine-grained error with OOV', 'Fine-grained with OOV', 'Non-prototypical', 'Spurious Correlation']


                                                                                                                                                      

Model name: greedysoups
All errors (Major + Minor) counts:
Number of mistakes: 249
                          DoughBagel same supercls (fine-gr)       fine-gr OOV (CLIP)                non-proto common co occ (spur corr)                    uncls              Total (row)
                  Fine-grained error                     139                       14                        1                        7                       11                      172
         Fine-grained error with OOV                       4                        7                        0                        4                        2                       17
               Fine-grained with OOV                       3                        1                        0                        1                        4                        9
                    Non-prototypical                       8                        1                        8                        2                        0            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_category'] = df_mistakes['file_name'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_severity'] = df_mistakes['file_name'].map(


### Classified by us but not matching DoughBagel categorization

In [9]:
visualize_df(greedysoups_classified_not_matching)

Number of samples: 57


Unnamed: 0,wnet_id,file_name,img_rel_path,target,target_desc,multi_label,multi_desc,top1,top1_desc,error_types,clip_top10_train_files,clip_top10_supercls,DB_category,DB_severity
1302,n01630670,ILSVRC2012_val_00003189.JPEG,,26,"common newt, Triturus vulgaris",[26],"[(common newt, Triturus vulgaris)]",29,"axolotl, mud puppy, Ambystoma mexicanum","[same superclass (fine-grained), [salamander_lizard], 26]","[n01632777/n01632777_18909.JPEG, n01632777/n01632777_16078.JPEG, n01632777/n01632777_11522.JPEG, n01632777/n01632777_9036.JPEG, n01632777/n01632777_11826.JPEG, n01632777/n01632777_19277.JPEG, n01632777/n01632777_18497.JPEG, n01632777/n01632777_13343.JPEG, n01632458/n01632458_2473.JPEG, n01632777/n01632777_11336.JPEG]","[salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard, salamander_lizard]",Non-prototypical,Borderline
1505,n01641577,ILSVRC2012_val_00006409.JPEG,,30,"bullfrog, Rana catesbeiana",[30],"[(bullfrog, Rana catesbeiana)]",32,"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui","[same superclass (fine-grained), [frog], 30]","[n01641577/n01641577_7674.JPEG, n01644900/n01644900_20856.JPEG, n01641577/n01641577_22931.JPEG, n01644900/n01644900_19465.JPEG, n01641577/n01641577_239.JPEG, n01641577/n01641577_14528.JPEG, n01641577/n01641577_11410.JPEG, n01641577/n01641577_3910.JPEG, n01644900/n01644900_22049.JPEG, n01644900/n01644900_11754.JPEG]","[frog, frog, frog, frog, frog, frog, frog, frog, frog, frog]",Non-prototypical,Minor
1956,n01677366,ILSVRC2012_val_00004201.JPEG,,39,"common iguana, iguana, Iguana iguana",[39],"[(common iguana, iguana, Iguana iguana)]",40,"American chameleon, anole, Anolis carolinensis","[same superclass (fine-grained), [green_snake_and_lizards,salamander_lizard], 39]","[n01682714/n01682714_2587.JPEG, n01693334/n01693334_10362.JPEG, n01693334/n01693334_13787.JPEG, n01693334/n01693334_2762.JPEG, n01693334/n01693334_11301.JPEG, n01693334/n01693334_10641.JPEG, n01682714/n01682714_5375.JPEG, n01693334/n01693334_3257.JPEG, n01693334/n01693334_3430.JPEG, n01682714/n01682714_15617.JPEG]","[green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard, green_snake_and_lizards, salamander_lizard]",Fine-grained error with OOV,Borderline
5214,n01877812,ILSVRC2012_val_00008736.JPEG,,104,"wallaby, brush kangaroo",[104],"[(wallaby, brush kangaroo)]",331,hare,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2323902, 'leporid mammal', '45.91%'), (1886756, 'eutherian mammal', '24.19%'), (1886756, 'eutherian', '10.40%'), (2323449, 'lagomorph', '6.21%'), (2325366, 'wood rabbit', '3.38%')] | out of 17]","[n01877812/n01877812_2393.JPEG, n01877812/n01877812_10137.JPEG, n02326432/n02326432_12525.JPEG, n01877812/n01877812_81.JPEG, n02326432/n02326432_122.JPEG, n01877812/n01877812_213.JPEG, n03042490/n03042490_5543.JPEG, n01877812/n01877812_9709.JPEG, n01877812/n01877812_554.JPEG, n01877812/n01877812_8675.JPEG]","[bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, rabbit, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, rabbit, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, n03042490, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian, bear_panda_marsupial, viverrine_musteline_rodent_prototherian]",Fine-grained error,Minor
8385,n02089973,ILSVRC2012_val_00037826.JPEG,,167,English foxhound,[167],[(English foxhound)],180,"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier",[non-prototypical (DoughBagel)],"[n02088632/n02088632_3230.JPEG, n02093428/n02093428_9082.JPEG, n02108089/n02108089_24780.JPEG, n02109047/n02109047_1210.JPEG, n02108089/n02108089_815.JPEG, n02093256/n02093256_4181.JPEG, n02108089/n02108089_3295.JPEG, n02088632/n02088632_2736.JPEG, n02107574/n02107574_3910.JPEG, n02088632/n02088632_1321.JPEG]","[beagle_hound_pointer_spaniel_setter, pug_boxer_bulldog, pug_boxer_bulldog, sennenhunde_and_big_dogs, beagle_hound_pointer_spaniel_setter, sennenhunde_and_big_dogs, pug_boxer_bulldog, sennenhunde_and_big_dogs, pug_boxer_bulldog, pug_boxer_bulldog, sennenhunde_and_big_dogs, beagle_hound_pointer_spaniel_setter, sennenhunde_and_big_dogs, beagle_hound_pointer_spaniel_setter]",Fine-grained error,Minor
8779,n02091635,ILSVRC2012_val_00022982.JPEG,,175,"otterhound, otter hound",[175],"[(otterhound, otter hound)]",226,briard,"[common co-occurrences (spurious correlations), 175]","[n02105251/n02105251_6355.JPEG, n02106382/n02106382_6173.JPEG, n02091635/n02091635_3901.JPEG, n02105251/n02105251_4262.JPEG, n02105641/n02105641_5527.JPEG, n02091635/n02091635_11279.JPEG, n02105251/n02105251_4402.JPEG, n02097474/n02097474_17389.JPEG, n02105251/n02105251_1549.JPEG, n02105251/n02105251_3860.JPEG]","[hairy_shepherd_dog, hairy_shepherd_dog, skinny_hound_rest, terrier, hairy_shepherd_dog, hairy_shepherd_dog, skinny_hound_rest, terrier, hairy_shepherd_dog, toy_dog, hairy_shepherd_dog, terrier, hairy_shepherd_dog, hairy_shepherd_dog]",Fine-grained error,Borderline
9196,n02093859,ILSVRC2012_val_00040773.JPEG,,183,Kerry blue terrier,[183],[(Kerry blue terrier)],233,"Bouvier des Flandres, Bouviers des Flandres","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2103406, 'working dog', '59.72%'), (2110627, 'affenpinscher', '13.32%'), (2104523, 'sheep dog', '7.47%'), (2104523, 'sheepdog', '5.38%'), (2106382, 'bouvier des flandres', '2.50%')] | out of 28]","[n02113624/n02113624_9117.JPEG, n02095570/n02095570_2938.JPEG, n02095570/n02095570_688.JPEG, n02095570/n02095570_1081.JPEG, n02105251/n02105251_3424.JPEG, n02113624/n02113624_2990.JPEG, n02113712/n02113712_2232.JPEG, n02113712/n02113712_3817.JPEG, n02095570/n02095570_10724.JPEG, n02095570/n02095570_3912.JPEG]","[toy_dog, poodle_like, terrier, terrier, terrier, hairy_shepherd_dog, toy_dog, poodle_like, toy_dog, poodle_like, toy_dog, poodle_like, terrier, terrier]",Fine-grained error,Minor
10287,n02099267,ILSVRC2012_val_00035881.JPEG,,205,flat-coated retriever,[205],[(flat-coated retriever)],197,giant schnauzer,"[common co-occurrences (spurious correlations), 205]","[n02088632/n02088632_7257.JPEG, n02088632/n02088632_8103.JPEG, n02099712/n02099712_169.JPEG, n02112706/n02112706_3713.JPEG, n02110627/n02110627_11135.JPEG, n02088632/n02088632_8145.JPEG, n02090721/n02090721_1087.JPEG, n02097209/n02097209_3470.JPEG, n02094114/n02094114_2353.JPEG, n02097298/n02097298_8858.JPEG]","[beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, retriever_like, toy_dog, pug_boxer_bulldog, toy_dog, hairy_shepherd_dog, terrier, beagle_hound_pointer_spaniel_setter, skinny_hound_rest, terrier, terrier, toy_dog, terrier, terrier]",Non-prototypical,Minor
10551,n02100583,ILSVRC2012_val_00004615.JPEG,,211,"vizsla, Hungarian pointer",[211],"[(vizsla, Hungarian pointer)]",159,Rhodesian ridgeback,"[same superclass (fine-grained), [beagle_hound_pointer_spaniel_setter], 211]","[n02087394/n02087394_18182.JPEG, n02100583/n02100583_12289.JPEG, n02087394/n02087394_29562.JPEG, n02087394/n02087394_10209.JPEG, n02087394/n02087394_26627.JPEG, n02087394/n02087394_4557.JPEG, n02087394/n02087394_10274.JPEG, n02087394/n02087394_1614.JPEG, n02100583/n02100583_9949.JPEG, n02100583/n02100583_3147.JPEG]","[beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter, beagle_hound_pointer_spaniel_setter]",Non-prototypical,Borderline
11125,n02104029,ILSVRC2012_val_00027529.JPEG,,222,kuvasz,[222],[(kuvasz)],258,"Samoyed, Samoyede","[common co-occurrences (spurious correlations), 222]","[n02111889/n02111889_134.JPEG, n02111889/n02111889_4080.JPEG, n02111500/n02111500_7438.JPEG, n02111500/n02111500_46.JPEG, n02111889/n02111889_2097.JPEG, n02111889/n02111889_8655.JPEG, n02114548/n02114548_7694.JPEG, n02111500/n02111500_2155.JPEG, n02111500/n02111500_4783.JPEG, n02111889/n02111889_11451.JPEG]","[spitz, husky, spitz, husky, retriever_like, retriever_like, spitz, husky, spitz, husky, wolf_wild_dog_hyena_fox, retriever_like, retriever_like, spitz, husky]",Fine-grained error,Minor


# ====================

# Misc

In [10]:
# Filter samples where error classifications match:
df1 = get_mistakes_df('vit3b')
df1 = filter_mistakes(df1, classified_by_us=True, match_dough_bagel=True)

df2 = get_mistakes_df('greedysoups')
df2 = filter_mistakes(df2, classified_by_us=True, match_dough_bagel=True)

df_merged = pd.concat([df1, df2], ignore_index=True, sort=False)
assert df_merged.shape[0] == df1.shape[0] + df2.shape[0]

['Fine-grained error', 'Fine-grained error with OOV', 'Fine-grained with Multilabel Options', 'Non-prototypical', 'Spurious Correlation']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_category'] = df_mistakes['file_name'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_severity'] = df_mistakes['file_name'].map(


Model name: vit3b
All errors (Major + Minor) counts:
Number of mistakes: 378
                          DoughBagel same supercls (fine-gr)       fine-gr OOV (CLIP)                non-proto common co occ (spur corr)                    uncls              Total (row)
                  Fine-grained error                     191                       15                        0                       10                       25                      241
         Fine-grained error with OOV                       9                       20                        0                       11                       14                       54
Fine-grained with Multilabel Options                       1                        0                        0                        0                        0                        1
                    Non-prototypical                      13                        2                       12                        3                        0                  

                                                                                                                                                      

Model name: greedysoups
All errors (Major + Minor) counts:
Number of mistakes: 249
                          DoughBagel same supercls (fine-gr)       fine-gr OOV (CLIP)                non-proto common co occ (spur corr)                    uncls              Total (row)
                  Fine-grained error                     139                       14                        1                        7                       11                      172
         Fine-grained error with OOV                       4                        7                        0                        4                        2                       17
               Fine-grained with OOV                       3                        1                        0                        1                        4                        9
                    Non-prototypical                       8                        1                        8                        2                        0            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_category'] = df_mistakes['file_name'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mistakes['DB_severity'] = df_mistakes['file_name'].map(


In [11]:
visualize_df(df_merged[ErrorClsTypes.filter_error_type(df_merged, ErrorClsTypes.OOV_detected_by_clip)])

Number of samples: 28


Unnamed: 0,wnet_id,file_name,img_rel_path,target,target_desc,multi_label,multi_desc,top1,top1_desc,error_types,clip_top10_train_files,clip_top10_supercls,DB_category,DB_severity
133,n02776631,ILSVRC2012_val_00023736.JPEG,,415,"bakery, bakeshop, bakehouse",[415],"[(bakery, bakeshop, bakehouse)]",529,"diaper, nappy, napkin","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(4357531, 'sunsuit', '13.21%'), (3122748, 'covering', '12.41%'), (3188531, 'diaper', '10.95%'), (3450734, 'gown', '9.08%'), (3051540, 'habiliment', '6.44%')] | out of 98]","[n02804414/n02804414_9212.JPEG, n02804414/n02804414_131.JPEG, n03188531/n03188531_3577.JPEG, n02804414/n02804414_9592.JPEG, n03188531/n03188531_4029.JPEG, n02804414/n02804414_594.JPEG, n02804414/n02804414_601.JPEG, n02804414/n02804414_8818.JPEG, n02834397/n02834397_21532.JPEG, n02776631/n02776631_78112.JPEG]","[baby_bed, baby_bed, n03188531, baby_bed, n03188531, baby_bed, baby_bed, baby_bed, fabric, public_place]",Fine-grained error with OOV,Minor
145,n02950826,ILSVRC2012_val_00027001.JPEG,,471,cannon,[471],[(cannon)],403,"aircraft carrier, carrier, flattop, attack aircraft carrier","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(3140900, 'cruiser', '11.40%'), (4552696, 'warship', '11.05%'), (2956393, 'capital ship', '10.06%'), (2687172, 'aircraft carrier', '9.59%'), (2812201, 'battleship', '8.21%')] | out of 55]","[n02687172/n02687172_10338.JPEG, n02687172/n02687172_16486.JPEG, n02687172/n02687172_19776.JPEG, n02687172/n02687172_54068.JPEG, n02687172/n02687172_1114.JPEG, n02687172/n02687172_33487.JPEG, n02687172/n02687172_95244.JPEG, n02687172/n02687172_26367.JPEG, n02687172/n02687172_24910.JPEG, n02687172/n02687172_188.JPEG]","[ship_boat, ship_boat, ship_boat, ship_boat, ship_boat, ship_boat, ship_boat, ship_boat, ship_boat, ship_boat]",Fine-grained error with OOV,Borderline
146,n02974003,ILSVRC2012_val_00011277.JPEG,,479,car wheel,"[479, 581]","[(car wheel), (grille, radiator grille)]",436,"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(3791235, 'automotive vehicle', '20.56%'), (3119396, 'coupe', '14.80%'), (3493219, 'hardtop', '10.02%'), (3141065, 'prowl car', '9.12%'), (2814533, 'estate car', '7.11%')] | out of 111]","[n02814533/n02814533_43575.JPEG, n02814533/n02814533_23903.JPEG, n02814533/n02814533_4687.JPEG, n02814533/n02814533_9479.JPEG, n02814533/n02814533_5357.JPEG, n02814533/n02814533_11051.JPEG, n02814533/n02814533_16978.JPEG, n02814533/n02814533_1015.JPEG, n02814533/n02814533_14754.JPEG, n02814533/n02814533_34500.JPEG]","[car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck, car_bus_truck]",Fine-grained error with OOV,Minor
147,n03000134,ILSVRC2012_val_00048435.JPEG,,489,chainlink fence,[489],[(chainlink fence)],517,crane,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(3525074, 'hoist', '52.50%'), (3664675, 'lifting device', '46.31%'), (3126707, 'crane', '0.98%'), (3240683, 'drilling platform', '0.11%'), (3281145, 'lift', '0.08%')] | out of 10]","[n03126707/n03126707_5440.JPEG, n03126707/n03126707_4591.JPEG, n03126707/n03126707_16357.JPEG, n03126707/n03126707_14258.JPEG, n03126707/n03126707_10788.JPEG, n03532672/n03532672_26995.JPEG, n03126707/n03126707_14307.JPEG, n03126707/n03126707_16918.JPEG, n03126707/n03126707_15718.JPEG, n03126707/n03126707_12656.JPEG]","[construction_site, construction_site, construction_site, construction_site, construction_site, small_object, construction_site, construction_site, construction_site, construction_site]",Fine-grained error with OOV,Minor
148,n03014705,ILSVRC2012_val_00003447.JPEG,,492,chest,[492],[(chest)],362,badger,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2448318, 'ratel', '39.41%'), (2449183, 'carcajou', '31.67%'), (2138441, 'mierkat', '9.36%'), (2449699, 'grison', '4.49%'), (2449699, 'grison vittatus', '4.42%')] | out of 80]","[n04325704/n04325704_312.JPEG, n02441942/n02441942_11470.JPEG, n02120505/n02120505_4477.JPEG, n02447366/n02447366_4144.JPEG, n02112350/n02112350_8896.JPEG, n02120505/n02120505_5847.JPEG, n02447366/n02447366_18443.JPEG, n02447366/n02447366_35154.JPEG, n03404251/n03404251_15113.JPEG, n02445715/n02445715_5243.JPEG]","[woolen_pieces, viverrine_musteline_rodent_prototherian, wolf_wild_dog_hyena_fox, viverrine_musteline_rodent_prototherian, spitz, standard_shepherd_dog, husky, wolf_wild_dog_hyena_fox, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, robe_overgarment_cloak, viverrine_musteline_rodent_prototherian]",Fine-grained error with OOV,Minor
154,n03124170,ILSVRC2012_val_00002368.JPEG,,515,"cowboy hat, ten-gallon hat",[515],"[(cowboy hat, ten-gallon hat)]",38,banded gecko,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(1674464, 'lizard', '26.47%'), (1674990, 'gecko', '19.98%'), (1661091, 'reptile', '13.95%'), (1675352, 'fringed gecko', '9.15%'), (1682714, 'american chameleon', '6.90%')] | out of 55]","[n03124170/n03124170_8375.JPEG, n02110958/n02110958_107.JPEG, n01687978/n01687978_8621.JPEG, n01688243/n01688243_3188.JPEG, n02085620/n02085620_1729.JPEG, n01688243/n01688243_1384.JPEG, n01688243/n01688243_1553.JPEG, n02094258/n02094258_1152.JPEG, n03124170/n03124170_13625.JPEG, n03124170/n03124170_3495.JPEG]","[hat, pug_boxer_bulldog, salamander_lizard, salamander_lizard, toy_dog, salamander_lizard, salamander_lizard, toy_dog, terrier, hat, hat]",Fine-grained error with OOV,Major
155,n03124170,ILSVRC2012_val_00022722.JPEG,,515,"cowboy hat, ten-gallon hat",[515],"[(cowboy hat, ten-gallon hat)]",678,neck brace,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(4359589, 'support', '64.25%'), (4336034, 'strengthener', '16.77%'), (2768655, 'back brace', '8.56%'), (3814639, 'neck brace', '4.96%'), (4336034, 'reinforcement', '2.38%')] | out of 10]","[n03124170/n03124170_1985.JPEG, n02869837/n02869837_124.JPEG, n03759954/n03759954_10406.JPEG, n04584207/n04584207_3171.JPEG, n03814639/n03814639_12673.JPEG, n02963159/n02963159_19863.JPEG, n03124170/n03124170_7356.JPEG, n03124170/n03124170_1968.JPEG, n03124170/n03124170_4628.JPEG, n03661043/n03661043_10737.JPEG]","[hat, caps, hat, stick, n04584207, n03814639, woolen_pieces, sweater, hat, hat, hat, public_place]",Fine-grained error with OOV,Minor
157,n03180011,ILSVRC2012_val_00001246.JPEG,,527,desktop computer,"[526, 527]","[(desk), (desktop computer)]",332,"Angora, Angora rabbit","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(2324850, 'european rabbit', '18.58%'), (2324850, 'old world rabbit', '18.58%'), (2324587, 'bunny rabbit', '11.27%'), (2323449, 'lagomorph', '10.42%'), (2324587, 'bunny', '8.63%')] | out of 39]","[n02328150/n02328150_3509.JPEG, n03180011/n03180011_9847.JPEG, n04399382/n04399382_737.JPEG, n03793489/n03793489_11626.JPEG, n03642806/n03642806_13870.JPEG, n03793489/n03793489_5550.JPEG, n03793489/n03793489_9801.JPEG, n03832673/n03832673_11131.JPEG, n02123597/n02123597_3575.JPEG, n03085013/n03085013_25066.JPEG]","[rabbit, computer_monitor, n04399382, n03793489, computer_monitor, keyboard, n03793489, n03793489, computer_monitor, keyboard, domestic_cat, keyboard]",Fine-grained error with OOV,Borderline
182,n03942813,ILSVRC2012_val_00018814.JPEG,,722,ping-pong ball,[722],[(ping-pong ball)],517,crane,"[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(3664675, 'lifting device', '53.38%'), (3525074, 'hoist', '24.81%'), (4585745, 'winch', '15.53%'), (3183080, 'device', '3.16%'), (3281145, 'lift', '1.18%')] | out of 10]","[n03942813/n03942813_88.JPEG, n03126707/n03126707_17328.JPEG, n03794056/n03794056_26832.JPEG, n03384352/n03384352_12213.JPEG, n03532672/n03532672_23127.JPEG, n03942813/n03942813_6815.JPEG, n03794056/n03794056_14945.JPEG, n03794056/n03794056_2349.JPEG, n03794056/n03794056_17759.JPEG, n03794056/n03794056_2553.JPEG]","[game_ball, construction_site, small_object, work_cart, small_object, game_ball, small_object, small_object, small_object, small_object]",Fine-grained error with OOV,Major
186,n03991062,ILSVRC2012_val_00012647.JPEG,,738,"pot, flowerpot",[738],"[(pot, flowerpot)]",998,"ear, spike, capitulum","[potentially OOV, detected by CLIP, Top-5 alternative proposals: [(13087625, 'plant organ', '20.80%'), (12144580, 'corn', '20.47%'), (12162758, 'prairie gourd', '11.48%'), (13137672, 'syncarp', '10.79%'), (13086908, 'plant part', '8.66%')] | out of 51]","[n03991062/n03991062_6249.JPEG, n03457902/n03457902_21891.JPEG, n13133613/n13133613_40199.JPEG, n03991062/n03991062_15786.JPEG, n03457902/n03457902_24704.JPEG, n03457902/n03457902_25278.JPEG, n03461385/n03461385_58896.JPEG, n03991062/n03991062_2131.JPEG, n03991062/n03991062_10917.JPEG, n03457902/n03457902_107229.JPEG]","[bucket_pot, housing_farming_structure, corn, bucket_pot, housing_farming_structure, housing_farming_structure, public_place, bucket_pot, bucket_pot, housing_farming_structure]",Fine-grained error with OOV,Major


In [12]:
eval_manager.debug_oov('ILSVRC2012_val_00008164.JPEG', df=df_merged)

Filtered: 2 rows, debugging only the first one

img_rel_path: n09256479/ILSVRC2012_val_00008164.JPEG
target: 973, multi_label: [973], multi_desc: ['(coral reef)']
top1: 392, top1_desc: rock beauty, Holocanthus tricolor
['potentially OOV, detected by CLIP', "Top-5 alternative proposals: [(2605316, 'butterfly fish', '70.75%'), (2605936, 'angelfish', '23.69%'), (2605703, 'chaetodon', '3.75%'), (1443537, 'carassius auratus', '0.43%'), (2552171, 'spiny-finned fish', '0.27%')] | out of 52"]

clip_top10_train_files:
n02606052/n02606052_22953.JPEG
n02606052/n02606052_448.JPEG
n02606052/n02606052_20464.JPEG
n02606052/n02606052_22729.JPEG
n02606052/n02606052_12701.JPEG
n02606052/n02606052_1622.JPEG
n02606052/n02606052_1630.JPEG
n02606052/n02606052_5444.JPEG
n02606052/n02606052_5931.JPEG
n02606052/n02606052_5016.JPEG

clip_top10_supercls: ['fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest', 'fish_rest']
top-1 prediction superclasses

In [13]:
vit3b_df = eval_manager.prepare_and_eval_model('vit3b')
vit3b_df = vit3b_df[~vit3b_df['is_problematic']]
vit3b_df = vit3b_df[~ErrorClsTypes.filter_error_type(vit3b_df, ErrorClsTypes.correct)]
# Sort by number of multi labels:
vit3b_df['num_labels'] = vit3b_df['multi_label'].map(lambda multi_label: len(multi_label))
vit3b_df = vit3b_df.sort_values(by=['num_labels'], ascending=False)

                                                                                                                                                      

In [14]:
visualize_df(vit3b_df[ErrorClsTypes.filter_error_type(vit3b_df, ErrorClsTypes.correct_multi_label)][20:40])

Number of samples: 20


Unnamed: 0,wnet_id,file_name,img_rel_path,target,target_desc,multi_label,multi_desc,top1,top1_desc,error_types,clip_top10_train_files,clip_top10_supercls
22429,n02843684,ILSVRC2012_val_00025083.JPEG,,448,birdhouse,"[448, 494, 733, 682, 619]","[(birdhouse), (chime, bell, gong), (pole), (obelisk), (lampshade, lamp shade)]",682,obelisk,[correct wrt multi label (ambiguous/multi-object)],"[n02843684/n02843684_16828.JPEG, n03930313/n03930313_6415.JPEG, n02843684/n02843684_3190.JPEG, n02843684/n02843684_2419.JPEG, n03930313/n03930313_2215.JPEG, n04604644/n04604644_2504.JPEG, n03930313/n03930313_20122.JPEG, n04049303/n04049303_9143.JPEG, n03930313/n03930313_13876.JPEG, n04326547/n04326547_2319.JPEG]","[box, patterned_structure, box, box, patterned_structure, patterned_structure, patterned_structure, barrel_can, patterned_structure, patterned_structure]"
35948,n03933933,ILSVRC2012_val_00049055.JPEG,,718,pier,"[536, 718, 977, 978, 449]","[(dock, dockage, docking facility), (pier), (sandbar, sand bar), (seashore, coast, seacoast, sea-coast), (boathouse)]",977,"sandbar, sand bar",[correct wrt multi label (ambiguous/multi-object)],"[n02894605/n02894605_52762.JPEG, n09428293/n09428293_42263.JPEG, n04273569/n04273569_9237.JPEG, n09428293/n09428293_48465.JPEG, n03947888/n03947888_45822.JPEG, n09421951/n09421951_10629.JPEG, n09399592/n09399592_31399.JPEG, n09246464/n09246464_18384.JPEG, n04273569/n04273569_3429.JPEG, n02981792/n02981792_2103.JPEG]","[structure_near_water, geological_formation, ship_boat, geological_formation, ship_boat, geological_formation, geological_formation, geological_formation, ship_boat, ship_boat]"
39101,n04152593,ILSVRC2012_val_00001526.JPEG,,782,"screen, CRT screen","[526, 527, 664, 673, 782]","[(desk), (desktop computer), (monitor), (mouse, computer mouse), (screen, CRT screen)]",527,desktop computer,[correct wrt multi label (ambiguous/multi-object)],"[n03180011/n03180011_7130.JPEG, n03179701/n03179701_7925.JPEG, n03782006/n03782006_16837.JPEG, n03691459/n03691459_65205.JPEG, n03642806/n03642806_21377.JPEG, n03180011/n03180011_10477.JPEG, n03180011/n03180011_10047.JPEG, n03179701/n03179701_3636.JPEG, n03180011/n03180011_5023.JPEG, n03180011/n03180011_10065.JPEG]","[computer_monitor, table, computer_monitor, big_electronic_equipment, computer_monitor, keyboard, computer_monitor, computer_monitor, table, computer_monitor, computer_monitor]"
24253,n02988304,ILSVRC2012_val_00007354.JPEG,,485,CD player,"[482, 485, 632, 754, 848]","[(cassette player), (CD player), (loudspeaker, speaker, speaker unit, loudspeaker system, speaker system), (radio, wireless), (tape player)]",848,tape player,[correct wrt multi label (ambiguous/multi-object)],"[n04041544/n04041544_32914.JPEG, n04392985/n04392985_11759.JPEG, n02988304/n02988304_31837.JPEG, n04392985/n04392985_14652.JPEG, n02979186/n02979186_2186.JPEG, n04392985/n04392985_6966.JPEG, n04392985/n04392985_6789.JPEG, n02978881/n02978881_23098.JPEG, n04041544/n04041544_13206.JPEG, n02988304/n02988304_13207.JPEG]","[radio_modem, player, player, player, player, player, player, player, small_electronic_equipment, radio_modem, player, player]"
24288,n02988304,ILSVRC2012_val_00039422.JPEG,,485,CD player,"[482, 485, 754, 848, 598]","[(cassette player), (CD player), (radio, wireless), (tape player), (home theater, home theatre)]",598,"home theater, home theatre",[correct wrt multi label (ambiguous/multi-object)],"[n02988304/n02988304_8677.JPEG, n03529860/n03529860_10497.JPEG, n04404412/n04404412_1270.JPEG, n04392985/n04392985_6789.JPEG, n03529860/n03529860_10982.JPEG, n04404412/n04404412_33514.JPEG, n03529860/n03529860_5121.JPEG, n04041544/n04041544_36643.JPEG, n03290653/n03290653_5641.JPEG, n03529860/n03529860_8690.JPEG]","[player, entertainment_center_home_theatre, computer_monitor, player, entertainment_center_home_theatre, computer_monitor, entertainment_center_home_theatre, radio_modem, player, cabinet, entertainment_center_home_theatre, entertainment_center_home_theatre]"
24292,n02988304,ILSVRC2012_val_00043295.JPEG,,485,CD player,"[482, 485, 632, 754, 848]","[(cassette player), (CD player), (loudspeaker, speaker, speaker unit, loudspeaker system, speaker system), (radio, wireless), (tape player)]",848,tape player,[correct wrt multi label (ambiguous/multi-object)],"[n02979186/n02979186_2186.JPEG, n04392985/n04392985_6966.JPEG, n02979186/n02979186_9910.JPEG, n02979186/n02979186_6049.JPEG, n02988304/n02988304_6355.JPEG, n02979186/n02979186_4344.JPEG, n04392985/n04392985_3197.JPEG, n02979186/n02979186_5994.JPEG, n02988304/n02988304_6292.JPEG, n02979186/n02979186_12375.JPEG]","[player, player, player, player, player, player, player, player, player, player]"
38112,n04081281,ILSVRC2012_val_00015051.JPEG,,762,"restaurant, eating house, eating place, eatery","[659, 762, 809, 923, 925]","[(mixing bowl), (restaurant, eating house, eating place, eatery), (soup bowl), (plate), (consomme)]",925,consomme,[correct wrt multi label (ambiguous/multi-object)],"[n07584110/n07584110_1995.JPEG, n04263257/n04263257_5500.JPEG, n04263257/n04263257_6932.JPEG, n07584110/n07584110_5680.JPEG, n04263257/n04263257_3351.JPEG, n07711569/n07711569_484.JPEG, n04263257/n04263257_1072.JPEG, n07584110/n07584110_4784.JPEG, n07584110/n07584110_10454.JPEG, n04263257/n04263257_6831.JPEG]","[dish, utensils, dish, utensils, dish, dish, utensils, dish, dish, utensils, dish, dish, dish, utensils, dish]"
17963,n02443484,ILSVRC2012_val_00014852.JPEG,,359,"black-footed ferret, ferret, Mustela nigripes","[359, 740, 356, 357, 358]","[(black-footed ferret, ferret, Mustela nigripes), (power drill), (weasel), (mink), (polecat, fitch, foulmart, foumart, Mustela putorius)]",740,power drill,[correct wrt multi label (ambiguous/multi-object)],"[n02441942/n02441942_23103.JPEG, n02443114/n02443114_2619.JPEG, n02441942/n02441942_3182.JPEG, n02443484/n02443484_10430.JPEG, n02443484/n02443484_14246.JPEG, n02443484/n02443484_7310.JPEG, n02443484/n02443484_14694.JPEG, n02443484/n02443484_10248.JPEG, n02443484/n02443484_14428.JPEG, n02443484/n02443484_2055.JPEG]","[viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian, viverrine_musteline_rodent_prototherian]"
33216,n03782006,ILSVRC2012_val_00015161.JPEG,,664,monitor,"[526, 527, 664, 673, 782]","[(desk), (desktop computer), (monitor), (mouse, computer mouse), (screen, CRT screen)]",527,desktop computer,[correct wrt multi label (ambiguous/multi-object)],"[n03782006/n03782006_12764.JPEG, n03782006/n03782006_35437.JPEG, n03180011/n03180011_7667.JPEG, n04152593/n04152593_13203.JPEG, n04152593/n04152593_81048.JPEG, n03782006/n03782006_33539.JPEG, n04152593/n04152593_65766.JPEG, n03782006/n03782006_7861.JPEG, n04041544/n04041544_6169.JPEG, n03782006/n03782006_14303.JPEG]","[computer_monitor, computer_monitor, computer_monitor, computer_monitor, computer_monitor, computer_monitor, computer_monitor, computer_monitor, radio_modem, player, computer_monitor]"
22293,n02837789,ILSVRC2012_val_00036936.JPEG,,445,"bikini, two-piece","[445, 775, 836, 837]","[(bikini, two-piece), (sarong), (sunglass), (sunglasses, dark glasses, shades)]",836,sunglass,[correct wrt multi label (ambiguous/multi-object)],"[n02837789/n02837789_26284.JPEG, n02837789/n02837789_763.JPEG, n02837789/n02837789_14197.JPEG, n02837789/n02837789_33449.JPEG, n02837789/n02837789_16240.JPEG, n02837789/n02837789_33136.JPEG, n02837789/n02837789_13808.JPEG, n03710637/n03710637_6301.JPEG, n02837789/n02837789_12308.JPEG, n04355933/n04355933_980.JPEG]","[swimsuit, swimsuit, swimsuit, swimsuit, swimsuit, swimsuit, swimsuit, swimsuit, swimsuit, sunglass]"
