## A notebook to check the performance of OVIS on COCO novel classes

In [1]:
%matplotlib inline

## Part 1: Recompute the class embeddings and check that they match the saved embeddings

In [2]:
import os
import sys
from pathlib import Path

import mmcv
import torch



In [3]:
# Location of the betrayed-by-captions checkout; it is appended to sys.path in
# the next cell (presumably so the `open_set` imports further down resolve).
# Set the BETRAY_PATH environment variable to point at a different checkout;
# when it is unset the original hard-coded location is used, so existing
# setups behave exactly as before.
BETRAY_PATH = os.environ.get(
    "BETRAY_PATH", "/jupyter-users-home/tan-2enguyen/betrayed-by-captions"
)

In [4]:
# Add BETRAY_PATH to the import search path. The membership check keeps
# re-running this cell from appending duplicate entries.
if BETRAY_PATH not in sys.path:
    sys.path.append(BETRAY_PATH)

In [5]:
# Plain-text file with the known class names, one per line (it is split into
# lines when loaded below).
known_class_file = '../datasets/unknown/known_65.txt'
# Stored reference embeddings; loaded below as records with 'name' and 'emb' fields.
class_emb_file = '../datasets/embeddings/coco_class_with_bert_emb.json'

In [6]:
# mmcv file client created with default arguments; used below to read the class-list file.
file_client = mmcv.FileClient()

In [7]:
# Read the known class names, one per line.
# splitlines() handles both "\n" and "\r\n" endings, and blank lines (for
# example the one produced by a trailing newline) are dropped so that no empty
# class name reaches the tokenizer / embedding code further down.
all_known_classes = [
    line.strip()
    for line in file_client.get_text(known_class_file).splitlines()
    if line.strip()
]

In [8]:
import transformers
from open_set.models.utils.bert_embeddings import BertEmbeddings, BERT_MODEL_BY_EMBEDDING_TYPES

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Build the tokenizer and the embedding wrapper from the same pretrained
# checkpoint, selected by embedding type.
emb_type = 'bert'
bert_model_name = BERT_MODEL_BY_EMBEDDING_TYPES[emb_type]

tokenizer = transformers.BertTokenizer.from_pretrained(bert_model_name)

# The model is switched to eval mode before being wrapped.
bert_model = transformers.AutoModel.from_pretrained(bert_model_name).eval()
bert_embeddings = BertEmbeddings(bert_model=bert_model)

# Inference only: turn off gradient tracking on every parameter.
for param in bert_embeddings.parameters():
    param.requires_grad_(False)

In [10]:
# Count how many tokens each class name is split into (special tokens excluded).
token_counts_by_name = {}
for name in all_known_classes:
    token_ids = tokenizer.encode(name, add_special_tokens=False)
    token_counts_by_name[name] = len(token_ids)

In [11]:
# Display the number of tokens per class name; names with a count above 1 are split into sub-tokens.
token_counts_by_name

{'person': 1,
 'bicycle': 1,
 'car': 1,
 'motorcycle': 1,
 'truck': 1,
 'boat': 1,
 'bench': 1,
 'bird': 1,
 'horse': 1,
 'sheep': 1,
 'zebra': 1,
 'giraffe': 3,
 'backpack': 1,
 'handbag': 2,
 'skis': 2,
 'kite': 1,
 'surfboard': 2,
 'bottle': 1,
 'spoon': 1,
 'bowl': 1,
 'banana': 1,
 'apple': 1,
 'orange': 1,
 'broccoli': 3,
 'carrot': 1,
 'pizza': 1,
 'donut': 2,
 'chair': 1,
 'bed': 1,
 'tv': 1,
 'laptop': 1,
 'remote': 1,
 'microwave': 1,
 'oven': 1,
 'refrigerator': 1,
 'book': 1,
 'clock': 1,
 'vase': 1,
 'toothbrush': 2,
 'train': 1,
 'bear': 1,
 'suitcase': 1,
 'frisbee': 3,
 'fork': 1,
 'sandwich': 1,
 'toilet': 1,
 'mouse': 1,
 'toaster': 2,
 'bus': 1,
 'dog': 1,
 'cow': 1,
 'elephant': 1,
 'umbrella': 1,
 'tie': 1,
 'skateboard': 2,
 'cup': 1,
 'knife': 1,
 'cake': 1,
 'couch': 1,
 'keyboard': 1,
 'sink': 1,
 'scissors': 1,
 'airplane': 1,
 'cat': 1,
 'snowboard': 2}

In [12]:
# Spot check: 'donut' is encoded as two token ids (see the output below).
tokenizer.encode('donut',  add_special_tokens=False)

[2123, 4904]

In [13]:
# First space-separated word of a sample phrase, i.e. "hat".
bar = "hat man donut".split(" ")[0]

In [14]:
# Encode that single word (a plain str) without special tokens.
tokenizer.encode(bar,  add_special_tokens=False)

[6045]

In [1]:
# Recompute one embedding per known class: tokenize the class name (no special
# tokens), look up the embedding of every token and average over the tokens.
# NOTE: the name must be passed to the tokenizer as a plain string. The
# list-vs-string cells at the end of this notebook show that a list of words
# is encoded differently (["donut", "paper"] -> [100, 3259], while
# "donut paper" -> [2123, 4904, 3259]).
embs_by_name = {}
for name in all_known_classes:
    token_ids = torch.tensor(tokenizer.encode(name, add_special_tokens=False))
    token_embs = bert_embeddings.calculate_word_embeddings(token_ids)
    embs_by_name[name] = token_embs.mean(dim=0)

NameError: name 'all_known_classes' is not defined

In [16]:
# Stored ("ground-truth") embeddings, restricted to the classes that were
# recomputed above and converted to tensors for comparison.
gt_embs_by_name = {
    entry['name']: torch.tensor(entry['emb'])
    for entry in mmcv.load(class_emb_file)
    if entry['name'] in embs_by_name
}

In [17]:
# Norm of the difference between the recomputed and the stored embedding of
# each class; a class missing from the stored file raises KeyError here.
error_by_name = {
    name: torch.norm(emb - gt_embs_by_name[name], p='fro')
    for name, emb in embs_by_name.items()
}

In [18]:
# Per-class distance between the recomputed and the stored embedding.
# Correct values should have error close to 0.
error_by_name

{'person': tensor(0.),
 'bicycle': tensor(0.),
 'car': tensor(0.),
 'motorcycle': tensor(1.7268e-06),
 'truck': tensor(3.3101e-06),
 'boat': tensor(1.6852e-06),
 'bench': tensor(0.),
 'bird': tensor(0.),
 'horse': tensor(0.),
 'sheep': tensor(0.),
 'zebra': tensor(0.),
 'giraffe': tensor(1.1234e-06),
 'backpack': tensor(0.),
 'handbag': tensor(0.),
 'skis': tensor(0.),
 'kite': tensor(0.),
 'surfboard': tensor(0.),
 'bottle': tensor(0.),
 'spoon': tensor(3.8716e-06),
 'bowl': tensor(1.7392e-06),
 'banana': tensor(0.),
 'apple': tensor(0.),
 'orange': tensor(1.6809e-06),
 'broccoli': tensor(1.0401e-06),
 'carrot': tensor(0.),
 'pizza': tensor(1.7574e-06),
 'donut': tensor(1.0273e-06),
 'chair': tensor(0.),
 'bed': tensor(0.),
 'tv': tensor(0.),
 'laptop': tensor(0.),
 'remote': tensor(0.),
 'microwave': tensor(0.),
 'oven': tensor(1.6289e-06),
 'refrigerator': tensor(1.7703e-06),
 'book': tensor(0.),
 'clock': tensor(0.),
 'vase': tensor(0.),
 'toothbrush': tensor(0.),
 'train': tensor(

In [19]:
# First 10 components of the recomputed 'skateboard' embedding, for a visual spot check.
embs_by_name['skateboard'][:10]

tensor([ 0.0898, -1.2682, -0.0260,  0.8309, -0.1939,  1.1347, -0.3817,  0.0976,
         0.1264, -0.4558])

In [21]:
# Same slice of the stored 'skateboard' embedding; it should match the recomputed values.
gt_embs_by_name['skateboard'][:10]

tensor([ 0.0898, -1.2682, -0.0260,  0.8309, -0.1939,  1.1347, -0.3817,  0.0976,
         0.1264, -0.4558])

# Test the tokenization with a list vs. a string

In [72]:
# Wrong behavior: a list of words is NOT encoded like the space-joined string.
# "donut" comes back as the single id 100 here instead of [2123, 4904]
# (presumably the unknown-token id -- TODO confirm against the vocabulary).
tokenizer.encode(["donut", "paper"], add_special_tokens=False)

[100, 3259]

In [69]:
# Correct behavior: encoding one string yields the concatenation of the
# per-word ids ("donut" -> [2123, 4904], "paper" -> [3259]).
tokenizer.encode("donut paper", add_special_tokens=False)

[2123, 4904, 3259]

In [70]:
# Reference: "donut" encoded on its own.
tokenizer.encode("donut", add_special_tokens=False)

[2123, 4904]

In [71]:
# Reference: "paper" encoded on its own.
tokenizer.encode("paper", add_special_tokens=False)

[3259]