# Installing transformers package (and restarting the runtime)

In [1]:
!pip install transformers[torch]

Collecting accelerate>=0.20.3 (from transformers[torch])
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.25.0


In [None]:
# Restart Runtime
# Sends signal 9 to the kernel's own process; the notebook runtime then has to
# start a fresh process (presumably so the packages installed above become
# importable -- nothing after this line runs in the current process).
import os
kernel_pid = os.getpid()
os.kill(kernel_pid, 9)

# Source files

In [1]:
folder_path = "/content/drive/MyDrive/PhD/OOD_VQA/Exps"
%cd $folder_path

/content/drive/MyDrive/PhD/OOD_VQA/Exps


In [2]:
# Sanity check: print the current working directory after the %cd above.
!pwd

/content/drive/MyDrive/PhD/OOD_VQA/Exps


# Import

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from textwrap import wrap
import math
import pandas as pd
from tqdm import tqdm
import os
import random
from utils import *

# Test

In [None]:
# @title Image details { form-width: "70%" }
object_name = "cat" # @param ["dog", "cat", "horse", "cow", "duck", "unknown object"]
object_color = "green" # @param {type:"string"}
bg_type = "house" # @param ["house"]
bg_color = "white" # @param {type:"string"}

gauss = "None" # @param ["None", "Gaussian"]
g_mean = "0.5" # @param {type:"string"}
g_std = "0.5" # @param {type:"string"}

# Instance files are named "<bg_color>_<bg_type>_<object_color>_<object>".
# The form offers "unknown object", but the files use the bare token "unknown"
# (e.g. "white_house_green_unknown" in the batch cell's image list), so map the
# form value to the file token before building the name.
file_object = "unknown" if "unknown" in object_name else object_name
image_name = f"{bg_color}_{bg_type}_{object_color}_{file_object}"

# Gaussian images are named from the mean/std strings with their first two
# characters dropped ("0.5" -> "5"), e.g. "Gaussian_5_5".
if gauss=="Gaussian": image_name=f"{gauss}_{g_mean[2:]}_{g_std[2:]}"

image_file_path = f"./instances/{image_name}.png"

if not os.path.isfile(image_file_path):
  # `image` is left untouched here, so later cells would see a stale or missing value.
  print("Image is not available!")
else:
  print(f"Loading {image_name}...")
  image = plt.imread(image_file_path)[:,:,:3]  # keep the first three channels only
  image = rescale_image(image)
  print("image loading is done!")

Loading white_house_green_cat...
image loading is done!


## Load model

In [None]:
# @title Model details { form-width: "70%" }
model_type = "VQA" # @param ["VQA", "IC"]
model_name = "Vilt" # @param ["GiT_base", "GiT_large", "Blip_base", "Blip_large", "Vilt"]

# Bring the wrapper classes for the selected task into the namespace.
if model_type=="IC":
  from ic_class import *
elif model_type=="VQA":
  from vqa_class import *

# Registry: task -> model name -> checkpoint id ("ckpt") and wrapper class ("class").
# NOTE(review): the VQA entries name GiT/Blip/Vilt unconditionally, so this table
# presumably only builds when those names are importable -- confirm for the IC path.
vqa_models = {
    "GiT_base"  : {"ckpt": "microsoft/git-base-vqav2", "class":GiT},
    "GiT_large" : {"ckpt": "microsoft/git-large-vqav2", "class":GiT},
    "Blip_base" : {"ckpt": "Salesforce/blip-vqa-base", "class":Blip},
    "Blip_large": {"ckpt": "Salesforce/blip-vqa-capfilt-large", "class":Blip},
    "Vilt"      : {"ckpt": "dandelin/vilt-b32-finetuned-vqa", "class":Vilt},
}
ic_models = {
    "None"      : {"ckpt": "", "class":None},
}
models_chkpts = {"VQA": vqa_models, "IC": ic_models}

print(f"Loading {model_name} model for {model_type}...")
selected_entry = models_chkpts[model_type][model_name]
model_class = selected_entry["class"]
checkpoint = selected_entry["ckpt"]
# The wrapper is constructed from the checkpoint id alone.
model = model_class(checkpoint)
print("model loading is done!")

Loading Vilt model for VQA...
model loading is done!


## Load questions and scores

In [None]:
# in the image variables
object_1 = object_name if "unknown" not in object_name else "unknown object"
color_1, object_2, color_2 = object_color, bg_type, bg_color


# not in the image variables
object_3, object_4 = "dragon", "train"

# Candidate colours for the two absent objects: drop whatever colours are used in
# the image, then draw two different ones (the first pick is removed before the second).
colors_list = [c for c in ["green", "red", "pink", "brown"] if c not in (color_1, color_2)]
color_3 = random.choice(colors_list)
colors_list.remove(color_3)
color_4 = random.choice(colors_list)

# Placeholder name -> concrete value, used to fill the question/answer templates.
objects_dict = {
    "object1": object_1,
    "color1": color_1,
    "object2": object_2,
    "color2": color_2,
    "object3": object_3,
    "color3": color_3,
    "object4": object_4,
    "color4": color_4,
}

In [None]:
# Read the question sheet and cast every cell to str, so the criteria columns can
# be compared against string codes such as "0" / "1" later on.
qsts_file_path = "./qsts_original.xlsx"
qsts_sheet = pd.read_excel(qsts_file_path)
qsts_sheet = qsts_sheet.astype(str)
qsts_sheet.head()

Unnamed: 0,image_type,used_language,specificity,question_length,complexity,image_relatedness,question,answer_gt
0,0,0,0,0.0,0.0,0.0,what is the color1 object?,object1
1,0,0,0,0.0,0.0,0.0,what is in the image with the object1?,object2+sky
2,0,0,0,0.0,0.0,0.0,what is in the image with the object2?,object1+sky
3,0,0,0,0.0,0.0,0.0,what is the object1 color?,color1
4,0,0,0,0.0,0.0,0.0,what is the object2 color?,color2


In [None]:
# Keep only the rows written for this kind of image:
# image_type "1" for Gaussian images, "0" for normal images.
wanted_image_type = "1" if gauss=="Gaussian" else "0"
qsts_sheet = qsts_sheet[qsts_sheet['image_type'] == wanted_image_type]
qsts_sheet.head()

Unnamed: 0,image_type,used_language,specificity,question_length,complexity,image_relatedness,question,answer_gt
0,0,0,0,0.0,0.0,0.0,what is the color1 object?,object1
1,0,0,0,0.0,0.0,0.0,what is in the image with the object1?,object2+sky
2,0,0,0,0.0,0.0,0.0,what is in the image with the object2?,object1+sky
3,0,0,0,0.0,0.0,0.0,what is the object1 color?,color1
4,0,0,0,0.0,0.0,0.0,what is the object2 color?,color2


In [None]:
# Run both template columns through fill_placeholders with the same mapping
# (presumably it swaps tokens such as "object1"/"color1" for the values in
# objects_dict -- see utils). Questions are processed first, then ground truths.
questions, answer_gt = (
    fill_placeholders(qsts_sheet[column].values, objects_dict)
    for column in ('question', 'answer_gt')
)

In [None]:
def _drop_echoed_question(answer, question):
  """Return `answer` with an echo of `question` removed.

  The echo is searched for, in order, as the question verbatim, lower-cased, and
  lower-cased without its trailing question mark. The first form found is removed
  and the result is trimmed; if none is found, `answer` is returned unchanged.
  """
  for echoed in (question, question.lower()):
    if echoed in answer:
      return answer.replace(echoed, "").strip()
  bare = question.lower().rstrip("?").strip()
  if bare and bare in answer:
    # A detached "?" may be left behind at either end; trim it with the spaces.
    return answer.replace(bare, "").strip(" ?")
  return answer


outputs, answers, logits, correct, no_tokens = [], [], [], [], []

# One question per forward pass; tqdm already reports progress, so no per-item print.
for i in tqdm(range(len(questions))):
  question = questions[i]
  output = model.generate_outputs([image], [question])[0] # only one item per cycle
  outputs.append(output)

  answer = model.decode_output(output)
  if model_name!="Vilt": answer=answer[0]
  answer = answer.replace('\n', ' ').strip()
  # Remove an echoed question before scoring: scoring is a substring test, so
  # leftover question text could match a ground-truth word by accident.
  answer = _drop_echoed_question(answer, question)
  answers.append(answer)

  logit = model.get_logits(output)
  no_tokens.append(len(logit))
  logits.append(logit)

  # answer_gt holds "+"-separated alternatives; any one appearing in the answer counts.
  # (Stored as 0/1; the name `eval` is avoided so the builtin stays usable.)
  is_correct = int(any(gt in answer for gt in answer_gt[i].split("+")))
  correct.append(is_correct)

In [None]:
# Output folder for this task/model pair; created (with parents) when missing.
results_path = f"./test_results/{model_type}/{model_name}/"
os.makedirs(results_path, exist_ok=True)

In [None]:
# Build the output stem from its parts instead of appending to the previous value
# of `results_path`, so re-running this cell always yields the same path.
results_dir = f"./test_results/{model_type}/{model_name}/"
os.makedirs(results_dir, exist_ok=True)
results_path = results_dir + f"{model_type}_{model_name}_{image_name}"

# Per-question logits may differ in length (see `no_tokens`). NumPy >= 1.24 raises
# ValueError for such ragged input instead of building an object array, so request
# dtype=object explicitly in that case. np.save adds the ".npy" suffix itself.
try:
  logits_array = np.array(logits)
except ValueError:
  logits_array = np.array(logits, dtype=object)
np.save(results_path, logits_array)

# "details" sheet: one row per setting (only the image type for Gaussian images).
data={"image_type":gauss} if gauss=="Gaussian" else objects_dict
details_df = pd.DataFrame(data=data, index=[0]).T

# "answers" sheet: the criteria columns of each question, then the model results.
criteria = ["image_type", "used_language", "specificity", "question_length", "complexity", "image_relatedness"]
criteria_cols = {c: qsts_sheet[c].values for c in criteria}

answers_df = pd.DataFrame({
    **criteria_cols,
    'question': questions,
    'answer_gt': answer_gt,
    "answer": answers,
    "correct": correct,
    "no_tokens": no_tokens,
})
answers_df.index = pd.RangeIndex(1, len(questions) + 1)  # 1-based question numbers

# Write both sheets through a single writer: no append mode is needed, so this
# works with whichever xlsx engine pandas picks by default.
with pd.ExcelWriter(results_path+".xlsx") as writer:
  details_df.to_excel(writer, sheet_name="details")
  answers_df.to_excel(writer, sheet_name="answers")
  # for i, logit in tqdm(enumerate(logits)):
  #   df = pd.DataFrame(np.array(logit).T.tolist(), columns=[str(i) for i in range(len(logit))])
  #   df.to_excel(writer, sheet_name=f"logits_{i+1}", index=False)

# Test loop

In [None]:
# image_names = ["Gaussian_5_2", "Gaussian_5_3", "Gaussian_5_4", "Gaussian_5_5",
#                "white_house_brown_cat", "white_house_brown_cow", "white_house_brown_dog", "white_house_brown_horse",
#                "white_house_green_cat", "white_house_green_unknown",
#                "white_house_pink_cow", "white_house_pink_unknown",
#                "white_house_purple_dog", "white_house_purple_horse",
#                "white_house_red_duck", "white_house_red_unknown",
#                "white_house_yellow_unknown"]

In [None]:
# # @title Model details { form-width: "70%" }
# model_type = "VQA" # @param ["VQA", "IC"]
# model_name = "GiT_large" # @param ["GiT_base", "GiT_large", "Blip_base", "Blip_large", "Vilt"]

# if model_type=="VQA":
#   from vqa_class import *
# elif model_type=="IC":
#   from ic_class import *

# models_chkpts = {
#     "VQA": {
#         "GiT_base"  : {"ckpt": "microsoft/git-base-vqav2", "class":GiT},
#         "GiT_large" : {"ckpt": "microsoft/git-large-vqav2", "class":GiT},
#         "Blip_base" : {"ckpt": "Salesforce/blip-vqa-base", "class":Blip},
#         "Blip_large": {"ckpt": "Salesforce/blip-vqa-capfilt-large", "class":Blip},
#         "Vilt"      : {"ckpt": "dandelin/vilt-b32-finetuned-vqa", "class":Vilt}
#     },
#     "IC": {
#         "None"      : {"ckpt": "", "class":None}
#     }
# }

# print(f"Loading {model_name} model for {model_type}...")
# model_class = models_chkpts[model_type][model_name]["class"]
# checkpoint = models_chkpts[model_type][model_name]["ckpt"]
# model = model_class(checkpoint)
# print("model loading is done!")

In [None]:
# Done_list = list()

# for image_name in tqdm(image_names):

#   image_file_path = f"./instances/{image_name}.png"

#   if not os.path.isfile(image_file_path):
#     print("\nImage is not available!")
#     break
#   else:
#     print(f"\nLoading {image_name}...")
#     image = plt.imread(image_file_path)[:,:,:3]
#     image = rescale_image(image)
#     print("image loading is done!")

#   color_2, object_2 = "white", "house"
#   color_1, object_1 = "unknown object", "red"

#   qsts_file_path = "./qsts_original.xlsx"
#   qsts_sheet = pd.read_excel(qsts_file_path).astype(str)

#   if "Gaussian" in image_name:
#     gauss = "Gaussian"
#     qsts_sheet=qsts_sheet.loc[qsts_sheet['image_type'] == "1"] # gauss images

#   else:
#     gauss = "None"
#     qsts_sheet=qsts_sheet.loc[qsts_sheet['image_type'] == "0"] # normal images

#     color_2, object_2, color_1, object_1 = image_name.split("_")

#   # not in the image variables
#   object_3= "dragon"
#   colors_list = ["green", "red", "pink", "brown"]
#   if color_1 in colors_list: colors_list.remove(color_1)
#   if color_2 in colors_list: colors_list.remove(color_2)
#   color_3 = random.choice(colors_list)
#   colors_list.remove(color_3)
#   object_4 = "train"
#   color_4 = random.choice(colors_list)

#   objects_dict = {
#     "object1"  : object_1, "color1"  : color_1,
#     "object2"  : object_2, "color2"  : color_2,
#     "object3"  : object_3, "color3"  : color_3,
#     "object4"  : object_4, "color4"  : color_4,
#   }

#   questions = qsts_sheet['question'].values
#   questions = fill_placeholders(questions, objects_dict)

#   answer_gt = qsts_sheet['answer_gt'].values
#   answer_gt = fill_placeholders(answer_gt, objects_dict)

#   criteria = ["image_type", "used_language", "specificity", "question_length", "complexity", "image_relatedness"]
#   criteria_cols = dict()
#   for c in criteria:
#     criteria_cols[c] = qsts_sheet[c].values

#   outputs, answers, logits, correct, no_tokens = list(), list(), list(), list(), list()

#   for i in range(len(questions)):
#     qst = [questions[i]]
#     output = model.generate_outputs([image], qst)[0] # only one item per cycle
#     # outputs.append(output)
#     answer = model.decode_output(output)
#     if model_name!="Vilt": answer=answer[0]
#     answer = answer.replace('\n', ' ').strip()
#     qst_text = qst[0][:-1] # no qst mark
#     if qst_text.lower() in answer: answer=answer.replace(qst[0].lower(),"").strip('?').strip() # remove '?' and empty space
#     answers.append(answer)
#     logit = model.get_logits(output)
#     logits.append(logit)
#     no_tokens.append(len(logit))
#     eval = 0
#       for gt in answer_gt[i].split("+"):
#         if gt in answer:
#           eval = 1
#           break
#     correct.append(eval)
#     print(f"{i},", end=" ")

#   results_path = f"./test_results/{model_type}/{model_name}/"
#   if not os.path.isdir(results_path): os.makedirs(results_path)
#   results_path = results_path + f"{model_type}_{model_name}_{image_name}"
#   np.save(results_path, np.array(logits))
#   data={"image_type":gauss} if gauss=="Gaussian" else objects_dict

#   df = pd.DataFrame(data=data, index=[0])
#   df = (df.T)
#   df.to_excel(results_path+".xlsx", sheet_name="details")

#   with pd.ExcelWriter(results_path+".xlsx", mode='a') as writer:
#     df = pd.DataFrame()
#     for c in criteria:
#       df[c] = criteria_cols[c]
#     df['question'] = questions
#     df['answer_gt'] = answer_gt
#     df["answer"] = answers
#     df["correct"] = correct
#     df["no_tokens"] = no_tokens
#     s = pd.Series([i+1 for i in range(len(questions))])
#     df.set_index(s, inplace=True)
#     df.to_excel(writer, sheet_name="answers")

#   Done_list.append(image_name)
#   print("\nDone:::\n", Done_list)

In [None]:
image_names = ["Gaussian_5_2", "Gaussian_5_3", "Gaussian_5_4", "Gaussian_5_5",  # 0, 1, 2, 3,
               "white_house_brown_cat", "white_house_brown_cow", "white_house_brown_dog", "white_house_brown_horse", # 4, 5, 6, 7,
               "white_house_green_cat", "white_house_green_unknown", # 8, 9,
               "white_house_pink_cow", "white_house_pink_unknown", # 10, 11,
               "white_house_purple_dog", "white_house_purple_horse", # 12, 13
               "white_house_red_duck", "white_house_red_unknown", # 14, 15
               "white_house_yellow_unknown"] # 16

model_names = ["Vilt", "Blip_base", "Blip_large", "GiT_base", "GiT_large"]
model_type = "VQA"

if model_type=="VQA":
  from vqa_class import *
elif model_type=="IC":
  from ic_class import *

# Registry: task -> model name -> checkpoint id ("ckpt") and wrapper class ("class").
models_chkpts = {
    "VQA": {
        "GiT_base"  : {"ckpt": "microsoft/git-base-vqav2", "class":GiT},
        "GiT_large" : {"ckpt": "microsoft/git-large-vqav2", "class":GiT},
        "Blip_base" : {"ckpt": "Salesforce/blip-vqa-base", "class":Blip},
        "Blip_large": {"ckpt": "Salesforce/blip-vqa-capfilt-large", "class":Blip},
        "Vilt"      : {"ckpt": "dandelin/vilt-b32-finetuned-vqa", "class":Vilt}
    },
    "IC": {
        "None"      : {"ckpt": "", "class":None}
    }
}


def _strip_echoed_question(answer, question, tidy):
  """Remove a lower-cased echo of `question` from `answer`.

  The echo is detected using the question without its final character (the
  question mark). If the full lower-cased question is present it is removed;
  otherwise the version without the final character is removed, so a detected
  echo is always actually stripped. With `tidy` true, leftover spaces and
  question marks at both ends are trimmed as well.
  """
  question_lc = question.lower()
  bare_lc = question[:-1].lower()
  if bare_lc not in answer:
    return answer
  echoed = question_lc if question_lc in answer else bare_lc
  answer = answer.replace(echoed, "")
  if tidy:
    answer = answer.strip(" ?")
  return answer


def _logits_to_array(per_question_logits):
  """Pack the per-question logits into one array that np.save can store.

  Questions may yield different numbers of tokens. NumPy >= 1.24 raises
  ValueError for such ragged input instead of building an object array, so
  dtype=object is requested explicitly in that case.
  """
  try:
    return np.array(per_question_logits)
  except ValueError:
    return np.array(per_question_logits, dtype=object)


# The question sheet does not depend on the model or the image: read it once.
qsts_file_path = "./qsts_original.xlsx"
qsts_all = pd.read_excel(qsts_file_path).astype(str)
criteria = ["image_type", "used_language", "specificity", "question_length", "complexity", "image_relatedness"]

# Only the sliced model(s) are run; widen the slice to run more of model_names.
for model_name in model_names[2:3]:
  print("\n\n")
  print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
  print(f"{model_name} STARTS!!!")
  print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
  print("\n\n")

  print(f"Loading {model_name} model for {model_type}...")
  model_class = models_chkpts[model_type][model_name]["class"]
  checkpoint = models_chkpts[model_type][model_name]["ckpt"]
  model = model_class(checkpoint)
  print("model loading is done!")

  Done_list = list()

  for image_name in tqdm(image_names):

    image_file_path = f"./instances/{image_name}.png"

    if not os.path.isfile(image_file_path):
      # A missing file stops the remaining images of this model as well.
      print("\nImage is not available!")
      break

    print(f"\nLoading {image_name}...")
    image = plt.imread(image_file_path)[:,:,:3]  # keep the first three channels only
    image = rescale_image(image)
    print("image loading is done!")

    # Defaults, used as-is for Gaussian images (their names carry no object/colour).
    color_2, object_2 = "white", "house"
    color_1, object_1 = "red", "unknown object"

    if "Gaussian" in image_name:
      gauss = "Gaussian"
      qsts_sheet = qsts_all.loc[qsts_all['image_type'] == "1"] # gauss images
    else:
      gauss = "None"
      qsts_sheet = qsts_all.loc[qsts_all['image_type'] == "0"] # normal images

      # Normal image names read "<bg colour>_<bg type>_<object colour>_<object>".
      color_2, object_2, color_1, object_1 = image_name.split("_")
      object_1 = "unknown object" if "unknown" in object_1 else object_1

    # not in the image variables
    object_3 = "dragon"
    colors_list = ["green", "pink", "brown"]
    if color_1 in colors_list: colors_list.remove(color_1)
    if color_2 in colors_list: colors_list.remove(color_2)
    color_3 = random.choice(colors_list)
    colors_list.remove(color_3)  # so color_4 differs from color_3
    object_4 = "train"
    color_4 = random.choice(colors_list)

    objects_dict = {
      "object1"  : object_1, "color1"  : color_1,
      "object2"  : object_2, "color2"  : color_2,
      "object3"  : object_3, "color3"  : color_3,
      "object4"  : object_4, "color4"  : color_4,
    }

    questions = fill_placeholders(qsts_sheet['question'].values, objects_dict)
    answer_gt = fill_placeholders(qsts_sheet['answer_gt'].values, objects_dict)

    criteria_cols = {c: qsts_sheet[c].values for c in criteria}

    outputs, full_answers, clean_answers, logits, correct, no_tokens = [], [], [], [], [], []

    for i in range(len(questions)):
      question = questions[i]
      output = model.generate_outputs([image], [question])[0] # only one item per cycle

      if model_name=="Vilt":
        full_answer = model.decode_output(output)
        clean_answer = full_answer
      else:
        full_answer = model.decode_output(output, skip_special_tokens=False)[0]
        clean_answer = model.decode_output(output, skip_special_tokens=True)[0]

      full_answer = full_answer.replace('\n', ' ').strip()
      clean_answer = clean_answer.replace('\n', ' ').strip()
      # Remove an echoed question before scoring: scoring is a substring test, so
      # leftover question text could match a ground-truth word by accident.
      full_answer = _strip_echoed_question(full_answer, question, tidy=False)
      clean_answer = _strip_echoed_question(clean_answer, question, tidy=True)
      full_answers.append(full_answer)
      clean_answers.append(clean_answer)

      logit = model.get_logits(output)
      logits.append(logit)
      no_tokens.append(len(logit))

      # answer_gt holds "+"-separated alternatives; any one appearing in the answer counts.
      # (Stored as 0/1; the name `eval` is avoided so the builtin stays usable.)
      is_correct = int(any(gt in clean_answer for gt in answer_gt[i].split("+")))
      correct.append(is_correct)
      print(f"{i},", end=" ")

    results_dir = f"./test_results/{model_type}/{model_name}/"
    os.makedirs(results_dir, exist_ok=True)
    results_path = results_dir + f"{model_type}_{model_name}_{image_name}"
    np.save(results_path, _logits_to_array(logits))  # np.save adds ".npy"

    # "details" sheet: one row per setting (only the image type for Gaussian images).
    data={"image_type":gauss} if gauss=="Gaussian" else objects_dict
    details_df = pd.DataFrame(data=data, index=[0]).T

    # "answers" sheet: the criteria columns of each question, then the model results.
    answers_df = pd.DataFrame({
      **criteria_cols,
      'question': questions,
      'answer_gt': answer_gt,
      "full_answer": full_answers,
      "clean_answer": clean_answers,
      "correct": correct,
      "no_tokens": no_tokens,
    })
    answers_df.index = pd.RangeIndex(1, len(questions) + 1)  # 1-based question numbers

    # Both sheets go through one writer, so no append mode is required.
    with pd.ExcelWriter(results_path+".xlsx") as writer:
      details_df.to_excel(writer, sheet_name="details")
      answers_df.to_excel(writer, sheet_name="answers")

    Done_list.append(image_name)
    print("\nDone:::\n", Done_list)

  print("\n\n")
  print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
  print(f"{model_name} is DONE!!!")
  print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
  print("\n\n")




>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Blip_large STARTS!!!
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>



Loading Blip_large model for VQA...


preprocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/524 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.54G [00:00<?, ?B/s]

model loading is done!


  0%|          | 0/17 [00:00<?, ?it/s]


Loading Gaussian_5_2...
image loading is done!
0, 1, 2, 3, 4, 

  np.save(results_path, np.array(logits))
  6%|▌         | 1/17 [00:06<01:39,  6.23s/it]


Done:::
 ['Gaussian_5_2']

Loading Gaussian_5_3...
image loading is done!
0, 1, 2, 3, 

  np.save(results_path, np.array(logits))
 12%|█▏        | 2/17 [00:12<01:34,  6.27s/it]

4, 
Done:::
 ['Gaussian_5_2', 'Gaussian_5_3']

Loading Gaussian_5_4...
image loading is done!
0, 1, 2, 3, 

  np.save(results_path, np.array(logits))
 18%|█▊        | 3/17 [00:17<01:21,  5.79s/it]

4, 
Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4']

Loading Gaussian_5_5...
image loading is done!
0, 1, 2, 3, 

  np.save(results_path, np.array(logits))
 24%|██▎       | 4/17 [00:23<01:16,  5.90s/it]

4, 
Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5']

Loading white_house_brown_cat...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 

  np.save(results_path, np.array(logits))
 29%|██▉       | 5/17 [04:45<19:37, 98.16s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat']

Loading white_house_brown_cow...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190

  np.save(results_path, np.array(logits))
 35%|███▌      | 6/17 [09:10<28:24, 154.92s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow']

Loading white_house_brown_dog...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185

  np.save(results_path, np.array(logits))
 41%|████      | 7/17 [13:34<31:46, 190.65s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog']

Loading white_house_brown_horse...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 1

  np.save(results_path, np.array(logits))
 47%|████▋     | 8/17 [17:53<31:51, 212.33s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse']

Loading white_house_green_cat...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 1

  np.save(results_path, np.array(logits))
 53%|█████▎    | 9/17 [22:12<30:14, 226.83s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat']

Loading white_house_green_unknown...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 16

  np.save(results_path, np.array(logits))
 59%|█████▉    | 10/17 [26:32<27:39, 237.01s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown']

Loading white_house_pink_cow...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164

  np.save(results_path, np.array(logits))
 65%|██████▍   | 11/17 [30:56<24:32, 245.46s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow']

Loading white_house_pink_unknown...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 

  np.save(results_path, np.array(logits))
 71%|███████   | 12/17 [35:25<21:02, 252.53s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown']

Loading white_house_purple_dog...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,

  np.save(results_path, np.array(logits))
 76%|███████▋  | 13/17 [39:49<17:03, 255.87s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog']

Loading white_house_purple_horse...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 1

  np.save(results_path, np.array(logits))
 82%|████████▏ | 14/17 [44:05<12:47, 255.94s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog', 'white_house_purple_horse']

Loading white_house_red_duck...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 14

  np.save(results_path, np.array(logits))
 88%|████████▊ | 15/17 [48:23<08:33, 256.79s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog', 'white_house_purple_horse', 'white_house_red_duck']

Loading white_house_red_unknown...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 

  np.save(results_path, np.array(logits))
 94%|█████████▍| 16/17 [52:41<04:17, 257.12s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog', 'white_house_purple_horse', 'white_house_red_duck', 'white_house_red_unknown']

Loading white_house_yellow_unknown...
image loading is done!
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 

  np.save(results_path, np.array(logits))
100%|██████████| 17/17 [57:04<00:00, 201.45s/it]


Done:::
 ['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog', 'white_house_purple_horse', 'white_house_red_duck', 'white_house_red_unknown', 'white_house_yellow_unknown']



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Blip_large is DONE!!!
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>








# Decode logits (if needed)

In [31]:
# @title Model details { form-width: "70%" }
model_type = "VQA" # @param ["VQA", "IC"]
model_name = "GiT_base" # @param ["GiT_base", "GiT_large", "Blip_base", "Blip_large", "Vilt"]

# Pull in the model wrapper classes for the selected task type.
# NOTE(review): GiT, Blip and Vilt are presumably exported by vqa_class — confirm.
if model_type=="VQA":
  from vqa_class import *
elif model_type=="IC":
  from ic_class import *

# Registry: task type -> model name -> checkpoint string ("ckpt") and the wrapper
# class ("class") that gets instantiated with that checkpoint.
# NOTE(review): the "VQA" entries are evaluated even when model_type is "IC";
# confirm GiT/Blip/Vilt are still defined in that case, otherwise this dict
# literal raises NameError before the validation below runs.
models_chkpts = {
    "VQA": {
        "GiT_base"  : {"ckpt": "microsoft/git-base-vqav2", "class":GiT},
        "GiT_large" : {"ckpt": "microsoft/git-large-vqav2", "class":GiT},
        "Blip_base" : {"ckpt": "Salesforce/blip-vqa-base", "class":Blip},
        "Blip_large": {"ckpt": "Salesforce/blip-vqa-capfilt-large", "class":Blip},
        "Vilt"      : {"ckpt": "dandelin/vilt-b32-finetuned-vqa", "class":Vilt}
    },
    "IC": {
        "None"      : {"ckpt": "", "class":None}
    }
}

# Fail early with a readable message. Without this check, an unsupported form
# selection surfaces as a bare KeyError (name missing for the task type) or as
# "TypeError: 'NoneType' object is not callable" (placeholder entry with class None).
available_models = models_chkpts.get(model_type, {})
model_entry = available_models.get(model_name)
if model_entry is None or model_entry["class"] is None:
  usable = [name for name, entry in available_models.items() if entry["class"] is not None]
  raise ValueError(
      f"Model '{model_name}' is not available for model_type '{model_type}'. "
      f"Available models: {usable}"
  )

print(f"Loading {model_name} model for {model_type}...")
model_class = model_entry["class"]
checkpoint = model_entry["ckpt"]
# The wrapper class is constructed from the checkpoint string alone.
model = model_class(checkpoint)
print("model loading is done!")

Loading GiT_base model for VQA...


preprocessor_config.json:   0%|          | 0.00/503 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/453 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.82k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/709M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

model loading is done!


In [32]:
# Path of the saved logits file for the task type selected in the form cell.
np_logits_file_path = f"./test_results/{model_type}/{model_type}_test_logits.npy"
# NOTE(review): allow_pickle=True makes np.load unpickle Python objects, which can
# execute arbitrary code — only load files produced by this project.
# .item() unwraps the single stored object; the cell output shows it is a dict
# keyed by model name.
logits_dict = np.load(np_logits_file_path, allow_pickle=True).item()
logits_dict.keys()

dict_keys(['Vilt', 'Blip_base', 'Blip_large', 'GiT_base', 'GiT_large'])

In [33]:
# Keep only the entry for the model chosen in the form cell; displaying its keys
# lists the image names for which logits were stored.
model_logits = logits_dict[model_name]
model_logits.keys()

dict_keys(['Gaussian_5_2', 'Gaussian_5_3', 'Gaussian_5_4', 'Gaussian_5_5', 'white_house_brown_cat', 'white_house_brown_cow', 'white_house_brown_dog', 'white_house_brown_horse', 'white_house_green_cat', 'white_house_green_unknown', 'white_house_pink_cow', 'white_house_pink_unknown', 'white_house_purple_dog', 'white_house_purple_horse', 'white_house_red_duck', 'white_house_red_unknown', 'white_house_yellow_unknown'])

In [34]:
# Image whose stored logits are inspected in the following cells; the name has to
# be one of the keys of model_logits.
image_name = "white_house_brown_cat"
image_model_logits = model_logits[image_name]

In [35]:
# Index of the entry of image_model_logits to decode
# (presumably one entry per question — TODO confirm).
qst_idx = 0
# Take the argmax id of every element stored for that entry
# (presumably one logits vector per generated token — TODO confirm).
sequences = [np.argmax(step_logits) for step_logits in image_model_logits[qst_idx]]
# Decode each id on its own, keeping special tokens, and print "<position> <token>"
# pairs on a single line.
for i, token_id in enumerate(sequences):
  s = model.processor.batch_decode([token_id], skip_special_tokens=False)[0]
  print(i, s, end=" ")

0 cat 1 [SEP] 

In [27]:
# Disabled scratch cell: for every entry of image_model_logits that has exactly two
# elements, decode the argmax token of each element and print one line per entry.
# NOTE(review): the output shown under this cell appears to come from a run made
# before the code was commented out — confirm before relying on it.
# for logits in image_model_logits:
#   if len(logits)==2:
#     sequences = [np.argmax(i) for i in logits]
#     for i in range(len(sequences)):
#       s = model.processor.batch_decode([sequences[i]], skip_special_tokens=False)[0]
#       print(i, s, end=" ")
#     print()

0 cat 1 [SEP] 
0 house 1 [SEP] 
0 cat 1 [SEP] 
0 brown 1 [SEP] 
0 white 1 [SEP] 
0 brown 1 [SEP] 
0 white 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 cat 1 [SEP] 
0 front 1 [SEP] 
0 front 1 [SEP] 
0 front 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 
0 sitting 1 [SEP] 
0 sitting 1 [SEP] 
0 house 1 [SEP] 
0 white 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 black 1 [SEP] 
0 house 1 [SEP] 
0 nothing 1 [SEP] 
0 sitting 1 [SEP] 
0 sitting 1 [SEP] 
0 house 1 [SEP] 
0 house 1 [SEP] 
0 house 1 [SEP] 
0 sitting 1 [SEP] 
0 white 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 black 1 [SEP] 
0 cat 1 [SEP] 
0 sitting 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 house 1 [SEP] 
0 front 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 house 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 
0 sitting 1 [SEP] 
0 cat 1 [SEP] 
0 house 1 [SEP] 
0 black 1 [SEP] 
0 sitting 1 [SEP] 
0 sitting 1 [SEP] 
0 house 1 [SEP] 
0 house 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 
0 cat 1 [SEP] 


In [10]:
# sequences = logits[169]

In [None]:
# sequences = [np.argmax(i) for i in sequences]

In [None]:
# for i in range(len(sequences)):
#   s = model.processor.batch_decode([sequences[i]], skip_special_tokens=False)[0]
#   print(i, s)