In [1]:
import json
import os
from collections import Counter
from os import path as osp

import numpy as np
import pandas as pd

In [2]:
# dataset split whose annotations and generated captions are analysed
split = 'val'

# The annotation files have to be downloaded manually and placed in nlxgpt/data:
# https://drive.google.com/drive/folders/16sJjeEQE2o23G-GGUi870ubXzJjdRDua
# (see nlxgpt/README.md -> Annotations Download -> VQA-X link)

# absolute path of the `data` directory located next to the working directory
base_dir = osp.abspath(osp.join(osp.pardir, 'data'))

# Preparation

## Read Annotations

In [3]:
def load_json(file):
    """Read a JSON file and return the decoded Python object.

    Args:
        file: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)

def get_confident_answers(list_of_answers):
    """Keep only the answers whose ``'answer_confidence'`` equals ``'yes'``.

    Args:
        list_of_answers: iterable of dicts, each with an
            ``'answer_confidence'`` key.

    Returns:
        A new list with the confident answers, in their original order.
    """
    confident = []
    for candidate in list_of_answers:
        if candidate['answer_confidence'] == 'yes':
            confident.append(candidate)
    return confident

def majority_vote(list_of_answers):
    """Return the most frequent ``'answer'`` value among the given answers.

    Ties are resolved deterministically in favour of the answer that occurs
    first in ``list_of_answers``. (Taking ``max`` over a ``set`` picks the
    winner of a tie by set iteration order, which can differ between
    interpreter runs because string hashing is randomised.)

    Args:
        list_of_answers: iterable of dicts, each with an ``'answer'`` key.

    Returns:
        The answer value with the highest count.

    Raises:
        ValueError: if ``list_of_answers`` contains no answers.
    """
    counts = Counter(a['answer'] for a in list_of_answers)
    if not counts:
        raise ValueError('majority_vote() received no answers to vote on')
    # Counter keeps first-seen order and max() returns the first maximal key,
    # so the earliest answer wins a tie
    return max(counts, key=counts.__getitem__)

In [4]:
# location of the annotation file for the selected split
annotations_path = osp.join(base_dir, f'vqaX_{split}.json')

annotations = (
    pd.read_json(annotations_path)
    # transpose so that every entry becomes a row
    .T
    # keep only the answers marked as confident
    .assign(confident_answers=lambda df: df['answers'].map(get_confident_answers))
    # most frequent confident answer (used later to judge the predictions)
    .assign(top_answer=lambda df: df['confident_answers'].map(majority_vote))
    # give the index a name
    .rename_axis('question_id')
)

## Read & Prepare Samples

In [5]:
def process_sample_file(filename):
    """Load generated captions and split each into answer and explanation.

    The file is read with ``pd.read_json``; its ``image_id`` column is renamed
    to ``question_id`` and used as index. Every ``caption`` is split on the
    literal ``' because '``. A caption counts as valid only if that yields
    exactly two parts, so captions without the separator *and* captions
    containing it more than once are both treated as invalid.

    Args:
        filename: path of the JSON file holding the generated captions.

    Returns:
        A tuple ``(samples_df, invalid_ids)`` where ``samples_df`` holds the
        valid rows with two additional columns, ``answer`` and
        ``explanation``, and ``invalid_ids`` is the list of index labels of
        the rows that could not be split.
    """
    # rename question id column & set as index
    samples_df = (
        pd.read_json(filename)
        .rename(columns={'image_id': 'question_id'})
        .set_index('question_id')
    )

    # split answers from explanations and collect the ids where this fails
    parts = samples_df['caption'].map(lambda caption: caption.split(' because '))
    invalid_ids = samples_df.index[(parts.map(len) != 2).to_numpy()]

    # Restrict to valid samples. Filtering by label (not by row) drops every
    # row sharing an invalid id. The explicit copy makes the result an
    # independent frame, so adding columns below does not raise
    # SettingWithCopyWarning.
    keep = ~samples_df.index.isin(invalid_ids)
    valid_df = samples_df[keep].copy()
    valid_parts = parts[keep]

    # separate columns for answers and explanations
    valid_df['answer'] = valid_parts.map(lambda pair: pair[0])
    valid_df['explanation'] = valid_parts.map(lambda pair: pair[1])

    return valid_df, invalid_ids.to_list()

def is_correct_answer(entry, annotations):
    """Check whether a predicted answer equals the annotated top answer.

    Args:
        entry: row of a samples frame; its ``name`` is used as the question
            id and its ``answer`` field as the prediction.
        annotations: frame indexed by question id with a ``top_answer``
            column.

    Returns:
        The result of comparing the prediction with the top answer stored
        for the same question id.
    """
    reference = annotations.loc[entry.name]['top_answer']
    return entry['answer'] == reference

In [6]:
# generated captions for the colour and the greyscale input
color_file = f'./unf_captions_full_11_{split}.json'
greyscale_file = f'./unf_captions_full_11_{split}_greyscale.json'

# import
clr_samples, clr_invalid = process_sample_file(color_file)
bw_samples, bw_invalid = process_sample_file(greyscale_file)

# an id that is invalid in either file is removed from both frames, so that
# the two frames cover the same questions
invalid = set(clr_invalid + bw_invalid)
print(f'{len(invalid)} entries invalid')
clr_samples = clr_samples.drop(index=invalid, errors='ignore')
bw_samples = bw_samples.drop(index=invalid, errors='ignore')

# ensure idx are identical; Index.equals also copes with differing lengths,
# where an element-wise `==` would raise a ValueError instead of failing the assert
assert bw_samples.index.equals(clr_samples.index), \
    'colour and greyscale samples do not cover the same question ids'

2 entries invalid


In [7]:
# flag every predicted answer as correct / incorrect and report the share of
# correct answers, first for the colour and then for the greyscale samples
for header, samples in (('clr acc:', clr_samples), ('gs acc:', bw_samples)):
    samples['correct_answer'] = samples.apply(is_correct_answer, axis=1, args=(annotations,))
    accuracy = samples['correct_answer'].sum() / len(samples)
    print(header, round(accuracy, 2))

clr acc: 0.75
gs acc: 0.71


# Sample Selection

Selection criterion: items for which the model predicts
1. the correct answer on the colour input, and
2. an incorrect answer on the greyscale (black/white) input

In [8]:
# question ids answered correctly on the colour input
clr_is_correct = clr_samples['correct_answer'].eq(True)
true_clr_ids = clr_samples.loc[clr_is_correct].index
# question ids answered incorrectly on the b/w input
bw_is_wrong = bw_samples['correct_answer'].eq(False)
false_bw_ids = bw_samples.loc[bw_is_wrong].index
# ids fulfilling both conditions, i.e. the selection criterion
match_criterion = set(true_clr_ids).intersection(false_bw_ids)

# absolute number of hits and their share among all colour samples
n_hits = len(match_criterion)
perc_hits = round((n_hits / len(clr_samples)) * 100, 2)

print(f'{n_hits} samples ({perc_hits} %) match the criterion')

105 samples (7.21 %) match the criterion


In [9]:
def _collect_item(question_id):
    """Bundle the annotation and both model outputs for one question id."""
    ann = annotations.loc[question_id]
    clr_entry = clr_samples.loc[question_id]
    bw_entry = bw_samples.loc[question_id]

    # identifiers and ground truth taken from the annotations
    item = {
        'question_id': question_id,
        'image_id': ann['image_id'],
        'question': ann['question'],
        'top_answer': ann['top_answer'],
        'gt_answers': ann['answers'],
        'gt_explanations': ann['explanation'],
    }
    # model output for the colour (clr) and the greyscale (bw) input
    for prefix, entry in (('clr', clr_entry), ('bw', bw_entry)):
        item[f'{prefix}_full'] = entry['caption']
        item[f'{prefix}_answer'] = entry['answer']
        item[f'{prefix}_explanation'] = entry['explanation']
    return item


# one record per selected question, ordered by question id
out = [_collect_item(question_id) for question_id in sorted(match_criterion)]

In [10]:
# example: display the first selected item as a Series for a quick visual check
pd.Series(out[0])

question_id                                                  3711004
image_id                                                        3711
question                                 What is the cat sitting on?
top_answer                                                       car
gt_answers         [{'answer': 'car', 'answer_confidence': 'yes',...
gt_explanations    [cars have flat hoods and windshields, there i...
clr_full           car because the cat is sitting on the hood of ...
clr_answer                                                       car
clr_explanation              the cat is sitting on the hood of a car
bw_full            hood because the cat is sitting on the hood of...
bw_answer                                                       hood
bw_explanation               the cat is sitting on the hood of a car
dtype: object

In [11]:
# store the selected items as JSON in the working directory
output_path = 'selected_items.json'
with open(output_path, mode='w') as out_file:
    json.dump(out, out_file)