In [1]:
import numpy as np

In [2]:
# The annotations are a Python dict stored inside the .npz as a 0-d object array
# (hence the `[()]` to unwrap it). Object arrays are pickled, and NumPy >= 1.16.3
# refuses to unpickle unless allow_pickle=True is passed explicitly.
# NOTE(security): unpickling can execute arbitrary code -- only load files you trust.
ann_dict = np.load('./image_ann_dict.npz', allow_pickle=True)['im_name2ann'][()]

In [3]:
# We collect the set of attributes by manually inspecting the explanations
# (automatic adjective extraction failed and has not been fixed yet).
# Flatten every image's annotation list into one list of explanation sentences.
explanation_list = [
    explanation_info['explanation']
    for ann in ann_dict.values()
    for explanation_info in ann
]

# Preview at most the first 500 explanations. Slicing instead of indexing with
# range(500) avoids an IndexError when fewer than 500 explanations are available.
for i, explanation in enumerate(explanation_list[:500]):
    print(i, explanation)

0 The bird has a brown crown, skinny tarsus and small eyering.
1 A smallish brown bird with a short pointed beak and long tarsus for its body size.
2 This is a grey bird with a white eyebrow and eye.
3 This bird has a grey belly and breast with abrown crown and short pointy bill.
4 A bird with a grey belly and brown wings, back, and rump with a white spot above it's eyes.
5 This bird is a very unique colored brown all around and the tail is fairly darker than the rest.
6 This a small bird with feathers in graduated shades of brown.  It has long legs and a short, pointed beak.  It alsos has some lighter coloration around its eyes.
7 This bird has wings that are brown and has a small bill
8 This bird is brown with white and has a very short beak.
9 This bird is brown with black and has a very short beak.
10 The bird has a small round body covered in yellow feathers aside from it's crown, nape and throat.
11 A small yellow bird with a black ring around its face and short black pointed bea

In [4]:
# Hand-picked attribute vocabularies, grouped by the kind of property they describe.
attributes_size = {'little', 'small', 'medium', 'big', 'large'}
attributes_color = {'white', 'black', 'red', 'orange', 'green', 'grey', 'gray', 'blue', 'brown', 'yellow'}
attributes_misc = {'long', 'short', 'sharp', 'blunt', 'pointy'}

attributes_all = attributes_size | attributes_color | attributes_misc

# 'grey' and 'gray' are the same colour, so neither may be "flipped" into the other.
grey_synonyms = {'grey', 'gray'}

# build a dictionary to flip an attribute:
# flip_dict[w] lists the words of the same group that w may be replaced with.
# The candidates are sorted because set iteration order for strings changes
# between interpreter runs; a fixed order keeps seeded random choices repeatable.
flip_dict = {}
for group in (attributes_size, attributes_color, attributes_misc):
    for w in group:
        excluded = grey_synonyms if w in grey_synonyms else {w}
        flip_dict[w] = sorted(group - excluded)

In [5]:
import re

# Splits on runs of non-word characters; the capture group keeps those runs
# (e.g. punctuation) in the result instead of discarding them.
SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')


def tokenize(sentence):
    '''
    Lowercase a sentence and break it into word and punctuation tokens.

    Every piece is stripped of surrounding whitespace, and pieces that end up
    empty (pure-whitespace separators, leading/trailing blanks) are dropped.

    Return a list of token strings.
    '''
    stripped_pieces = (piece.strip() for piece in SENTENCE_SPLIT_REGEX.split(sentence.lower()))
    return [piece for piece in stripped_pieces if piece]

# Flip the attribute
def flip_noun_phrase(noun_phrases):
    '''
    Randomly flip the first attribute word found in a noun phrase.

    The phrase is lowercased and tokenized; the first token that belongs to
    `attributes_all` is swapped for a random alternative from `flip_dict`.
    Only that one attribute word is flipped (every whole-word occurrence of
    it), any other attribute words in the phrase are left as they are.

    Return tuple (new_phrase, is_flipped). The returned phrase is always
    lowercased, even when nothing was flipped.
    '''
    noun_phrases = noun_phrases.lower()
    for w in tokenize(noun_phrases):
        if w not in attributes_all:
            continue
        flipped_w = str(np.random.choice(flip_dict[w]))
        # Replace whole words only: a plain str.replace would also rewrite the
        # attribute where it is merely a substring of another word
        # (e.g. 'red' inside 'colored', 'long' inside 'oblong').
        whole_word = r'\b' + re.escape(w) + r'\b'
        flipped_noun_phrases = re.sub(whole_word, lambda _match: flipped_w, noun_phrases)
        return flipped_noun_phrases, True

    return noun_phrases, False

def build_explanation_pair(explanation_info_pos):
    '''
    Build a (positive, negative) explanation pair by flipping attribute words.

    `explanation_info_pos` is a dict read through the keys 'label',
    'explanation' and 'noun_phrases' (a list of dicts with a 'phrase' key).
    The negative copy gets a lowercased explanation in which each flippable
    noun phrase is replaced by its flipped version, 'label' set to False, and
    a 'noun_phrases' list that keeps only the 'phrase' key of each entry.
    The input dict itself is not modified.

    Return (explanation_info_pos, explanation_info_neg) if flipped
    otherwise return (None, None)
    '''

    # The input has to be ground-truth (labeled True)
    if not explanation_info_pos['label']:
        return None, None

    # Shallow copy of the top-level dict; the noun-phrase list is rebuilt with
    # fresh dicts so editing the negative's phrases never touches the positive's.
    explanation_info_neg = explanation_info_pos.copy()
    explanation_info_neg['noun_phrases'] = [{'phrase': p['phrase']} for p in explanation_info_pos['noun_phrases']]

    explanation_sentence = explanation_info_neg['explanation'].lower()

    is_sentence_flipped = False
    for p in explanation_info_neg['noun_phrases']:
        phrase = p['phrase']
        new_phrase, is_p_flipped = flip_noun_phrase(phrase)
        if not is_p_flipped:
            continue

        # The sentence was lowercased above, so look the phrase up in lowercase too.
        phrase_lower = phrase.lower()
        if phrase_lower in explanation_sentence:
            p['phrase'] = new_phrase
            explanation_sentence = explanation_sentence.replace(phrase_lower, new_phrase)
            is_sentence_flipped = True
        else:
            # The phrase does not occur verbatim in the sentence (e.g. its
            # punctuation differs, or an earlier flip already rewrote it):
            # report it and keep the original phrase unflipped.
            print('phrase not found:')
            print(phrase)
            print(explanation_sentence)
    explanation_info_neg['explanation'] = explanation_sentence
    explanation_info_neg['label'] = False

    if is_sentence_flipped:
        return explanation_info_pos, explanation_info_neg

    return None, None

In [6]:
# # Look at one example
# print(flip_noun_phrase('white belly'))

# im_name = '007.Parakeet_Auklet/Parakeet_Auklet_0001_795972.jpg'
# ann = ann_dict[im_name]

# explanation_info_pos, explanation_info_neg = build_explanation_pair(ann[0])
# print(explanation_info_pos)
# print(explanation_info_neg)

In [7]:
# Seed NumPy's global RNG so that re-running this cell repeats the same sequence
# of random draws. (The words actually chosen are only stable if the candidate
# lists in flip_dict are themselves in a stable order.)
FLIP_SEED = 0
np.random.seed(FLIP_SEED)

# For every image keep only the explanations that could be flipped, stored as
# consecutive (positive, negative) entries in that image's list.
ann_new_dict = {}
for im_name, ann in ann_dict.items():
    ann_new = []
    for e in ann:
        explanation_info_pos, explanation_info_neg = build_explanation_pair(e)
        if explanation_info_pos is not None and explanation_info_neg is not None:
            ann_new.extend((explanation_info_pos, explanation_info_neg))

    ann_new_dict[im_name] = ann_new

phrase not found:
brown wings back and rump
a bird with a grey belly and brown wings, back, and rump with a white spot above it's eyes.
phrase not found:
short pointed beak
this a little bird with feathers in graduated shades of brown.  it has blunt legs and a short, pointed beak.  it alsos has some lighter coloration around its eyes.
phrase not found:
black superciliary cheek throat and side
this small perching bird with a small bill has yellow crown, nap, back, tail, and coverts, black superciliary, cheek, throat, and side, and white chest, belly, and abdomen.
phrase not found:
white chest belly and abdomen
this small perching bird with a small bill has yellow crown, nap, back, tail, and coverts, black superciliary, cheek, throat, and side, and white chest, belly, and abdomen.
phrase not found:
black crown bill and feathers
a black bodied bird with black crown, bill and feathers, with a white wingbar.
phrase not found:
blue crown nape and back
a big very round bird with a blue crown,

In [8]:
# Write the image-name -> flipped-annotation dict to disk under the key
# 'im_name2ann', the same key the input archive is read with.
flipped_ann_path = './image_ann_dict_flipped_attributes.npz'
np.savez(flipped_ann_path, im_name2ann=ann_new_dict)