In [2]:
import nltk
import spacy
from tqdm import tqdm

In [3]:
nlp = spacy.load('en_core_web_lg')

In [None]:
from nltk.parse.stanford import StanfordParser

# Note: Download Stanford jar dependencies first
# See https://stackoverflow.com/questions/13883277/stanford-parser-and-nltk
stanford_parser = StanfordParser(
    model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
)

def nltk_stanford_tree(sent):
    """
    Visualize the Stanford dependency tree with nltk.tree
    """
    parse = stanford_parser.raw_parse(sent)

In [4]:
from nltk import word_tokenize
import pandas as pd

In [5]:
f1=u'this is my random text'

tokens = word_tokenize(f1)
bigramFinder = nltk.collocations.BigramCollocationFinder.from_words(tokens)
bigram_freq = bigramFinder.ngram_fd.items()
bigramFreqTable = pd.DataFrame(list(bigram_freq), columns=['bigram','freq']).sort_values(by='freq', ascending=False)
print(bigramFreqTable)

def rightTypes(ngram):
    first_type = ('JJ')
    second_type = ('NN')
    tags = nltk.pos_tag(ngram)
    if tags[0][1] in first_type and tags[1][1] in second_type:
        return True
    else:
        return False

filtered_bi = bigramFreqTable[bigramFreqTable.bigram.map(lambda x: rightTypes(x))]
print(filtered_bi)

           bigram  freq
0      (this, is)     1
1        (is, my)     1
2    (my, random)     1
3  (random, text)     1
Empty DataFrame
Columns: [bigram, freq]
Index: []


In [6]:
from datasets import load_dataset
auth_token = "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"  # Replace with an auth token, which you can get from your huggingface account: Profile -> Settings -> Access Tokens -> New Token
winoground = load_dataset("facebook/winoground", use_auth_token=auth_token)["test"]
winoground.set_format("torch")
def transform_wino(examples):
    examples["image_0"] = [image.convert("RGB") for image in examples["image_0"]]
    examples["image_1"] = [image.convert("RGB") for image in examples["image_1"]]
    return examples

winoground.set_transform(transform_wino)
device = 'cuda:0'

Found cached dataset winoground (/home/asura/.cache/huggingface/datasets/facebook___winoground/default/0.0.0/72585f4d9cd5a28790bb9bc2adbdd45633f36dfbf85df529e0756e114e134285)


  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
from itertools import permutations, combinations

def swap_subtrees_between_verbs(doc):
    pass
    # verb_indices = []
    # for i, token in enumerate(doc):
    #     if token.pos_ in ['VERB']:
    #         # swap nsubj and dobj subtrees
    #         for child in token.children:
    #             print(child, child.dep_)
    #             if child.dep_ == 'nsubj':
    #                 child.dep_ = 'dobj'
    #             elif child.dep_ == 'dobj':
    #                 child.dep_ = 'nsubj'

    from nltk.tree import Tree

    def token_format(token):
        return "_".join([token.orth_, token.tag_, token.dep_])

    def to_nltk_tree(node):
        if node.n_lefts + node.n_rights > 0:
            return Tree(token_format(node),
                    [to_nltk_tree(child) 
                        for child in node.children]
                )
        else:
            return token_format(node)

    tree = [to_nltk_tree(sent.root) for sent in doc.sents]
    # The first item in the list is the full tree
    tree[0].draw()

    return doc


def swap_adjectives(doc: spacy.tokens.doc.Doc, neg: bool = False):
    adjpropn_pos = []
    adjpropn_words = []
    token_list = [token for token in doc]
    for i, token in enumerate(doc):
        cond = (not neg) and (token.pos_ in ['ADJ','PROPN'])
        ncond = neg and (token.pos_ not in ['ADJ','PROPN'])
        if cond or ncond:
            adjpropn_pos.append(i)
            adjpropn_words.append(token)

    # permute the words list completely
    perms = list(permutations(adjpropn_words))
    sentences = []
    for perm in perms:
        # fill out all the tokens
        new_token_list = token_list
        for i, pos in enumerate(adjpropn_pos):
            new_token_list[pos] = perm[i]
        sentences.append(new_token_list.copy())
    return sentences

def swap_adj_noun(doc):
    # swap combinations of adjectives and nouns with each other
    adj_noun_pos = []
    adj_noun_words = []
    token_list = [token for token in doc]
    for i, token in enumerate(doc):
        if token.pos_ in ['NOUN']:
            # find its noun chunk
            for chunk in doc.noun_chunks:
                if token in chunk:
                    adj_noun_pos.append((chunk.start, chunk.end))
    # get all nC2 2-choices of swaps from adj_noun_pos
    swaps = list(combinations(adj_noun_pos, 2))
    sentences = []
    # create temp token list
    for i,j in swaps:
        token_list = [token for token in doc]
        # swap the ranges
        token_list[i[0]:i[1]], token_list[j[0]:j[1]] = token_list[j[0]:j[1]], token_list[i[0]:i[1]]

        sentences.append(token_list.copy())
    return sentences

def swap_nodes_preposition(doc):
    # swap only the words across prepositions
    sentences = []
    indices_to_swap = []

    for token in doc:
        arg1, arg2 = None, None
        if token.pos_ in ['ADP']:
            for child in token.children:
                if child.dep_ == 'pobj':
                    # get noun chunks for child
                    for chunk in doc.noun_chunks:
                        if child in chunk:
                            arg1 = chunk.start, chunk.end
                            break

            if token.head.pos_ in ['NOUN', 'PROPN']:
                # get noun chunks for head
                for chunk in doc.noun_chunks:
                    if token.head in chunk:
                        arg2 = chunk.start, chunk.end
                        break
                        
            elif token.head.pos_ == 'VERB':
                for child in token.head.children:
                    if child.dep_ == 'nsubj':
                        for chunk in doc.noun_chunks:
                            if child in chunk:
                                arg2 = chunk.start, chunk.end
                                break
                                
            if arg1 and arg2:
                indices_to_swap.append((arg1, arg2))

    swaps = indices_to_swap
    # create temp token list
    for i, j in swaps:
        token_list = [token for token in doc]
        # swap the ranges
        token_list[i[0]:i[1]], token_list[j[0]:j[1]] = token_list[j[0]:j[1]], token_list[i[0]:i[1]]
        
        sentences.append(token_list.copy())
    return sentences

In [23]:
# run all functions on winoground captions
augmented_caption = {}
for i, example in tqdm(enumerate(winoground), total=len(winoground)):
    # get the caption
    caption_0 = example['caption_0']
    caption_1 = example['caption_1']

    # get the doc
    doc = nlp(caption_0)
    doc_1 = nlp(caption_1)
    # swap adjectives
    adj_swaps = swap_adjectives(doc, False)

    # swap nodes across prepositions
    preposition_swaps = swap_nodes_preposition(doc)
    # swap adj-noun
    adj_noun_swaps = swap_adj_noun(doc)
    
    # convert all types of swaps to tuples
    adj_swaps = [tuple(swap) for swap in adj_swaps]
    preposition_swaps = [tuple(swap) for swap in preposition_swaps]
    adj_noun_swaps = [tuple(swap) for swap in adj_noun_swaps]

    # create a set of all swaps
    all_swaps = set(adj_swaps + preposition_swaps + adj_noun_swaps)
    
    def filter_edit_distance(swaps):
        # filter with edit distance > 3
        print(len(swaps))
        return [swap for swap in swaps
                if nltk.edit_distance(' '.join([token.text for token in swap]), 
                                      ' '.join([token.text for token in doc])) > 3]
    

    def filter_set_with_alt_caption(all_swaps):
        return [" ".join([token.text for token in swap]) for swap in all_swaps
                if ([token.text for token in swap] != [token.text for token in doc_1]) and ([token.text for token in swap] != [token.text for token in doc])]+[" ".join([token.text for token in doc_1])]
    
    all_swaps = filter_set_with_alt_caption(all_swaps)
    # add to augmented caption
    augmented_caption[caption_0]= all_swaps

100%|██████████| 400/400 [00:27<00:00, 14.37it/s]


In [24]:
augmented_caption

{'an old person kisses a young person': ['an young person kisses a old person',
  'a young person kisses an old person'],
 'the taller person hugs the shorter person': ['the shorter person hugs the taller person'],
 'the masked wrestler hits the unmasked wrestler': ['the unmasked wrestler hits the masked wrestler'],
 'a person watches an animal': ['an animal watches a person'],
 'the person without earrings pays the person with earrings': ['earrings without earrings pays the person with earrings the person',
  'the person without earrings pays earrings with earrings the person',
  'earrings without the person pays the person with earrings',
  'earrings without earrings pays the person with the person',
  'earrings without earrings the person the person with earrings',
  'the person without the person earrings person with earrings',
  'the person with earrings pays the person without earrings'],
 'a bird eats a snake': ['a snake eats a bird'],
 'a plant was harmed by another organism, a

In [25]:
# filter by minilm similarity
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L12-v2')
embeddings = model.encode(sentences)
# compute cosine similarity
from sklearn.metrics.pairwise import cosine_similarity
cosine_similarity([embeddings[0]], [embeddings[1]])

array([[0.36930478]], dtype=float32)

In [26]:
for caption, swaps in tqdm(augmented_caption.items()):
    new_swap = [swap for swap in swaps if cosine_similarity([model.encode(caption)], [model.encode(swap)]) > 0.7]
    augmented_caption[caption] = new_swap

        

In [27]:
augmented_caption

{'an old person kisses a young person': ['an young person kisses a old person',
  'a young person kisses an old person'],
 'the taller person hugs the shorter person': ['the shorter person hugs the taller person'],
 'the masked wrestler hits the unmasked wrestler': ['the unmasked wrestler hits the masked wrestler'],
 'a person watches an animal': ['an animal watches a person'],
 'the person without earrings pays the person with earrings': ['earrings without earrings pays the person with earrings the person',
  'the person without earrings pays earrings with earrings the person',
  'earrings without the person pays the person with earrings',
  'earrings without earrings pays the person with the person',
  'earrings without earrings the person the person with earrings',
  'the person without the person earrings person with earrings',
  'the person with earrings pays the person without earrings'],
 'a bird eats a snake': ['a snake eats a bird'],
 'a plant was harmed by another organism, a