In [2]:
import utils
import json

from collections import defaultdict
from ordered_set import OrderedSet

In [17]:
# Content-word lemmas that are substring-matched against the lexicon entries
# to find entries that "conflict" (mention the same lemma). Each lemma is
# listed exactly once.
CONTENT_LEMMAS = [
    'book', 'store', 'cup', 'pencil', 'ball', 'bear', 'dog', 'cat',
    'lego', 'cheerios', 'milk', 'food', 'toy',
]

# read adaptation lexicon

def read_lexicon(path):
    """Load the adaptation lexicon from a JSON file and add derived classes.

    The JSON object is read as a mapping from class name to a list of
    entries; every list is wrapped in an ``OrderedSet``. Three derived
    classes are then added (overwriting keys of the same name, if any), all
    built from the union of the ``"animate"`` and ``"inanimate"`` entries:

    * ``"long"``    -- entries with more than two space-separated tokens
    * ``"short"``   -- entries with at most two space-separated tokens
    * ``"nominal"`` -- entries that are not in the ``"pronoun"`` class

    Args:
        path: Path to the lexicon JSON file.

    Returns:
        dict mapping each class name (including the three derived ones) to
        an ``OrderedSet`` of entries.

    Raises:
        KeyError: If the file has no ``"animate"``, ``"inanimate"`` or
            ``"pronoun"`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    lexicon = {k: OrderedSet(v) for k, v in raw.items()}

    # Every derived class is a filter over the same pool, so build it once.
    entries = lexicon["animate"].union(lexicon["inanimate"])

    long = OrderedSet([x for x in entries if len(x.split(" ")) > 2])
    short = OrderedSet([x for x in entries if len(x.split(" ")) <= 2])
    nominals = OrderedSet(entries - lexicon["pronoun"])

    lexicon.update({"long": long, "short": short, "nominal": nominals})
    return lexicon

In [18]:
# Load the adaptation lexicon (class name -> OrderedSet of entries), including
# the derived "long", "short" and "nominal" classes added by read_lexicon.
lexicon = read_lexicon("../data/lexicon/adaptation-final.json")

In [19]:
# Pool the entries of every lexicon class into one de-duplicated collection,
# in order of first appearance.
unique_words = OrderedSet()
for class_entries in lexicon.values():
    unique_words.update(class_entries)

In [20]:
# Number of distinct entries across all lexicon classes.
len(unique_words)

96

In [21]:
# For each content lemma, collect every lexicon entry that contains the lemma
# as a substring (so 'book' also picks up 'some books').
conflict_set = defaultdict(OrderedSet)
for lemma in CONTENT_LEMMAS:
    matching_entries = [entry for entry in unique_words if lemma in entry]
    # Only touch the defaultdict when something matched, so a lemma without
    # any matching entry never gets an (empty) key.
    if matching_entries:
        conflict_set[lemma].update(matching_entries)

In [22]:
# Map every conflicting entry to the other entries that share a content lemma
# with it.
conflict_words = {}
for word, conflicts in conflict_set.items():
    for c in conflicts:
        # An entry may contain more than one content lemma. Merge its
        # conflicts across lemmas (keeping first-seen order, no duplicates)
        # instead of letting the last matching lemma overwrite earlier ones.
        others = conflict_words.setdefault(c, [])
        for other in conflicts:
            if other != c and other not in others:
                others.append(other)

print(conflict_words)

{'a book': ['the book', 'book', 'a nice book', 'some books'], 'the book': ['a book', 'book', 'a nice book', 'some books'], 'book': ['a book', 'the book', 'a nice book', 'some books'], 'a nice book': ['a book', 'the book', 'book', 'some books'], 'some books': ['a book', 'the book', 'book', 'a nice book'], 'a store': ['the store', 'some store', 'a store that is far away', 'the store that is far away', 'store'], 'the store': ['a store', 'some store', 'a store that is far away', 'the store that is far away', 'store'], 'some store': ['a store', 'the store', 'a store that is far away', 'the store that is far away', 'store'], 'a store that is far away': ['a store', 'the store', 'some store', 'the store that is far away', 'store'], 'the store that is far away': ['a store', 'the store', 'some store', 'a store that is far away', 'store'], 'store': ['a store', 'the store', 'some store', 'a store that is far away', 'the store that is far away'], 'cup': ['a cup', 'the cup', 'the cup on the table', 

In [10]:
# Invert the lexicon: for every entry, record the names of all classes that
# list it, in the order the classes appear in the lexicon.
features = defaultdict(OrderedSet)

for class_name, members in lexicon.items():
    for entry in members:
        features[entry].add(class_name)

In [11]:
# Spot-check the classes recorded for one entry.
# NOTE: `features` is a defaultdict, so looking up an entry that is not in
# the lexicon would insert an empty OrderedSet for it.
features['teddy']

OrderedSet(['inanimate', 'definite', 'theme', 'recipient', 'unmarked', 'short', 'nominal'])

In [16]:
# Entries that belong to all of the listed classes at once, intersected in
# the order given.
required_features = ['inanimate', 'definite', 'recipient', 'short', 'unmarked']
matches = lexicon[required_features[0]]
for feature in required_features[1:]:
    matches = matches.intersection(lexicon[feature])
matches

OrderedSet(['it', 'teddy'])