In [67]:
import utils
import json

from collections import defaultdict
from ordered_set import OrderedSet

In [68]:
CONTENT_LEMMAS = ['book', 'store', 'cup', 'pencil', 'ball', 'bear', 'dog', 'cat', 'lego', 'cheerios', 'milk', 'food', 'toy', 'chair', 'legos', 'pencils', 'dogs', 'bears']

# read adaptation lexicon

def read_lexicon(path):
    with open(path, "r") as f:
        lexicon = json.load(f)
        lexicon = {k: OrderedSet(v) for k, v in lexicon.items()}
        long = OrderedSet(
            [
                x
                for x in lexicon["animate"].union(lexicon["inanimate"])
                if len(x.split(" ")) > 2
            ]
        )
        short = OrderedSet(
            [
                x
                for x in lexicon["animate"].union(lexicon["inanimate"])
                if len(x.split(" ")) <= 2
            ]
        )
        nominals = OrderedSet(
            [
                x
                for x in lexicon["animate"].union(lexicon["inanimate"])
                - lexicon["pronoun"]
            ]
        )
        lexicon.update({"long": long, "short": short, "nominal": nominals})
    return lexicon

In [69]:
lexicon = read_lexicon("../data/lexicon/adaptation-final-nomarkedness.json")

In [70]:
unique_words = OrderedSet()
for k, v in lexicon.items():
    unique_words.update(v)

In [71]:
len(unique_words)

74

In [72]:
unique_words

OrderedSet(['she', 'he', 'mommy', 'daddy', 'lucy', 'john', 'me', 'her', 'him', 'it', 'them', 'us', 'someone', 'something', 'grandpa', 'grandma', 'a cat', 'the cat', 'a dog', 'the dog', 'elmo', 'bert', 'a big bear', 'the big bear', 'a cute cat', 'a small dog', 'the cute cat', 'the small dog', 'the bear over there', 'a bear outside the house', 'the cat in that house', 'the cat over there', 'a cat in that house', 'the dog outside the house', 'a dog outside the house', 'the dog over there', 'some dogs outside the house', 'some bears in the forest', 'a bear', 'the bear', 'a ball', 'the ball', 'a book', 'the book', 'a cup', 'the cup', 'a toy', 'the toys', 'a red ball', 'the red ball', 'a nice book', 'some books', 'some balls', 'a toy in the room', 'the toys in the room', 'the cup on the table', 'a cup on the table', 'some milk', 'some food', 'the food', 'the milk', 'the food on the table', 'the balls in the room', 'a ball in that room', 'the cheerios', 'the legos', 'a lego', 'the pencils', '

In [73]:
conflict_set = defaultdict(OrderedSet)
for word in CONTENT_LEMMAS:
    # print(word)
    # conflict_set = OrderedSet()
    for uw in unique_words:
        if word in uw:
            conflict_set[word].add(uw)

In [74]:
conflict_set

defaultdict(ordered_set.OrderedSet,
            {'book': OrderedSet(['a book', 'the book', 'a nice book', 'some books']),
             'cup': OrderedSet(['a cup', 'the cup', 'the cup on the table', 'a cup on the table']),
             'pencil': OrderedSet(['the pencils', 'a pencil', 'the pencils in the box']),
             'ball': OrderedSet(['a ball', 'the ball', 'a red ball', 'the red ball', 'some balls', 'the balls in the room', 'a ball in that room']),
             'bear': OrderedSet(['a big bear', 'the big bear', 'the bear over there', 'a bear outside the house', 'some bears in the forest', 'a bear', 'the bear']),
             'dog': OrderedSet(['a dog', 'the dog', 'a small dog', 'the small dog', 'the dog outside the house', 'a dog outside the house', 'the dog over there', 'some dogs outside the house']),
             'cat': OrderedSet(['a cat', 'the cat', 'a cute cat', 'the cute cat', 'the cat in that house', 'the cat over there', 'a cat in that house']),
             'lego': Ord

In [75]:
conflict_words = defaultdict(set)
for word, conflicts in conflict_set.items():
    for c in conflicts:
        conflict_words[c].update(conflicts)

conflict_words = {k: list(v) for k, v in conflict_words.items()}

In [76]:
conflict_words

{'a book': ['the book', 'a nice book', 'some books', 'a book'],
 'the book': ['the book', 'a nice book', 'some books', 'a book'],
 'a nice book': ['the book', 'a nice book', 'some books', 'a book'],
 'some books': ['the book', 'a nice book', 'some books', 'a book'],
 'a cup': ['a cup on the table', 'the cup on the table', 'the cup', 'a cup'],
 'the cup': ['a cup on the table', 'the cup on the table', 'the cup', 'a cup'],
 'the cup on the table': ['a cup on the table',
  'the cup on the table',
  'the cup',
  'a cup'],
 'a cup on the table': ['a cup on the table',
  'the cup on the table',
  'the cup',
  'a cup'],
 'the pencils': ['a pencil', 'the pencils', 'the pencils in the box'],
 'a pencil': ['a pencil', 'the pencils', 'the pencils in the box'],
 'the pencils in the box': ['a pencil',
  'the pencils',
  'the pencils in the box'],
 'a ball': ['the ball',
  'some balls',
  'the red ball',
  'a ball in that room',
  'the balls in the room',
  'a red ball',
  'a ball'],
 'the ball': ['

In [57]:
# conflict_words = {}
# for word, conflicts in conflict_set.items():
#     for c in conflicts:
#         conflict_words[c] = list(conflicts - {c})
    
# print(conflict_words)

In [16]:
features = defaultdict(OrderedSet)

for k,v in lexicon.items():
    for word in v:
        features[word].add(k)

In [11]:
features['teddy']

OrderedSet(['inanimate', 'definite', 'theme', 'recipient', 'unmarked', 'short', 'nominal'])

In [16]:
lexicon['inanimate'].intersection(lexicon['definite']).intersection(lexicon['recipient']).intersection(lexicon['short']).intersection(lexicon['unmarked'])

OrderedSet(['it', 'teddy'])