In [103]:
import json
import utils
import random
import pathlib

from ordered_set import OrderedSet
from collections import defaultdict

In [13]:
def read_lexicon(path):
    """Load a JSON lexicon and extend it with three derived word classes.

    The JSON file must map feature names to lists of phrases and must contain
    the keys ``"animate"``, ``"inanimate"`` and ``"pronoun"``.

    Args:
        path: Location of the JSON lexicon file.

    Returns:
        A dict mapping each feature name to an ``OrderedSet`` of phrases, plus:

        * ``"long"``    -- animate/inanimate phrases with more than two
          space-separated tokens,
        * ``"short"``   -- animate/inanimate phrases with at most two tokens,
        * ``"nominal"`` -- animate/inanimate phrases that are not pronouns.

    Raises:
        KeyError: If one of the required keys is missing from the file.
    """
    with open(path, "r") as handle:
        raw = json.load(handle)

    lexicon = {feature: OrderedSet(phrases) for feature, phrases in raw.items()}

    # All entity phrases: animate entries first, then the inanimate ones.
    entities = lexicon["animate"].union(lexicon["inanimate"])

    def token_count(phrase):
        # Tokens are delimited by single spaces only.
        return len(phrase.split(" "))

    lexicon["long"] = OrderedSet([p for p in entities if token_count(p) > 2])
    lexicon["short"] = OrderedSet([p for p in entities if token_count(p) <= 2])
    lexicon["nominal"] = OrderedSet([p for p in entities - lexicon["pronoun"]])
    return lexicon

In [14]:
# Load the adaptation lexicon; read_lexicon also adds the derived
# "long", "short" and "nominal" classes to the returned dict.
adaptation_lexicon = read_lexicon("../data/lexicon/adaptation-final.json")

In [55]:
# Invert the lexicon: for every phrase, collect the names of all features
# (in lexicon order) whose set contains it.
word2feature = defaultdict(OrderedSet)
for feature_name in adaptation_lexicon:
    for phrase in adaptation_lexicon[feature_name]:
        word2feature[phrase].add(feature_name)

In [62]:
# Convert the defaultdict to a plain dict so that looking up an unknown
# phrase raises KeyError instead of silently inserting an empty entry.
word2feature = dict(word2feature)
# Spot-check the features recorded for one phrase.
word2feature['the teddy']

OrderedSet(['inanimate', 'definite', 'theme', 'recipient', 'marked', 'short', 'nominal'])

In [30]:
# "nominal" already excludes pronouns (see read_lexicon), so both sets below
# contain only non-pronominal phrases of at most two tokens.
lex = adaptation_lexicon

# Inanimate phrases that are additionally listed as "marked".
inanimate_nominals = (
    lex["inanimate"]
    .intersection(lex["nominal"])
    .intersection(lex["short"])
    .intersection(lex["marked"])
)

# Animate phrases, minus three explicitly excluded entries.
excluded_animates = OrderedSet(["cat", "dog", "bear"])
animate_nominals = (
    lex["animate"]
    .intersection(lex["nominal"])
    .intersection(lex["short"])
) - excluded_animates

In [78]:
# Inspect the animate entries that are both "short" and "definite".
adaptation_lexicon['animate'].intersection(adaptation_lexicon['short']).intersection(adaptation_lexicon['definite'])

OrderedSet(['me', 'her', 'him', 'them', 'us', 'mommy', 'grandpa', 'grandma', 'the cat', 'the dog', 'elmo', 'bert', 'daddy', 'cat', 'dog', 'the bear', 'bear'])

In [73]:
# Inspect the inanimate entries that are listed as "unmarked".
adaptation_lexicon['inanimate'].intersection(adaptation_lexicon['unmarked'])

OrderedSet(['it', 'something', 'cheerios', 'teddy', 'cup', 'book', 'toys in the room', 'cup on the table', 'food on the table', 'balls in the room', 'legos', 'ball', 'store', 'pencils', 'dolly'])

In [100]:
# Maps each object pronoun to a list of noun phrases associated with it.
# Some phrases appear under more than one pronoun (e.g. 'the cat' under both
# 'him' and 'her', 'the doll' under both 'her' and 'it'); 'me' and 'us' map
# only to themselves.
# NOTE(review): presumably these are candidate referents for each pronoun;
# the dict is not referenced in the cells visible here -- confirm it is used.
pronoun_items = {
    'him': ["daddy", "grandpa", "ryan", "john", "teddy", "the dog", "the cat"],
    'her': ["mommy", "grandma", "sarah", "jessica", "the bear", "the cat", "the doll"],
    'me': ['me'],
    'us': ['us'],
    'them': ['some balls', 'some toys', 'the cats', 'the dogs', 'the dolls', 'someone', 'some people'],
    'it': ['the doll', 'the book', 'the ball', 'a ball', 'a doll', 'a book']
}

def pronoun2name(name):
    """Draw a random name to stand in for a subject pronoun.

    Args:
        name: The pronoun to replace; only ``"he"`` and ``"she"`` are handled.

    Returns:
        A name chosen uniformly at random (via the global ``random`` state)
        from the pool for that pronoun, or ``None`` for any other input.
    """
    name_pools = (
        ("he", ["daddy", "sam"]),
        ("she", ["mommy", "lucy", "nonna"]),
    )
    for pronoun, pool in name_pools:
        if name == pronoun:
            return random.choice(pool)
    # Unhandled input: no random number is consumed.
    return None

In [89]:
# Load the adaptation stimuli of the three "no_discourse" runs.
# NOTE(review): assumes utils.read_jsonl returns a list of dicts -- confirm.
adaptation1 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse/adaptation.jsonl")
adaptation2 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse2/adaptation.jsonl")
adaptation3 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse3/adaptation.jsonl")

# Index run 3 by its 'item' field (a later duplicate overwrites an earlier one).
adaptation3_id2item = {record['item']: record for record in adaptation3}
adaptation3_ids = adaptation3_id2item.keys()

adaptation12 = adaptation1 + adaptation2

# Walk runs 1 and 2 in order, substituting the run-3 record whenever one with
# the same 'item' exists. Items that occur only in run 3 are not added.
adaptation = [
    adaptation3_id2item.get(record['item'], record) for record in adaptation12
]

len(adaptation)

2280

In [106]:
# control:
# Prefix every stimulus with a neutral sentence that mentions the agent.
random.seed(42)  # fixed seed so the sampled names are reproducible
adaptation_control = []
for a in adaptation:
    a_copy = a.copy()
    # Pronominal agents are replaced by a randomly drawn name; any other
    # agent is used verbatim.
    agent = pronoun2name(a['agent']) if a['agent'] in ['he', 'she'] else a['agent']
    a_copy.update(
        sentence=f"look {agent} is walking around . {a_copy['sentence']}",
        sampled_agent=agent,
    )
    adaptation_control.append(a_copy)

In [107]:
# Make sure the output directory exists, then write the control stimuli as
# JSON Lines (one JSON object per line).
control_dir = pathlib.Path("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_discourse_control")
control_dir.mkdir(parents=True, exist_ok=True)
with open("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_discourse_control/adaptation.jsonl", "w") as f:
    f.writelines(json.dumps(record) + "\n" for record in adaptation_control)