In [1]:
import json
import utils
import random
import pathlib

from ordered_set import OrderedSet
from collections import defaultdict

In [2]:
def read_lexicon(path):
    """Load a lexicon JSON file and add derived "long", "short" and "nominal" sets.

    The file must contain a JSON object whose values are lists of strings and
    which has at least the keys "animate", "inanimate" and "pronoun".

    Args:
        path: Location of the lexicon JSON file.

    Returns:
        A dict mapping every key of the JSON object to an ``OrderedSet`` of its
        entries, extended (overwriting any same-named keys) with:

        - ``"long"``: animate/inanimate entries with more than two
          space-separated tokens,
        - ``"short"``: animate/inanimate entries with at most two tokens,
        - ``"nominal"``: animate/inanimate entries that are not in "pronoun".

    Raises:
        KeyError: If "animate", "inanimate" or "pronoun" is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding. The file is only needed for parsing, so close it early.
    with open(path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    lexicon = {k: OrderedSet(v) for k, v in raw.items()}

    # Every derived set is carved out of the same animate + inanimate pool, so
    # build that pool once (insertion order: animate first, then inanimate).
    entities = lexicon["animate"].union(lexicon["inanimate"])

    long = OrderedSet([x for x in entities if len(x.split(" ")) > 2])
    short = OrderedSet([x for x in entities if len(x.split(" ")) <= 2])
    nominals = OrderedSet([x for x in entities - lexicon["pronoun"]])

    lexicon.update({"long": long, "short": short, "nominal": nominals})
    return lexicon

In [3]:
# Lexicon used for the adaptation stimuli ("nomarkedness" variant of the file).
adaptation_lexicon = read_lexicon(
    "../data/lexicon/adaptation-final-nomarkedness.json"
)

In [4]:
# Invert the lexicon: for every word, collect the features it is listed under,
# in the order the features appear in the lexicon.
word2feature = defaultdict(OrderedSet)
for feature in adaptation_lexicon:
    words = adaptation_lexicon[feature]
    for word in words:
        word2feature[word].add(feature)

In [5]:
# Convert to a plain dict so that looking up an unknown word raises KeyError
# instead of silently inserting an empty entry.
word2feature = {key: features for key, features in word2feature.items()}
word2feature["the chair"]

OrderedSet(['inanimate', 'recipient', 'theme', 'definite', 'short', 'nominal'])

In [6]:
# Sanity check: list every word that cannot be an agent and carries fewer than
# five features.
for w, f in word2feature.items():
    if "agent" not in f and len(f) < 5:
        print(w, f)

In [7]:
# Short (at most two tokens), non-pronominal inanimate entries.
inanimate_nominals = (
    adaptation_lexicon["inanimate"]
    .intersection(adaptation_lexicon["nominal"])
    .intersection(adaptation_lexicon["short"])
)

# Same selection for animates, minus a few bare animal nouns.
# NOTE(review): the lexicon output shown in this notebook lists animals with a
# determiner ("the cat", "the dog"); confirm the bare forms below actually occur.
animate_nominals = (
    adaptation_lexicon["animate"]
    .intersection(adaptation_lexicon["nominal"])
    .intersection(adaptation_lexicon["short"])
    - OrderedSet(["cat", "dog", "bear"])
)

In [8]:
# Display the short inanimate nominals selected in the previous cell.
inanimate_nominals

OrderedSet(['a ball', 'the ball', 'a book', 'the book', 'a cup', 'the cup', 'a toy', 'the toys', 'some books', 'some balls', 'some milk', 'some food', 'the food', 'the milk', 'the cheerios', 'the legos', 'a lego', 'the pencils', 'a pencil', 'a chair', 'the chair', 'some cheerios'])

In [9]:
# Inspect the animate entries that are both short and definite.
(
    adaptation_lexicon['animate']
    .intersection(adaptation_lexicon['short'])
    .intersection(adaptation_lexicon['definite'])
)

OrderedSet(['me', 'her', 'him', 'them', 'us', 'mommy', 'grandpa', 'grandma', 'the cat', 'the dog', 'daddy'])

In [10]:
# adaptation_lexicon['inanimate'].intersection(adaptation_lexicon['unmarked'])

In [11]:
# Noun phrases listed per object pronoun.
# NOTE(review): presumably the candidate referents each pronoun can stand in
# for; nothing in the visible cells reads this mapping - confirm it is still needed.
pronoun_items = {
    "him": ["daddy", "grandpa", "ryan", "john", "teddy", "the dog", "the cat"],
    "her": ["mommy", "grandma", "sarah", "jessica", "the bear", "the cat", "the doll"],
    "me": ["me"],
    "us": ["us"],
    "them": [
        "some balls",
        "some toys",
        "the cats",
        "the dogs",
        "the dolls",
        "someone",
        "some people",
    ],
    "it": ["the doll", "the book", "the ball", "a ball", "a doll", "a book"],
}

def pronoun2name(name, remove=()):
    """Sample a proper name that can replace a subject pronoun.

    Args:
        name: The pronoun to replace. Only "he" and "she" are handled.
        remove: Names that must not be returned (any container supporting
            ``in``), e.g. the other arguments of the sentence.

    Returns:
        For "he", one of "daddy"/"john"; for "she", one of "mommy"/"lucy";
        chosen with ``random.choice`` among the candidates not in ``remove``.
        ``None`` for any other value of ``name``.

    Raises:
        IndexError: If every candidate for ``name`` is excluded by ``remove``.
    """
    if name == "he":
        candidates = ("daddy", "john")
    elif name == "she":
        candidates = ("mommy", "lucy")
    else:
        # Other pronouns have no name pool; keep the historical None result.
        return None

    choices = [n for n in candidates if n not in remove]
    if not choices:
        # random.choice would raise the same exception type, but without
        # saying which pronoun/exclusions caused it.
        raise IndexError(
            f"no name left for pronoun {name!r} after removing {list(remove)!r}"
        )
    # Single random.choice call over the same ordered list as before, so
    # seeded runs draw identical names.
    return random.choice(choices)

In [12]:
# adaptation1 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse/adaptation.jsonl")
# adaptation2 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse2/adaptation.jsonl")

# adaptation3 = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_discourse3/adaptation.jsonl")

# adaptation3_id2item = {a['item']: a for a in adaptation3}
# adaptation3_ids = adaptation3_id2item.keys()

# adaptation12 = adaptation1 + adaptation2

# adaptation = []
# for a in adaptation12:
#     if a['item'] in adaptation3_ids:
#         adaptation.append(adaptation3_id2item[a['item']])
#     else:
#         adaptation.append(a)

# len(adaptation)

# Adaptation stimuli of the no-markedness / no-discourse experiment.
adaptation = utils.read_jsonl(
    "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_markedness_no_discourse/adaptation.jsonl"
)
len(adaptation)

1620

In [13]:
# Build two control variants of every adaptation item by prefixing its sentence:
#   - adaptation_control: a context sentence that mentions the agent
#   - neutral_control:    a context sentence that mentions no participant
random.seed(42)
adaptation_control = []
neutral_control = []
for a in adaptation:
    # Pronoun agents are replaced by a sampled name that differs from the
    # item's theme and recipient; any other agent is used as is.
    agent = (
        pronoun2name(a['agent'], [a['theme'], a['recipient']])
        if a['agent'] in ('he', 'she')
        else a['agent']
    )
    a_copy = {
        **a,
        'sentence': f"look {agent} is walking around . {a['sentence']}",
        'sampled_agent': agent,
    }
    # The sampled agent is recorded on the neutral item as well, even though
    # its context sentence does not use it.
    n_copy = {
        **a,
        'sentence': f"it was a nice day . {a['sentence']}",
        'sampled_agent': agent,
    }
    adaptation_control.append(a_copy)
    neutral_control.append(n_copy)

In [15]:
# Write each control set as JSON Lines into its own experiment directory,
# creating the directory first if needed.
for out_dir, records in (
    (
        "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_markedness_discourse_control",
        adaptation_control,
    ),
    (
        "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_markedness_neutral_discourse",
        neutral_control,
    ),
):
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
    with open(f"{out_dir}/adaptation.jsonl", "w") as f:
        for a in records:
            f.write(json.dumps(a) + "\n")