In [1]:
import argparse
import config
# import inflect
import json
import math
# import os
import random
import utils
import pathlib

from collections import defaultdict
from string import Template
from dataclasses import dataclass
from itertools import product
from ordered_set import OrderedSet
from tqdm import trange, tqdm

In [2]:
def write_jsonl(data, path):
    """Write every element of ``data`` to ``path`` as one JSON document per line.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(path, "w") as handle:
        handle.writelines(f"{json.dumps(record)}\n" for record in data)

In [3]:
@dataclass
class Dative:
    """A dative sentence in double-object ("do") or prepositional ("pp") form.

    Attributes:
        dative: Construction type; ``"do"`` or ``"pp"``.
        verb: Verb string placed after the agent.
        agent: Subject of the sentence.
        theme: Theme noun phrase.
        recipient: Recipient noun phrase.
    """

    dative: str
    verb: str
    agent: str
    theme: str
    recipient: str

    def generate(self, marked_theme=False, marked_recipient=False):
        """Render the sentence, store it on ``self.sentence`` and return it.

        Args:
            marked_theme: If true, prefix the theme with "the " in this rendering.
            marked_recipient: If true, prefix the recipient with "the " in this
                rendering.

        Returns:
            The rendered sentence string.

        Raises:
            ValueError: If ``self.dative`` is neither ``"do"`` nor ``"pp"``.
        """
        if self.dative == "do":
            template = Template("$agent $verb $recipient $theme .")
        elif self.dative == "pp":
            template = Template("$agent $verb $theme to $recipient .")
        else:
            # Without this branch `template` would be unbound below.
            raise ValueError(
                f"unknown dative type {self.dative!r}; expected 'do' or 'pp'"
            )

        # Apply the marking to local copies only: writing it back to the fields
        # would stack another "the " on every subsequent call.
        theme = f"the {self.theme}" if marked_theme else self.theme
        recipient = f"the {self.recipient}" if marked_recipient else self.recipient

        self.sentence = template.substitute(
            agent=self.agent, verb=self.verb, theme=theme, recipient=recipient
        )
        return self.sentence

    def givenness(self, discourse_sentence=None):
        """Placeholder that is not implemented; always raises NotImplementedError."""
        raise NotImplementedError("Dative.givenness is not implemented")

In [4]:
# Word lists per category. The key order matters: INVERSE_LEXICON below lists a
# word's categories in this order (animacy, pronominality, role).
LEXICON = dict(
    animate=[
        "them", "the cat", "the dog", "the bear",
        "me", "us", "him", "her",
        "mommy", "daddy", "grandpa", "grandma",
    ],
    inanimate=["it", "the ball", "the cup", "the book"],
    pronoun=["it", "them", "me", "us", "him", "her"],
    nominal=[
        "the cat", "the dog", "the bear",
        "the ball", "the cup", "the book",
        "mommy", "daddy", "grandpa", "grandma",
    ],
    theme=[
        "it", "them",
        "the cat", "the dog", "the bear",
        "the ball", "the cup", "the book",
    ],
    recipient=["me", "us", "him", "her", "mommy", "daddy", "grandpa", "grandma"],
)

# word -> list of every category whose word list contains it.
INVERSE_LEXICON = defaultdict(list)
for category, words in LEXICON.items():
    for word in words:
        INVERSE_LEXICON[word].append(category)


def _singular_theme(noun_phrase):
    """Features of a singular full-NP theme, referred back to as "it"."""
    return {
        "article": noun_phrase,
        "reference": "it",
        "is": "is",
        "belongs": "belongs",
        "was": "was",
    }


# Surface forms used when a continuation sentence talks about the theme.
THEME_FEATURES = {
    "it": {"article": "it", "reference": "it", "is": "is", "was": "was", "belongs": "belongs"},
    "them": {"article": "they", "reference": "they", "is": "are", "was": "were", "belongs": "belong"},
    **{
        noun_phrase: _singular_theme(noun_phrase)
        for noun_phrase in ("the ball", "the cup", "the book", "the cat", "the dog", "the bear")
    },
}

# Verb forms shared by several recipients.
_BARE_VERBS = {"has": "have", "own": "own", "think": "think"}
_S_VERBS = {"has": "has", "own": "owns", "think": "thinks"}
_HE = {"possessive": "his", "pronoun-obj": "him", "pronoun-subj": "he", **_S_VERBS, "was": "was"}
_SHE = {"possessive": "hers", "pronoun-obj": "her", "pronoun-subj": "she", **_S_VERBS, "was": "was"}

# Pronoun and verb forms used when a continuation sentence talks about the
# recipient. Each entry gets its own dict so no two recipients share an object.
RECIPIENT_FEATURES = {
    "me": {"possessive": "mine", "pronoun-obj": "me", "pronoun-subj": "I", **_BARE_VERBS, "was": "was"},
    "us": {"possessive": "ours", "pronoun-obj": "us", "pronoun-subj": "we", **_BARE_VERBS, "was": "were"},
    "him": dict(_HE),
    "her": dict(_SHE),
    "mommy": dict(_SHE),
    "daddy": dict(_HE),
    "grandpa": dict(_HE),
    "grandma": dict(_SHE),
}

AGENTS = [
    "sam",
    "ryan",
    "john",
    "katie",
    "lucy",
    "sarah",
]

In [5]:
# Display the word -> categories mapping. Each list follows LEXICON's key order:
# animacy first, then pronominality, then role (theme/recipient).
INVERSE_LEXICON

defaultdict(list,
            {'them': ['animate', 'pronoun', 'theme'],
             'the cat': ['animate', 'nominal', 'theme'],
             'the dog': ['animate', 'nominal', 'theme'],
             'the bear': ['animate', 'nominal', 'theme'],
             'me': ['animate', 'pronoun', 'recipient'],
             'us': ['animate', 'pronoun', 'recipient'],
             'him': ['animate', 'pronoun', 'recipient'],
             'her': ['animate', 'pronoun', 'recipient'],
             'mommy': ['animate', 'nominal', 'recipient'],
             'daddy': ['animate', 'nominal', 'recipient'],
             'grandpa': ['animate', 'nominal', 'recipient'],
             'grandma': ['animate', 'nominal', 'recipient'],
             'it': ['inanimate', 'pronoun', 'theme'],
             'the ball': ['inanimate', 'nominal', 'theme'],
             'the cup': ['inanimate', 'nominal', 'theme'],
             'the book': ['inanimate', 'nominal', 'theme']})

In [6]:
def specified(template, theme, recipient):
    """Build the "specified" continuation sentence for a theme/recipient pair.

    Every template mentions both the recipient and the theme, e.g.
    "it is mine now ." (1) or "she has it now ." (3).

    Args:
        template: Template number, 1 through 6.
        theme: Key into ``THEME_FEATURES``.
        recipient: Key into ``RECIPIENT_FEATURES``.

    Returns:
        The continuation string, or ``None`` when ``template`` is not one of
        the six known numbers.

    Raises:
        KeyError: If ``theme`` or ``recipient`` has no feature entry.
    """
    tf = THEME_FEATURES[theme]
    rf = RECIPIENT_FEATURES[recipient]
    if template == 1:
        return f"{tf['reference']} {tf['is']} {rf['possessive']} now ."
    if template == 2:
        return f"{tf['reference']} {tf['belongs']} to {rf['pronoun-obj']} now ."
    if template == 3:
        return f"{rf['pronoun-subj']} {rf['has']} {tf['reference']} now ."
    if template == 4:
        return f"{rf['pronoun-subj']} {rf['own']} {theme} now ."
    if template == 5:
        # Take the copula from the recipient features so it agrees with the
        # subject pronoun ("we were", not "we was").
        return f"{rf['pronoun-subj']} {rf['was']} happy to get {theme} ."
    if template == 6:
        return f"{rf['pronoun-subj']} took {theme} to the room ."
    return None

def unspecified(template, theme, recipient):
    """Build the "unspecified" continuation sentence for a theme/recipient pair.

    The templates comment on the theme ("... very nice .", "... awesome .");
    templates 1 and 2 mention only the theme, templates 3-6 also mention the
    recipient as the one doing the thinking.

    Args:
        template: Template number, 1 through 6.
        theme: Key into ``THEME_FEATURES``.
        recipient: Key into ``RECIPIENT_FEATURES``.

    Returns:
        The continuation string, or ``None`` when ``template`` matches none of
        the six known numbers.
    """
    theme_info = THEME_FEATURES[theme]
    recipient_info = RECIPIENT_FEATURES[recipient]

    # One lazily evaluated builder per template number (position + 1), so only
    # the selected template looks up its features.
    builders = (
        lambda: f"{theme_info['reference']} {theme_info['was']} very nice .",
        lambda: f"{theme_info['reference']} {theme_info['was']} nice and awesome .",
        lambda: f"{recipient_info['pronoun-subj']} {recipient_info['think']} {theme_info['reference']} {theme_info['was']} nice .",
        lambda: f"{recipient_info['pronoun-subj']} {recipient_info['think']} {theme_info['article']} {theme_info['was']} awesome .",
        lambda: f"{recipient_info['pronoun-subj']} thought {theme_info['article']} {theme_info['was']} very nice .",
        lambda: f"{recipient_info['pronoun-subj']} thought {theme_info['article']} {theme_info['was']} so awesome .",
    )
    for number, build in enumerate(builders, start=1):
        if template == number:
            return build()
    return None


In [7]:
# Every (theme, recipient) combination, with the theme varying slowest.
items = list(product(LEXICON['theme'], LEXICON['recipient']))

random.seed(42)

stimuli = []
idx = 0

# NOTE(review): only templates 1-5 are generated although specified() and
# unspecified() each define a sixth template - confirm this is intended.
for template_id in range(1, 6):
    for theme, recipient in items:
        # Category lists are ordered animacy, pronominality, role.
        theme_tags = INVERSE_LEXICON[theme]
        recipient_tags = INVERSE_LEXICON[recipient]

        # One random draw per item; both specificity variants share the agent.
        (agent,) = random.sample(AGENTS, 1)
        dative_sentence = Dative(
            dative="pp", verb="[verb]", agent=agent, theme=theme, recipient=recipient
        ).generate()

        # Fields common to the specified and unspecified variants of this item.
        shared_fields = {
            "item": idx,
            "hypothesis_instance": template_id,
            "hypothesis_id": template_id,
            "agent": agent,
            "template": template_id,
            "theme": theme,
            "recipient": recipient,
            "theme_animacy": theme_tags[0],
            "recipient_animacy": recipient_tags[0],
            "theme_pronominality": theme_tags[1],
            "recipient_pronominality": recipient_tags[1],
            "dative": "pp",
            "dative_sentence": dative_sentence,
        }

        continuations = (
            ("specified", specified(template_id, theme, recipient)),
            ("unspecified", unspecified(template_id, theme, recipient)),
        )
        stimuli.extend(
            {
                **shared_fields,
                "specificity": specificity,
                "sentence": f"{dative_sentence} {continuation}",
            }
            for specificity, continuation in continuations
        )

        idx += 1

In [8]:
# Expected: 5 templates x 64 (theme, recipient) pairs x 2 specificity variants = 640.
len(stimuli)

640

In [9]:
# Persist the complete stimulus set (both specificity variants) as the
# adaptation file of the combined "specificity" experiment.
specificity_dir = "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_possession_specificity/"
pathlib.Path(specificity_dir).mkdir(exist_ok=True, parents=True)

write_jsonl(stimuli, specificity_dir + "adaptation.jsonl")

# The generalization set is read from the discourse-control experiment and
# written out again unchanged next to the adaptation file.
generalization = utils.read_jsonl("../data/experiments/single_stimuli_dative_simulation_valtest_vbd_no_markedness_discourse_control/generalization.jsonl")

write_jsonl(generalization, specificity_dir + "generalization.jsonl")

In [10]:
# Partition the stimuli by condition; anything not labelled "specified" is
# treated as unspecified.
specified_stimuli = [
    entry for entry in stimuli if entry["specificity"] == "specified"
]
unspecified_stimuli = [
    entry for entry in stimuli if entry["specificity"] != "specified"
]

In [11]:
# Write one experiment directory per specificity condition. Each directory gets
# its own adaptation file plus a copy of `generalization`, which was loaded from
# the discourse-control experiment in an earlier cell.
condition_outputs = (
    (
        "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_possession_specified/",
        specified_stimuli,
    ),
    (
        "../data/experiments/single_stimuli_dative_simulation_valtest_vbd_possession_unspecified/",
        unspecified_stimuli,
    ),
)

for experiment_dir, condition_stimuli in condition_outputs:
    pathlib.Path(experiment_dir).mkdir(exist_ok=True, parents=True)
    write_jsonl(condition_stimuli, experiment_dir + "adaptation.jsonl")
    write_jsonl(generalization, experiment_dir + "generalization.jsonl")