## Generating data for experiments

In [1]:
import inflect
from pattern.en import singularize, pluralize
import glob
import re
from collections import defaultdict
import csv

In [2]:
# Shared inflect engine; later cells call its `a()` helper to prepend an
# indefinite article and `plural_verb()` to make a verb agree with a plural subject.
inflector = inflect.engine()

In [3]:
def is_plural(plural_form):
    """Return True when ``plural_form`` looks like a plural noun.

    A word is treated as plural when ``pattern.en.singularize`` rewrites it
    to a different string. This is a heuristic: the answer is only as
    reliable as ``singularize`` is for the given word.
    """
    # The comparison already yields a bool, so no conditional expression is needed.
    return plural_form != singularize(plural_form)

## Rosch (1975)

**Paper:** Cognitive representations of semantic categories

In [4]:
# Ten category labels for the Rosch (1975) section.
# NOTE(review): the cells visible below derive each category from a data file
# name instead of reading this list — confirm whether it is still needed.
categories = ["furniture", "fruit", "vehicle", "weapon", "vegetable", 
              "tool", "bird", "sport", "toy", "clothing"]

In [8]:
# Build one (preamble, stimulus, item, category, rank) row per line of every
# category file. `items` maps each category to its words in file order and is
# read again by the feature-stimuli cell.
stimuli = []
items = defaultdict(list)
for file in glob.glob("../data/rosch1975/*.txt"):
    # The category is the file name without its directory and ".txt" suffix.
    # Matching the last path component (after either "/" or "\") avoids relying
    # on the parent directory ending in "5/" and on glob returning "/" separators.
    category = re.search(r'([^/\\]+)\.txt$', file).group(1)
    with open(file, "r") as f:
        # rank is the 1-based line number of the word within its category file.
        for rank, word in enumerate(f, start=1):
            word = word.strip()
            items[category].append(word)
            if category == "sport":
                # Sports never take an article and always use "is a sport."
                preamble = word
                stimulus = "is a sport."
            elif is_plural(word):
                # Plural items take no article and a plural predicate.
                preamble = word
                if category == "clothing":
                    stimulus = "are clothes."
                else:
                    stimulus = f"are {pluralize(category)}."
            elif category in ("furniture", "clothing"):
                # These categories are phrased as "is a type of <category>."
                preamble = inflector.a(word)
                stimulus = f"is a type of {category}."
            elif category in ("vegetable", "fruit"):
                # Singular fruit/vegetable items are used without an article.
                preamble = word
                stimulus = f"is a {category}."
            else:
                preamble = inflector.a(word)
                stimulus = f"is a {category}."
            stimuli.append((preamble, stimulus, word, category, rank))

In [9]:
# Sanity check: number of stimulus rows generated by the previous cell.
len(stimuli)

565

In [10]:
# Display the generated (preamble, stimulus, item, category, rank) rows for inspection.
stimuli

[('a doll', 'is a toy.', 'doll', 'toy', 1),
 ('a top', 'is a toy.', 'top', 'toy', 2),
 ('a jack-in-the-box', 'is a toy.', 'jack-in-the-box', 'toy', 3),
 ('a toy soldier', 'is a toy.', 'toy soldier', 'toy', 4),
 ('a yo-yo', 'is a toy.', 'yo-yo', 'toy', 5),
 ('a block', 'is a toy.', 'block', 'toy', 6),
 ('marbles', 'are toys.', 'marbles', 'toy', 7),
 ('a rattle', 'is a toy.', 'rattle', 'toy', 8),
 ('a stuffed animal', 'is a toy.', 'stuffed animal', 'toy', 9),
 ('a water pistol', 'is a toy.', 'water pistol', 'toy', 10),
 ('a teddy bear', 'is a toy.', 'teddy bear', 'toy', 11),
 ('a rocking horse', 'is a toy.', 'rocking horse', 'toy', 12),
 ('a doll house', 'is a toy.', 'doll house', 'toy', 13),
 ('a ball', 'is a toy.', 'ball', 'toy', 14),
 ('jacks', 'are toys.', 'jacks', 'toy', 15),
 ('paper dolls', 'are toys.', 'paper dolls', 'toy', 16),
 ('an erector set', 'is a toy.', 'erector set', 'toy', 17),
 ('a hula hoop', 'is a toy.', 'hula hoop', 'toy', 18),
 ('a jump rope', 'is a toy.', 'jump ro

In [13]:
# Cross every feature of a category with every item of that category, producing
# [preamble, stimulus, item, category, feature index] rows.
feature_stimuli = []
for file in glob.glob("../data/hampton_features/*.txt"):
    # The category is the file name without its directory and ".txt" suffix.
    # Matching the last path component (after either "/" or "\") avoids relying
    # on glob returning "/" separators.
    category = re.search(r'([^/\\]+)\.txt$', file).group(1)
    instances = items[category]
    with open(file, "r") as f:
        # feature_index is the 1-based line number of the feature in its file.
        for feature_index, entry in enumerate(f, start=1):
            if not entry.strip():
                # A blank line has no ";" and would otherwise crash the unpack below.
                continue
            # Each line is "<feature>;<production frequency>"; the frequency is
            # parsed but not used when building stimuli.
            feature, production_frequency = entry.split(";")
            for word in instances:
                predicate = feature
                if category == "sport":
                    preamble = word
                elif category == "bird":
                    # Birds always get an article; no plural check is applied here.
                    preamble = inflector.a(word)
                elif is_plural(word):
                    # Plural subject: re-inflect the leading verb of the feature
                    # and keep the remaining words unchanged.
                    preamble = word
                    verb, *rest = feature.split(" ")
                    predicate = " ".join([inflector.plural_verb(verb)] + rest)
                elif category in ("vegetable", "fruit"):
                    # Singular fruit/vegetable items are used without an article.
                    preamble = word
                else:
                    preamble = inflector.a(word)
                feature_stimuli.append([preamble, f"{predicate}.", word, category, feature_index])

In [14]:
# Display the generated [preamble, stimulus, item, category, feature index] rows for inspection.
feature_stimuli

[['a robin', 'is alive.', 'robin', 'bird', 1],
 ['a sparrow', 'is alive.', 'sparrow', 'bird', 1],
 ['a bluejay', 'is alive.', 'bluejay', 'bird', 1],
 ['a bluebird', 'is alive.', 'bluebird', 'bird', 1],
 ['a canary', 'is alive.', 'canary', 'bird', 1],
 ['a blackbird', 'is alive.', 'blackbird', 'bird', 1],
 ['a dove', 'is alive.', 'dove', 'bird', 1],
 ['a lark', 'is alive.', 'lark', 'bird', 1],
 ['a swallow', 'is alive.', 'swallow', 'bird', 1],
 ['a parakeet', 'is alive.', 'parakeet', 'bird', 1],
 ['an oriole', 'is alive.', 'oriole', 'bird', 1],
 ['a mockingbird', 'is alive.', 'mockingbird', 'bird', 1],
 ['a redbird', 'is alive.', 'redbird', 'bird', 1],
 ['a wren', 'is alive.', 'wren', 'bird', 1],
 ['a finch', 'is alive.', 'finch', 'bird', 1],
 ['a starling', 'is alive.', 'starling', 'bird', 1],
 ['a cardinal', 'is alive.', 'cardinal', 'bird', 1],
 ['an eagle', 'is alive.', 'eagle', 'bird', 1],
 ['a hummingbird', 'is alive.', 'hummingbird', 'bird', 1],
 ['a seagull', 'is alive.', 'seagul

In [15]:
# Write the category-membership stimuli, one CSV row per item, under a header row.
# newline="" is what the csv module requires of files passed to csv.writer:
# without it the writer's "\r\n" row terminator is translated to "\r\r\n" on
# Windows, which shows up as a blank line after every row.
with open("../data/rosch1975/rosch1975.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["preamble", "stimulus", "item", "category", "rank"])
    writer.writerows(stimuli)

In [16]:
# Write the feature stimuli, one CSV row per (feature, item) pair, under a header row.
# newline="" is what the csv module requires of files passed to csv.writer:
# without it the writer's "\r\n" row terminator is translated to "\r\r\n" on
# Windows, which shows up as a blank line after every row.
with open("../data/rosch1975/rosch1975_features.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["preamble", "stimulus", "item", "category", "feature"])
    writer.writerows(feature_stimuli)