## Generating data for experiments

In [2]:
import inflect
from pattern.en import singularize, pluralize
import glob
import re
from collections import defaultdict
import csv

In [3]:
# Shared inflect engine; later cells call inflector.a(...) and inflector.plural_verb(...).
inflector = inflect.engine()

In [4]:
def is_plural(plural_form):
    """Report whether ``plural_form`` looks like a plural noun.

    The word is run through ``singularize``; it counts as plural exactly when
    singularizing changes it. The check is purely string-based, so a singular
    word that ``singularize`` still alters is reported as plural.

    Args:
        plural_form: The word (or phrase) to test.

    Returns:
        bool: ``True`` if ``singularize(plural_form)`` differs from
        ``plural_form``, ``False`` otherwise.
    """
    return singularize(plural_form) != plural_form

## Rosch (1975)

**Paper:** Cognitive representations of semantic categories

In [5]:
# Semantic category labels for the Rosch (1975) section, one per line.
categories = [
    "furniture",
    "fruit",
    "vehicle",
    "weapon",
    "vegetable",
    "tool",
    "bird",
    "sport",
    "toy",
    "clothing",
]

In [6]:
# Build one (preamble, stimulus, item, category, rank) row per word listed in
# each ../data/rosch1975/*.txt file. The preamble is the capitalized subject
# (with an indefinite article where the branch below adds one), the stimulus
# is the predicate naming the category, and rank is the word's 1-based line
# position in its file.
stimuli = []
items = defaultdict(list)
for file in glob.glob("../data/rosch1975/*.txt"):
    # The category is the file's base name without ".txt". Matching from the
    # last path separator ("/" or "\") avoids depending on the directory name
    # or on the platform's separator.
    category = re.search(r'([^\\/]+)\.txt$', file).group(1)
    with open(file, "r") as f:
        for rank, word in enumerate(f):
            word = word.strip()
            if not word:
                # A blank line carries no item; skip it so no empty row is emitted.
                continue
            items[category].append(word)
            preamble = word  # default subject is the bare word
            if category == "sport":
                stimulus = "is a sport."
            elif is_plural(word):
                if category == "clothing":
                    stimulus = "are clothes."
                else:
                    stimulus = f"are {pluralize(category)}."
            elif category in ("furniture", "clothing"):
                # These two categories are phrased as "a type of ...".
                preamble = inflector.a(word)
                stimulus = f"is a type of {category}."
            elif category in ("vegetable", "fruit"):
                # Singular vegetables and fruits are used without an article.
                stimulus = f"is a {category}."
            else:
                preamble = inflector.a(word)
                stimulus = f"is a {category}."
            stimuli.append((preamble.capitalize(), stimulus, word, category, rank + 1))

In [7]:
# Alternate framing of the Rosch items: the preamble carries the subject AND
# the verb phrase ("A gun is a"), and the stimulus is only the category word
# ("weapon."). Rows are (preamble, stimulus, item, category, rank), with rank
# being the word's 1-based line position in its file.
alternate_stimuli = []
# Words that is_plural flags but that are phrased with "is a" and no article.
not_plural = ["gas", "glass", "teargas", "asparagus", "watercress"]
# Words that is_plural flags but that are phrased as singular with an
# indefinite article.
a_determiner = ["chest of drawers", "bus", "albatross", "dress"]
items = defaultdict(list)
for file in glob.glob("../data/rosch1975/*.txt"):
    # The category is the file's base name without ".txt". Matching from the
    # last path separator ("/" or "\") avoids depending on the directory name
    # or on the platform's separator.
    category = re.search(r'([^\\/]+)\.txt$', file).group(1)
    with open(file, "r") as f:
        for rank, word in enumerate(f):
            word = word.strip()
            if not word:
                # A blank line carries no item; skip it so no empty row is emitted.
                continue
            items[category].append(word)
            if category == "sport":
                preamble = f"{word} is a"
                stimulus = "sport."
            elif is_plural(word):
                if category == "clothing":
                    if word == "dress":
                        # NOTE(review): this yields "A dress is a clothing."; the
                        # rosch1975.csv cell phrases singular clothing as
                        # "is a type of clothing" - confirm which is intended.
                        preamble = "a dress is a"
                        stimulus = "clothing."
                    else:
                        preamble = f"{word} are"
                        stimulus = "clothes."
                elif word in not_plural:
                    preamble = f"{word} is a"
                    stimulus = f"{category}."
                elif word in a_determiner:
                    # These words reach the plural branch only because is_plural
                    # flags them, so the furniture phrasing has to be applied
                    # here; otherwise the row reads "... is a furniture.".
                    if category == "furniture":
                        preamble = f"{inflector.a(word)} is a type of"
                    else:
                        preamble = f"{inflector.a(word)} is a"
                    stimulus = f"{category}."
                else:
                    preamble = f"{word} are"
                    stimulus = f"{pluralize(category)}."
            elif category == "furniture":
                preamble = f"{inflector.a(word)} is a type of"
                stimulus = f"{category}."
            elif category in ("vegetable", "fruit"):
                # Singular vegetables and fruits are used without an article.
                preamble = f"{word} is a"
                stimulus = f"{category}."
            else:
                preamble = f"{inflector.a(word)} is a"
                stimulus = f"{category}."
            alternate_stimuli.append((preamble.capitalize(), stimulus, word, category, rank + 1))

In [41]:
# Debug check: confirm "dress" is in the list of words given an indefinite article.
'dress' in a_determiner

True

In [8]:
# Display the generated alternate rows for inspection.
alternate_stimuli

[('A gun is a', 'weapon.', 'gun', 'weapon', 1),
 ('A pistol is a', 'weapon.', 'pistol', 'weapon', 2),
 ('A revolver is a', 'weapon.', 'revolver', 'weapon', 3),
 ('A machine gun is a', 'weapon.', 'machine gun', 'weapon', 4),
 ('A rifle is a', 'weapon.', 'rifle', 'weapon', 5),
 ('A switchblade is a', 'weapon.', 'switchblade', 'weapon', 6),
 ('A knife is a', 'weapon.', 'knife', 'weapon', 7),
 ('A dagger is a', 'weapon.', 'dagger', 'weapon', 8),
 ('A shotgun is a', 'weapon.', 'shotgun', 'weapon', 9),
 ('A sword is a', 'weapon.', 'sword', 'weapon', 10),
 ('A bomb is a', 'weapon.', 'bomb', 'weapon', 11),
 ('A hand grenade is a', 'weapon.', 'hand grenade', 'weapon', 12),
 ('An atom bomb is a', 'weapon.', 'Atom bomb', 'weapon', 13),
 ('A bayonet is a', 'weapon.', 'bayonet', 'weapon', 14),
 ('A spear is a', 'weapon.', 'spear', 'weapon', 15),
 ('A bazooka is a', 'weapon.', 'bazooka', 'weapon', 16),
 ('A cannon is a', 'weapon.', 'cannon', 'weapon', 17),
 ('A bow and arrow is a', 'weapon.', 'bow a

In [9]:
# Save the alternate stimuli with a header row.
# newline="" leaves line endings to the csv module, as its documentation
# requires; without it, platforms that translate "\n" write "\r\r\n" row
# endings.
with open("../data/rosch1975/rosch1975_alternate.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["preamble", "stimulus", "item", "category", "rank"])
    writer.writerows(alternate_stimuli)

In [None]:
# Display the generated rows for inspection.
stimuli

In [20]:
# Cross each category's items with every feature listed in the matching
# ../data/hampton_features/*.txt file. Rows are
# [preamble, stimulus, item, category, feature_index], where feature_index is
# the feature's 1-based line position in its file. Rows are emitted feature by
# feature, and within a feature in item order.
# Relies on `items` having been filled by one of the Rosch cells above.
feature_stimuli = []
for file in glob.glob("../data/hampton_features/*.txt"):
    # The category is the file's base name without ".txt". Matching from the
    # last path separator ("/" or "\") avoids depending on the directory name
    # or on the platform's separator.
    category = re.search(r'([^\\/]+)\.txt$', file).group(1)
    instances = items[category]

    # The subject and its plurality do not depend on the feature, so work them
    # out once per item instead of once per (feature, item) pair.
    # NOTE(review): is_plural is used directly here, so the not_plural /
    # a_determiner exceptions from the alternate-stimuli cell are not applied
    # - confirm this is intended.
    subjects = []
    for word in instances:
        # Sports and birds always keep the feature's verb as written.
        takes_plural_verb = category not in ("sport", "bird") and is_plural(word)
        if category == "sport" or takes_plural_verb or category in ("vegetable", "fruit"):
            subject = word
        else:
            subject = inflector.a(word)
        subjects.append((word, subject.capitalize(), takes_plural_verb))

    with open(file, "r") as f:
        for i, entry in enumerate(f):
            entry = entry.strip()
            if not entry:
                # Skip blank lines; they would otherwise fail the unpacking below.
                continue
            # Each line is "feature;production_frequency"; only the feature
            # text is used in the rows.
            feature, production_frequency = entry.split(";")
            feature = feature.strip()
            for word, subject, takes_plural_verb in subjects:
                if takes_plural_verb:
                    # Re-inflect the leading verb to agree with a plural subject.
                    feature_words = feature.split(" ")
                    verb = inflector.plural_verb(feature_words[0])
                    feature_sentence = " ".join([verb] + feature_words[1:])
                else:
                    feature_sentence = feature
                feature_stimuli.append([subject, f"{feature_sentence}.", word, category, i + 1])

In [21]:
# Display the generated feature rows for inspection.
feature_stimuli

[['A robin', 'is alive.', 'robin', 'bird', 1],
 ['A sparrow', 'is alive.', 'sparrow', 'bird', 1],
 ['A bluejay', 'is alive.', 'bluejay', 'bird', 1],
 ['A bluebird', 'is alive.', 'bluebird', 'bird', 1],
 ['A canary', 'is alive.', 'canary', 'bird', 1],
 ['A blackbird', 'is alive.', 'blackbird', 'bird', 1],
 ['A dove', 'is alive.', 'dove', 'bird', 1],
 ['A lark', 'is alive.', 'lark', 'bird', 1],
 ['A swallow', 'is alive.', 'swallow', 'bird', 1],
 ['A parakeet', 'is alive.', 'parakeet', 'bird', 1],
 ['An oriole', 'is alive.', 'oriole', 'bird', 1],
 ['A mockingbird', 'is alive.', 'mockingbird', 'bird', 1],
 ['A redbird', 'is alive.', 'redbird', 'bird', 1],
 ['A wren', 'is alive.', 'wren', 'bird', 1],
 ['A finch', 'is alive.', 'finch', 'bird', 1],
 ['A starling', 'is alive.', 'starling', 'bird', 1],
 ['A cardinal', 'is alive.', 'cardinal', 'bird', 1],
 ['An eagle', 'is alive.', 'eagle', 'bird', 1],
 ['A hummingbird', 'is alive.', 'hummingbird', 'bird', 1],
 ['A seagull', 'is alive.', 'seagul

In [22]:
# Save the category-membership stimuli with a header row.
# newline="" leaves line endings to the csv module, as its documentation
# requires; without it, platforms that translate "\n" write "\r\r\n" row
# endings.
with open("../data/rosch1975/rosch1975.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["preamble", "stimulus", "item", "category", "rank"])
    writer.writerows(stimuli)

In [23]:
# Save the feature stimuli with a header row. The value written under
# "feature" is the fifth element of each row, which the feature cell sets to
# the feature's 1-based line index (the feature text itself is in "stimulus").
# newline="" leaves line endings to the csv module, as its documentation
# requires; without it, platforms that translate "\n" write "\r\r\n" row
# endings.
with open("../data/rosch1975/rosch1975_features.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["preamble", "stimulus", "item", "category", "feature"])
    writer.writerows(feature_stimuli)