## Generating data for experiments

In [138]:
import inflect
from pattern.en import singularize, pluralize
import glob
import re
from collections import defaultdict
import csv

In [24]:
inflector = inflect.engine()

In [69]:
def is_plural(plural_form):
    singular_form = singularize(plural_form)
    plural = True if plural_form != singular_form else False
    return plural

## Rosch (1975)

**Paper:** Cognitive representations of semantic categories

In [13]:
categories = ["furniture", "fruit", "vehicle", "weapon", "vegetable", 
              "tool", "bird", "sport", "toy", "clothing"]

In [26]:
inflector.plural_verb("has")

'have'

In [32]:
## Get word, see if plural, if plural then convert verb of feature to plural.
singularize("")

'past'

In [73]:
singularize("tennis")

'tenni'

In [131]:
stimuli = []
items = defaultdict(list)
for file in glob.glob("../data/rosch1975/*.txt"):
    category = re.search(r'(?<=5\/)(.*)(?=\.txt)', file).group(1)
    with open(file, "r") as f:
        for rank, word in enumerate(f):
            word = word.strip()
            items[category].append(word)
            if category == "sport":
                stimulus = f"{word} is a sport."
            else:
                if is_plural(word):
                    if category == "clothing":
                        stimulus = f"{word} are clothes."
                    else:
                        stimulus = f"{word} are {pluralize(category)}."
                else:
                    if category == "furniture" or category == "clothing":
                        stimulus = f"{inflector.a(word)} is a type of {category}."
                    elif category == "vegetable" or category == "fruit":
                        stimulus = f"{word} is a {category}."
                    else:
                        stimulus = f"{inflector.a(word)} is a {category}."
            stimuli.append((stimulus, word, category, rank+1))

In [136]:
len(stimuli)

566

In [133]:
feature_stimuli = []
for file in glob.glob("../data/hampton_features/*.txt"):
    category = re.search(r'(?<=features\/)(.*)(?=\.txt)', file).group(1)
    instances = items[category]
    with open(file, "r") as f:
        for i, entry in enumerate(f):
            feature, production_frequency = entry.split(";")
            for word in instances:
                if category == "sport":
                    feature_stimuli.append([f"{word} {feature}.", word, category, i+1])
#                     print(f"{j}_{i}, {word} {feature}.")
                else:
                    if is_plural(word):
                        feature_words = feature.split(" ")
                        verb = inflector.plural_verb(feature_words[0])
                        feature_sentence = " ".join([verb] + feature_words[1:])
                        feature_stimuli.append([f"{word} {feature_sentence}.", word, category, i+1])
#                         print(f"{j}_{i}, {word} {feature_sentence}.")
                    else:
                        if category == "vegetable" or category == "fruit":
                            feature_stimuli.append([f"{word} {feature}.", word, category, i+1])
                        else:
                            feature_stimuli.append([f"{inflector.a(word)} {feature}.", word, category, i+1])

In [141]:
feature_stimuli

[['a robin is alive.', 'robin', 'bird', 1],
 ['a sparrow is alive.', 'sparrow', 'bird', 1],
 ['a bluejay is alive.', 'bluejay', 'bird', 1],
 ['a bluebird is alive.', 'bluebird', 'bird', 1],
 ['a canary is alive.', 'canary', 'bird', 1],
 ['a blackbird is alive.', 'blackbird', 'bird', 1],
 ['a dove is alive.', 'dove', 'bird', 1],
 ['a lark is alive.', 'lark', 'bird', 1],
 ['a swallow is alive.', 'swallow', 'bird', 1],
 ['a parakeet is alive.', 'parakeet', 'bird', 1],
 ['an oriole is alive.', 'oriole', 'bird', 1],
 ['a mockingbird is alive.', 'mockingbird', 'bird', 1],
 ['a redbird is alive.', 'redbird', 'bird', 1],
 ['a wren is alive.', 'wren', 'bird', 1],
 ['a finch is alive.', 'finch', 'bird', 1],
 ['a starling is alive.', 'starling', 'bird', 1],
 ['a cardinal is alive.', 'cardinal', 'bird', 1],
 ['an eagle is alive.', 'eagle', 'bird', 1],
 ['a hummingbird is alive.', 'hummingbird', 'bird', 1],
 ['a seagull is alive.', 'seagull', 'bird', 1],
 ['a woodpecker is alive.', 'woodpecker', 'b

In [140]:
with open("../data/rosch1975/rosch1975.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["stimulus", "item", "category", "rank"])
    writer.writerows(stimuli)

In [142]:
with open("../data/rosch1975/rosch1975_features.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["stimulus", "item", "category", "feature"])
    writer.writerows(feature_stimuli)