In [70]:
import csv
import itertools
import pathlib

from semantic_memory import memory
from collections import defaultdict

In [10]:
MEM_PATH = "../data/comps-world"

# Load the semantic memory from its four backing files under MEM_PATH.
mem = memory.Memory(
    concept_path=f"{MEM_PATH}/concept_senses.csv",
    feature_path=f"{MEM_PATH}/xcslb_compressed.csv",
    matrix_path=f"{MEM_PATH}/concept_matrix.txt",
    feature_metadata=f"{MEM_PATH}/feature_lexicon.csv",
)
mem.create()

# lexicon: concept name -> memory.Concept
# senses:  concept name -> that concept's sense value
lexicon = {}
senses = {}

# Seed both maps from categories.csv. The path is derived from MEM_PATH so the
# data directory is configured in exactly one place, and newline="" is used as
# the csv module recommends for any file object handed to a csv reader.
with open(f"{MEM_PATH}/categories.csv", "r", newline="") as f:
    for row in csv.DictReader(f):
        concept_name = row["concept"]
        lexicon[concept_name] = memory.Concept(
            concept=concept_name,
            category=row["category"],
            sense=row["node-value"],
            article=row["article"],
            singular=row["singular"],
            plural=row["plural"],
            generic=row["generic"],
        )
        senses[concept_name] = row["node-value"]

# Entries from the loaded memory are applied last, so they override any
# categories.csv row that shares the same concept name.
for concept_name, entry in mem.lexicon.items():
    lexicon[concept_name] = entry
    senses[concept_name] = entry.sense

521it [00:00, 4890.86it/s]


In [23]:
# Names of every concept that takes part in the pairwise is-a stimuli.
CATEGORIES = [
    # broad classes
    "animal", "vertebrate", "invertebrate", "mammal", "bird",
    "reptile", "amphibian", "fish", "arthropod", "insect",
    # narrower groups
    "bird of prey", "aquatic bird", "canine", "feline",
    "primate", "aquatic mammal", "rodent",
    # leaf values found under the vertebrate and invertebrate taxonomy nodes
    *mem.taxonomy["vertebrate.n.01"].leaf_values(),
    *mem.taxonomy["invertebrate.n.01"].leaf_values(),
]

In [37]:
# Print every category name that has no lexicon entry, so a missing concept is
# visible before the lookup below fails on it with a KeyError.
missing_categories = [name for name in CATEGORIES if name not in lexicon]
for name in missing_categories:
    print(name)

# Concept objects in the same order as CATEGORIES.
big_concept_list = [lexicon[name] for name in CATEGORIES]

In [66]:
def is_a(c1, c2):
    """Return the statement "<c1> is a <c2>" phrased for the two concepts.

    Args:
        c1: the child concept. Its ``generic`` attribute selects the phrasing:
            ``"s"`` uses ``c1.article`` with a singular verb, anything else
            uses ``c1.plural`` with a plural verb.
        c2: the parent concept. For "bird of prey", "aquatic bird" and
            "aquatic mammal" a bare copula ("is"/"are") is used with
            ``c2.article``/``c2.plural``; every other parent is phrased as
            "... a type of <c2.singular>".

    Returns:
        The statement as a single string, e.g. "an eagle is a type of bird".
    """
    # Parents that are joined with a bare "is"/"are" rather than "a type of".
    copula_parents = ("bird of prey", "aquatic bird", "aquatic mammal")

    singular_subject = c1.generic == "s"
    subject = c1.article if singular_subject else c1.plural

    if c2.concept in copula_parents:
        # The parent agrees in number with the subject.
        if singular_subject:
            verb, parent = "is", c2.article
        else:
            verb, parent = "are", c2.plural
    else:
        # "a type of" always takes the singular parent noun.
        verb = "is a type of" if singular_subject else "are a type of"
        parent = c2.singular

    return f"{subject} {verb} {parent}"

In [67]:
# Spot-check the generated phrasing for a single child/parent pair
# (an earlier trial used 'broccoli' vs. 'animal').
# Author's note on this output: "not good".
is_a(c1=lexicon["eagle"], c2=lexicon["bird"])

'an eagle is a type of bird'

In [68]:
# One (child, parent, statement) triple for every ordered pair of concepts,
# self-pairs included; "animal" is never used as the child.
pairwise = [
    (child.concept, parent.concept, is_a(child, parent))
    for child, parent in itertools.product(big_concept_list, repeat=2)
    if child.concept != "animal"
]

In [74]:
# Create the output directory (and any missing parents); no-op if it exists.
stimuli_dir = pathlib.Path("../data/tsv/")
stimuli_dir.mkdir(parents=True, exist_ok=True)

In [75]:
"""Answer the question. Is it true that {}? Answer with Yes or No:"""

# Prompt wrapped around every is-a statement; "{}" is the statement slot.
question_template = "Answer the question. Is it true that {}? Answer with Yes or No:"

# newline="" is required by the csv module for files passed to csv.writer:
# the writer emits "\r\n" itself, and without it platforms that translate
# "\n" on write end up with "\r\r\n" row endings (blank lines between rows).
with open("../data/tsv/stimuli.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["concept1", "concept2", "question"])
    # One row per (child, parent) pair, with the statement embedded in the prompt.
    writer.writerows(
        (c1, c2, question_template.format(isa_statement))
        for c1, c2, isa_statement in pairwise
    )