In [1]:
from collections import defaultdict, Counter
from semantic_memory import memory

import torch

In [2]:
# Build the semantic memory from the concept, feature, matrix and
# feature-lexicon files under ../data.
world = memory.Memory(
    concept_path="../data/concept_senses.csv",
    feature_path="../data/xcslb_compressed.csv",
    matrix_path="../data/concept_matrix.txt",
    feature_metadata="../data/feature_lexicon.csv",
)
# NOTE(review): create() presumably loads/populates the memory (the recorded
# output shows a 521-item progress bar) -- confirm against semantic_memory.
world.create()


521it [00:00, 2119.85it/s]


In [3]:
'''
- Select category from taxonomy
- get all possible features
- for all descendants, count features/len(descendants)
'''

'\n- Select category from taxonomy\n- get all possible features\n- for all descendants, count features/len(descendants)\n'

In [4]:
# Display the memory's repr as a quick check of how many concepts and properties it holds.
world

Semantic Memory containing 521 concepts and 3649 properties.

In [13]:
# Taxonomy node explored in the cells below. The label used to read 'mammals',
# but the lookup always targeted the vertebrate node, so the name now holds the
# key that is actually queried.
category = 'vertebrate.n.01'

# Concept names found under that node (descendants() returns a list of strings).
members = world.taxonomy[category].descendants()

In [14]:
# Per-feature share of `members`: sum the member vectors over the member axis,
# then divide by the number of members.
ratio = world.vectors(members).sum(dim=0) / len(members)

In [12]:
# Spot-check a single member by position.
# NOTE(review): this cell's execution count (12) is lower than that of the cell
# assigning `members` (13), so the recorded output may come from an earlier
# value of `members` -- re-run top to bottom to confirm.
members[34]

'emu'

In [15]:
def get_generics(category, threshold=0.85, memory=None):
    """Collect the "generic" features of a taxonomy category.

    A feature is generic for a category when it is present in at least
    ``threshold`` (a proportion) of the category's descendants but not in all
    of them. For example, with a threshold of 0.8 the function selects the
    features that are present in at least 80% of the descendants, excluding
    features shared by every descendant.

    Parameters
    ----------
    category : str
        Key of the taxonomy node, e.g. ``'bird.n.01'``.
    threshold : float, optional
        Minimum proportion of descendants that must have a feature for it to
        be selected. Defaults to 0.85.
    memory : optional
        Object exposing ``taxonomy``, ``vectors`` and ``features`` the way the
        notebook-level ``world`` does. Defaults to ``world``.

    Returns
    -------
    defaultdict
        Mapping of feature name to its instances and exceptions::

            {
                feature1: {
                    instances: [],
                    exceptions: []
                },
                feature2: {
                    instances: [],
                    exceptions: []
                },
                ...
            }

        Both keys are always present. ``default_factory`` is cleared on the
        outer and inner mappings, so looking up a missing key raises
        ``KeyError`` instead of silently inserting an entry. A category
        without descendants yields an empty mapping.

    TODO: add a way to add category lemma names as a dictionary (from the taxonomy)
    """
    if memory is None:
        # Fall back to the notebook-level semantic memory.
        memory = world

    generics = defaultdict(lambda: defaultdict(list))
    members = memory.taxonomy[category].descendants()

    if len(members) == 0:
        # Nothing to measure coverage over; avoid dividing by zero below.
        generics.default_factory = None
        return generics

    # Compute the member-by-feature matrix once and reuse it.
    member_vectors = memory.vectors(members)
    coverage = member_vectors.sum(0) / len(members)

    # Keep features that are frequent enough, yet have at least one exception.
    candidate_features = ((coverage >= threshold) & (coverage < 1.0)).nonzero().flatten()
    feature_names = [memory.features[i] for i in candidate_features.tolist()]

    subspace = member_vectors[:, candidate_features]

    # (row, column) pairs: rows index `members`, columns index `feature_names`.
    idx = {
        'instances': (subspace != 0.0).nonzero().tolist(),
        'exceptions': (subspace == 0.0).nonzero().tolist()
    }

    for role, pairs in idx.items():
        for row, col in pairs:
            generics[feature_names[col]][role].append(members[row])

    # Guarantee both keys exist, then freeze the mappings so later lookups of
    # unknown keys fail loudly instead of creating empty entries.
    for entry in generics.values():
        for role in ('instances', 'exceptions'):
            entry.setdefault(role, [])
        entry.default_factory = None
    generics.default_factory = None

    return generics


In [16]:
# Generic bird features: present in at least 85% of birds, but not in all of them.
bird_generics = get_generics(category='bird.n.01', threshold=0.85)

# Inspect a single entry: which birds have the feature and which are exceptions.
bird_generics['can fly']

defaultdict(None,
            {'instances': ['budgie',
              'parakeet',
              'buzzard',
              'falcon',
              'hawk',
              'eagle',
              'owl',
              'canary',
              'magpie',
              'raven',
              'nightingale',
              'robin',
              'starling',
              'sparrow',
              'wren',
              'chicken',
              'turkey',
              'dove',
              'pigeon',
              'partridge',
              'peacock',
              'crane',
              'flamingo',
              'heron',
              'duck',
              'goose',
              'pelican',
              'seagull',
              'swan',
              'hummingbird',
              'kingfisher',
              'woodpecker'],
             'exceptions': ['cockerel', 'penguin', 'emu', 'ostrich']})

In [18]:
# Look up the lexicon entry for one feature; the recorded output shows its type and negated form.
world.feature_lexicon['can be airborne']

Feature(feature='can be airborne', feature_type='encyclopedic', negation='cannot be airborne')

In [17]:
# Indices of the features present in at least 85% -- but not all -- of `members`.
selected_features = torch.nonzero((ratio >= 0.85) & (ratio < 1.0)).flatten()
selected_features

tensor([ 976, 1565, 1567, 1710, 1901])

In [24]:
# Show only the shape (members x selected features) rather than dumping the
# whole tensor; this matches the recorded output, which is a torch.Size.
world.vectors(members)[:, selected_features].shape

torch.Size([111, 5])

In [29]:
# Print every (member, feature) pair where a member lacks one of the selected features.
missing_mask = world.vectors(members)[:, selected_features] == 0
for member_idx, feature_pos in missing_mask.nonzero().tolist():
    feature_id = selected_features[feature_pos].item()
    print(members[member_idx], world.features[feature_id])

dolphin has a tongue
dolphin has forelimbs
whale has a nose
whale has forelimbs
turtle has a nose
carp can make sounds
carp has a neck
carp has a nose
carp has a tongue
carp has forelimbs
goldfish can make sounds
goldfish has a neck
goldfish has a nose
goldfish has a tongue
goldfish has forelimbs
minnow can make sounds
minnow has a neck
minnow has a nose
minnow has a tongue
minnow has forelimbs
cod can make sounds
cod has a neck
cod has a nose
cod has a tongue
cod has forelimbs
herring can make sounds
herring has a neck
herring has a nose
herring has a tongue
herring has forelimbs
sardine can make sounds
sardine has a neck
sardine has a nose
sardine has a tongue
sardine has forelimbs
salmon can make sounds
salmon has a neck
salmon has a nose
salmon has a tongue
salmon has forelimbs
trout can make sounds
trout has a neck
trout has a nose
trout has a tongue
trout has forelimbs
eel can make sounds
eel has a neck
eel has a nose
eel has a tongue
eel has forelimbs
flounder can make sounds
fl

In [19]:
# 976 is the first index listed in the recorded `selected_features` output.
feature = world.features[976]
print(feature)
print('----')

# Print each member whose feature collection does not contain it.
for name in members:
    if feature in world.concept_features[name]:
        continue
    print(name)
        

can make sounds
----
carp
goldfish
minnow
cod
herring
sardine
salmon
trout
eel
flounder
mackerel
tuna
seahorse
shark


In [57]:
# List the concepts found under the aquatic-bird node.
world.taxonomy['aquatic_bird.n.01'].descendants()

['crane',
 'flamingo',
 'heron',
 'duck',
 'goose',
 'pelican',
 'penguin',
 'seagull',
 'swan']

is semi aquatic
----
seagull


In [83]:
# Sanity check: count the positions where row 0 of the embedding matrix equals
# the vector looked up for 'accordion'. The recorded count (3649) equals the
# number of properties reported for the memory, i.e. every position matched.
(world.vectors.embeddings[0] == world.vectors('accordion')).sum()

tensor(3649)

In [81]:
# List every concept found under the living-thing node (long output).
world.taxonomy['living_thing.n.01'].descendants()

['bat',
 'bear',
 'cat',
 'cheetah',
 'leopard',
 'lion',
 'panther',
 'tiger',
 'dog',
 'fox',
 'hyena',
 'wolf',
 'raccoon',
 'skunk',
 'otter',
 'beaver',
 'chipmunk',
 'squirrel',
 'gerbil',
 'guinea_pig',
 'hamster',
 'mouse',
 'porcupine',
 'rat',
 'buffalo',
 'cow',
 'ox',
 'goat',
 'sheep',
 'deer',
 'moose',
 'giraffe',
 'camel',
 'hippo',
 'llama',
 'pig',
 'donkey',
 'horse',
 'pony',
 'zebra',
 'rhino',
 'gorilla',
 'monkey',
 'hedgehog',
 'rabbit',
 'seal',
 'walrus',
 'dolphin',
 'whale',
 'elephant',
 'kangaroo',
 'platypus',
 'crocodile',
 'alligator',
 'lizard',
 'iguana',
 'rattlesnake',
 'tortoise',
 'turtle',
 'frog',
 'toad',
 'budgie',
 'parakeet',
 'buzzard',
 'falcon',
 'hawk',
 'eagle',
 'owl',
 'canary',
 'magpie',
 'raven',
 'nightingale',
 'robin',
 'starling',
 'sparrow',
 'wren',
 'chicken',
 'cockerel',
 'turkey',
 'dove',
 'pigeon',
 'partridge',
 'peacock',
 'crane',
 'flamingo',
 'heron',
 'duck',
 'goose',
 'pelican',
 'penguin',
 'seagull',
 'swan',


In [23]:
# Inspect a single taxonomy node; its repr shows the parent and the child nodes.
world.taxonomy['animal.n.01']

Node animal.n.01
Parent:organism.n.01
Children: ['chordate.n.01', 'young.n.01', 'invertebrate.n.01', 'larva.n.01']