In [4]:
import torch

from collections import defaultdict, Counter
from semantic_memory import memory, vsm_utils

In [5]:
world = memory.Memory(
    concept_path="../data/concept_senses.csv",
    feature_path="../data/xcslb_compressed.csv",
    matrix_path="../data/concept_matrix.txt",
    feature_metadata="../data/feature_lexicon.csv",
)

world.create()

521it [00:00, 2829.98it/s]


In [6]:
world.categories['vehicle']

['bicycle',
 'buggy',
 'bus',
 'car',
 'caravan',
 'carriage',
 'cart',
 'coach',
 'glider',
 'helicopter',
 'limousine',
 'lorry',
 'moped',
 'motorcycle',
 'rocket',
 'sledge',
 'tank',
 'taxi',
 'tractor',
 'train',
 'tricycle',
 'trolley',
 'truck',
 'van',
 'wheelbarrow',
 'wheelchair',
 'airplane',
 'ambulance',
 'unicycle']

In [7]:
feature = 'can fly' # target property
n = 1 # number of negative samples

In [8]:
positives = world.feature_space[feature]['positive']

In [35]:
# sort negatives by similarity (generalized wup)
# for all negatives, get sims with rest of positive; select 
rest = world.feature_space[feature]['negative']

sims = [world.taxonomy.generalized_wup_sim(positives + [c]) for c in rest]
v, i = torch.topk(torch.tensor(sims), k = n * len(positives))
sampled = [rest[ii] for ii in i]

In [36]:
sampled

['yoyo',
 'rattle',
 'raft',
 'doll',
 'earmuffs',
 'catapult',
 'sink',
 'box',
 'dice',
 'puppet',
 'armor',
 'peeler',
 'bin',
 'chandelier',
 'basket',
 'chopping_board',
 'bag',
 'hair',
 'ball',
 'clamp',
 'harpoon',
 'table',
 'barrel',
 'basin',
 'train',
 'bathtub',
 'hoe',
 'jar',
 'strainer',
 'chest_of_drawers',
 'tray',
 'lamp',
 'mirror',
 'caterpillar',
 'club',
 'spirit_level',
 'coach',
 'drill',
 'plow',
 'dresser',
 'wallet',
 'sledge',
 'bolt',
 'bomb',
 'rake',
 'bookcase',
 'camera',
 'bottle',
 'cupboard',
 'bowl',
 'worm',
 'ladle',
 'cage',
 'rollerskate']

In [11]:
world.categories

defaultdict(None,
            {'animal': ['bat',
              'bear',
              'beaver',
              'buffalo',
              'calf',
              'camel',
              'cat',
              'cheetah',
              'chipmunk',
              'cow',
              'crocodile',
              'deer',
              'dog',
              'donkey',
              'fox',
              'frog',
              'gerbil',
              'giraffe',
              'goat',
              'gorilla',
              'guinea_pig',
              'hamster',
              'hedgehog',
              'hippo',
              'horse',
              'hyena',
              'kangaroo',
              'lamb',
              'leopard',
              'lion',
              'lizard',
              'llama',
              'monkey',
              'moose',
              'mouse',
              'panther',
              'pig',
              'platypus',
              'pony',
              'porcupine',
              'rabbit',
    

In [16]:
world.categories[world.lexicon[positives[0]].category]

['bicycle',
 'buggy',
 'bus',
 'car',
 'caravan',
 'carriage',
 'cart',
 'coach',
 'glider',
 'helicopter',
 'limousine',
 'lorry',
 'moped',
 'motorcycle',
 'rocket',
 'sledge',
 'tank',
 'taxi',
 'tractor',
 'train',
 'tricycle',
 'trolley',
 'truck',
 'van',
 'wheelbarrow',
 'wheelchair',
 'airplane',
 'ambulance',
 'unicycle']

In [29]:
positive_rest = set()
rest = (set(world.concepts) - positive_rest) - set(positives)
for c in positives:
    for cc in world.categories[world.lexicon[c].category]:
        if cc not in positives:
            positive_rest.add(cc)

In [30]:
sims = [world.taxonomy.generalized_wup_sim(positives + [c]) for c in positive_rest]

In [32]:
v, i = torch.topk(torch.tensor(sims), k = n * len(positives))
sampled = [positive_rest[ii] for ii in i]

TypeError: 'set' object is not subscriptable

In [13]:
world.taxonomy.generalized_wup_sim(positives + ['penguin'])

0.2956989247311828

In [37]:
world.vectors(positives).max(0).values.unsqueeze(0)

tensor([[0., 0., 0.,  ..., 0., 0., 0.]])

In [33]:
v, i = vsm_utils.jaccard(world.vectors(positives).max(0).values.unsqueeze(0), world.vectors(rest)).topk(len(positives))

In [34]:
v

tensor([[0.1366, 0.1335, 0.1308, 0.1307, 0.1261, 0.1250, 0.1230, 0.1215, 0.1199,
         0.1199, 0.1196, 0.1195, 0.1194, 0.1183, 0.1182, 0.1180, 0.1171, 0.1169,
         0.1169, 0.1165, 0.1160, 0.1155, 0.1133, 0.1131, 0.1127, 0.1125, 0.1119,
         0.1119, 0.1113, 0.1113, 0.1109, 0.1109, 0.1107, 0.1107, 0.1107, 0.1107,
         0.1099, 0.1095, 0.1095, 0.1093, 0.1089, 0.1088, 0.1087, 0.1083, 0.1080,
         0.1080, 0.1078, 0.1073, 0.1073, 0.1072, 0.1070, 0.1070, 0.1070, 0.1062]])

In [58]:
[rest[ii] for ii in i[0]]

['cockerel',
 'ostrich',
 'mouse',
 'penguin',
 'wolf',
 'alligator',
 'squirrel',
 'tiger',
 'hyena',
 'rat',
 'fox',
 'emu',
 'rabbit',
 'lizard',
 'lion',
 'leopard',
 'kangaroo',
 'horse',
 'crocodile',
 'frog',
 'cheetah',
 'chipmunk',
 'beaver',
 'hamster',
 'donkey',
 'raccoon',
 'seal',
 'gorilla',
 'tortoise',
 'toad',
 'bear',
 'gerbil',
 'cow',
 'iguana',
 'giraffe',
 'calf',
 'guinea_pig',
 'hippo',
 'llama',
 'platypus',
 'lamb',
 'pony',
 'monkey',
 'goat',
 'camel',
 'ox',
 'panther',
 'sheep',
 'otter',
 'elephant',
 'buffalo',
 'rhino',
 'zebra',
 'cat']

In [39]:
world.taxonomy.wup_sim('airplane', 'glider')

0.8461538461538461

In [41]:
torch.argmax(torch.tensor([0.1, 0.1, 0.1, 0.05]))

tensor(0)

In [81]:
world.features

['absorbs sweat',
 'absorbs water',
 'adds air to a mixture',
 'adds flavor to food',
 'allows movement',
 'attaches to rocks',
 'attracts bees',
 'basks in the sun',
 'beats its chest',
 'belongs to the onion family',
 'breeds rapidly',
 'bruises over time',
 'builds nests on the ground',
 'buries head in sand',
 'can absorb shocks',
 'can affect urine',
 'can ambush its prey',
 'can attached to walls',
 'can attract insects',
 'can avoid traffic jams',
 'can bark',
 'can be a bunk',
 'can be a distraction',
 'can be a pet',
 'can be a residential property',
 'can be a source of entertainment',
 'can be a trophy',
 'can be a unit of measurement',
 'can be acoustic',
 'can be added to a hot drink',
 'can be added to alcoholic drinks',
 'can be added to cake',
 'can be added to cereal',
 'can be aimed',
 'can be airborne',
 'can be analog',
 'can be attached to a DVD player',
 'can be attached to a car',
 'can be attached to a plane',
 'can be attached to a tractor',
 'can be attached t

In [101]:
n = 3
feature = 'can be found in supermarkets'
positives = world.feature_space[feature]['positive']
rest = world.feature_space[feature]['negative']

print(f'POSITIVES: {len(positives)}; NEGATIVES: {len(rest)}')

negatives = []
for concept in positives:
    space = list(set(rest) - set(negatives))
    # get nearest neighbor from rest - concept
    if len(space) > n:
        sims = world.similarity(concept, space)
        vs, idx = torch.topk(sims, n)
        sampled = [space[i] for i in idx.flatten()]
        negatives.extend(sampled)
    else:
        sims = world.similarity(concept, rest)
        vs, idx = torch.topk(sims, n)
        sampled = [rest[i] for i in idx.flatten()]
        negatives.extend(sampled)

POSITIVES: 265; NEGATIVES: 256


In [102]:
len(negatives)

795

In [94]:
len(rest), len(positives)

(256, 54)

In [59]:
world.similarity(positives[0], rest)

tensor([[0.0612, 0.0355, 0.3269, 0.0068, 0.0072, 0.0183, 0.0168, 0.0614, 0.0654,
         0.0833, 0.0157, 0.0600, 0.0161, 0.0075, 0.0504, 0.0000, 0.0381, 0.0381,
         0.0545, 0.0068, 0.0556, 0.2947, 0.0667, 0.0598, 0.0600, 0.0504, 0.0769,
         0.0227, 0.0278, 0.0210, 0.0583, 0.0084, 0.0194, 0.0632, 0.2667, 0.0182,
         0.0792, 0.0190, 0.1368, 0.0198, 0.0072, 0.2857, 0.0594, 0.1031, 0.0095,
         0.0680, 0.0800, 0.0603, 0.0536, 0.0727, 0.0700, 0.0194, 0.0000, 0.0086,
         0.0079, 0.0326, 0.0156, 0.0642, 0.0278, 0.3529, 0.0777, 0.4839, 0.0084,
         0.0000, 0.0160, 0.0690, 0.0079, 0.0276, 0.0479, 0.1275, 0.0648, 0.1042,
         0.1485, 0.0177, 0.0190, 0.4184, 0.3333, 0.0083, 0.0160, 0.3095, 0.0153,
         0.3049, 0.0583, 0.0270, 0.0541, 0.0165, 0.0163, 0.0082, 0.0556, 0.0159,
         0.1351, 0.0545, 0.0179, 0.0707, 0.0085, 0.0318, 0.0070, 0.0500, 0.0261,
         0.0357, 0.0561, 0.0081, 0.0481, 0.0000, 0.0156, 0.0588, 0.0566, 0.0180,
         0.0918, 0.0729, 0.4

In [103]:
world.verbalize('airplane', 'can fly')

'an airplane can fly.'