In [65]:
import csv
import itertools

from collections import defaultdict
from tqdm import tqdm

In [2]:
# Read the concept/feature table and index it four ways:
#   concepts         -- unique concept names, in first-seen file order
#   features         -- unique feature names, in first-seen file order
#   concept_features -- concept -> list of its features (file order, duplicates kept)
#   categories       -- concept -> category (the last row for a concept wins)
concept_features = defaultdict(list)
categories = {}

# Plain dicts double as insertion-ordered sets. Unlike list(set(...)), whose
# order for strings changes between interpreter runs (hash randomisation),
# this keeps `concepts` / `features` in a reproducible order.
seen_concepts = {}
seen_features = {}

# newline='' is what the csv module asks for when it is handed a file object,
# so that line breaks inside quoted fields are parsed correctly.
with open('../data/xcslb_compressed.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)

    for line in reader:
        concept, feature = line['concept'], line['feature']

        seen_concepts[concept] = None
        seen_features[feature] = None
        categories[concept] = line['category']
        concept_features[concept].append(feature)

concepts = list(seen_concepts)
features = list(seen_features)

In [3]:
# feature -> {'positive': [concepts that list the feature],
#             'negative': [concepts that do not]}
feature_space = defaultdict(lambda: defaultdict(list))

# Freeze each concept's feature list into a set once, so the membership test
# inside the concepts x features loop is a hash lookup instead of a list scan.
concept_feature_sets = {concept: set(concept_features[concept]) for concept in concepts}

for c, f in tqdm(itertools.product(concepts, features), total=len(concepts) * len(features)):
    label = 'positive' if f in concept_feature_sets[c] else 'negative'
    feature_space[f][label].append(c)

100%|██████████| 1901650/1901650 [00:01<00:00, 952017.89it/s]


In [15]:
# Design notes for the inverse-scaling prompt construction. The string is not
# assigned to a name; as the cell's last expression it is only echoed as output.
'''
Pseudo-code for inverse-scaling:

standard template,
{premise: the property "<prop>" is {flipped/unflipped}
examples/shots: 4-shot} * n
query

sample 2m properties (constraint, at least k positive concepts): m aligned and m flipped. 
Additionally, sample 1 query property (no constraint)

randomize order
premise
sample k positive and negative concepts from each
'''

'\nPseudo-code for inverse-scaling:\n\nstandard template,\n{premise: the property "<prop>" is {flipped/unflipped}\nexamples/shots: 4-shot} * n\nquery\n\nsample 2m properties (constraint, at least k concepts): m aligned and m flipped. and 1 query (?)\n\nrandomize order\npremise\nsample k concepts from each\n'

In [66]:
import world

In [67]:
# Build the World instance that the rest of the notebook queries.
# The class is imported by name instead of being reached through the module
# global `world`: the assignment below rebinds `world` to the instance, so on a
# second run of this cell `world.World` would be looked up on the instance
# rather than on the module.
from world import World

world = World()
world.create()

521it [00:00, 3059.95it/s]


## Property Filtering

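* only properties that hold for animals (note: this filter is currently commented out in the candidate-selection cell, so every property is considered)
* have at least 5 positive and at least 5 negative members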

In [68]:
# Whatever `descendants()` returns for the 'animal.n.01' taxonomy node;
# used below for `concept in animals` membership checks.
animal_root = world.taxonomy['animal.n.01']
animals = animal_root.descendants()

In [69]:
# Unique properties attached to at least one concept that is in `animals`.
animal_props = list({
    prop
    for concept, concept_props in world.concept_features.items()
    if concept in animals
    for prop in concept_props
})

In [70]:
# Keep the properties that have enough positive AND negative concepts.
MIN_MEMBERS = 5  # minimum number of concepts required on each side

candidates = []  # (property, number of positive concepts) tuples
for prop, members in world.feature_space.items():
    # NOTE: the animal-only restriction (`prop in animal_props`) is currently
    # disabled, so every property in world.feature_space is considered.
    c_p = len(members['positive'])
    c_n = len(members['negative'])

    if c_p >= MIN_MEMBERS and c_n >= MIN_MEMBERS:
        candidates.append((prop, c_p))

In [71]:
# Display the (property, positive-count) tuples that passed the size filter.
candidates

[('has a trigger', 7),
 ('is a condiment', 8),
 ('is a chair', 6),
 ('can be unhealthy', 22),
 ('has no lid', 5),
 ('is fastened at the front', 5),
 ('can roll', 14),
 ('cannot fly', 6),
 ('can provide warmth', 17),
 ('has a mouthpiece', 9),
 ('can peck', 36),
 ('uses electricity', 15),
 ('has fibrous skin', 9),
 ('is made of skin', 9),
 ('can drag its prey', 10),
 ('has a steering wheel', 14),
 ('is worn on head', 5),
 ('is a flower', 15),
 ('can fit in the pocket', 8),
 ('is planted', 24),
 ('can contain chocolate', 5),
 ('can be found on farms', 17),
 ('can be moored', 14),
 ('can billow', 9),
 ('can kill people', 29),
 ('is luxurious', 5),
 ('can contain fruit flavor', 6),
 ('has a fishy smell', 30),
 ('is used while eating', 7),
 ('can be sweet in taste', 49),
 ('has a long body', 10),
 ('can be eaten raw', 42),
 ('is used for touching and feeling', 6),
 ('is spherical in shape', 18),
 ('can be used to kill animals', 5),
 ('is fragile and delicate', 16),
 ('can have padding', 7),


In [64]:
# Hand-picked properties. The literal contains repeated entries
# ('has ears', 'can breathe'), which are removed below.
properties = ['can fly',
              'is aquatic',
              'has fangs',
              'has ears',
              'can swim',
              'is a musical instrument',
              'has wings',
              'has fins',
              'is a poultry bird',
              'has two legs',
              'is a herbivore',
              'is a carnivore',
              'has ears',
              'has fur',
              'uses electricity',
              'has talons',
              'is a predator',
              'is cold blooded',
              'is warm blooded',
              'has feet',
              'is a mammal',
              'is a fish',
              'can breathe',
              'produces wool',
              'has a beak',
              'is an invertebrate',
              'is a reptile',
              'can lay eggs',
              'has feathers',
              'is a songbird',
              'has sails',
              'can be found in seas',
              'can spin webs',
              'is a fruit',
              'is a vegetable',
              'can kill people',
              'hunts in packs',
              'is a machine',
              'is planted',
              'has a tail',
              'is a flower',
              'is a weapon',
              'has wheels',
              'is domesticated',
              'can breathe',
              'is a farm animal',
              'can heat',
              'can be sweet in taste',
              'is feline',
              'can make music',
              'is an animal',
              'has striped patterns']

# De-duplicate while keeping the order written above. list(set(...)) would
# return the strings in an order that changes between kernel restarts, which
# makes any seeded sampling from this list irreproducible.
properties = list(dict.fromkeys(properties))

print(len(properties))

50


In [24]:
# Print every entry of world.features on its own line for manual inspection.
for feature_name in world.features:
    print(feature_name)

is worn for swimming
can be button shaped
has a trigger
can be grilled
is used in fish and chips
can shoot spikes
can contain cloth material
has six strings
has a floor
is not a horse
likes mirrors
reflects light
is held sideways
can have a bin bag
is a sink
is a condiment
can have yellow skin
is mulled
can cut down trees
stores nuts for winter
can store crockery
is a chair
can be unhealthy
has footboard
has a cap
has a cockpit
can flick out
uses ink
has no lid
is an ornamental fish
can squirt ink
has a firm flesh
comes from cows
can be fructose
can go through corks
uses its tail to balance
is an aircraft
is fastened at the front
wakes up early
can come in granules
is put into bouquets
can be connected to an amplifier
has an overflow channel
has a barrow
can have a lever
is made of muscle
is ground
produces catkin flowers
can roll
can be covered in chocolate
has stitching
cannot fly
has a long blade
can provide warmth
mimics human speech
is pink in color
has a mouthpiece
can be worn wi

is used to conceal
can be decaffeinated
can be operated by pirates
is eaten as beef
is used to make lemonade
eats scraps
can live long
is an explosive
is a big fruit
can be used for announcements
can eat
can eat animals
has a windscreen
used to have antennas
has knobs
is soluble
has a long tail
is used by artists and sculptors
has a cargo hold
is used by paramedics
has a hard seed
can keep rabbits
eats aphids
is not a finger
is frozen
has white meat
can unscrew
has a red crest
can hurt people
has a mouth
has big front teeth
can contain metal
has a big seed
is used with flour
is paired with a shield
has bitter skin
is crunchy
can have contact lenses
can have wire mesh
is cup shaped
has folded skin
can be swung
has leather straps
can be hit with a hammer
has an optic disc
can be toasted
can damage crops
is attached to a gun
has strong teeth
rinses water
is semicircular in shape
can transport people
can have a orange belly
can have scratches
can be used for gardening
is the size of one's 

has a rotating chamber
has a bulbous end
is quick to cook
is tropical
has a plug
is used at tea-time
can have kittens
can be found on trees
is a household item
has a retina
does drag along the ground
has sharp spines
can be squashy
has loud sirens
can float
can flatten
has a chin strap
is dark red in color
is kept in vinegar
can be violent
can wallow
disperses seeds in the wind
has a jacket
contains oil and fat
is eaten by Inuits and Eskimos
has a harsh sound
contains gunpowder
can be readymade
has a loud cry
has a part for thumb
can contain pasta or noodles
is peaty
can be yellow in color
has dials
can work in teams
is used for smelling
is found in urban areas
has moist skin
has a disc shape
contains a thigh
has white stripes
can be used to eat food
has small horns
is eaten with a spoon
is kept in a cage
is elliptical in shape
can be tailored
can be worn
cannot have metal placed in it
has velcro
is composed of florets
has a hole
can breathe
has short hair
has striped patterns
is a dri

In [72]:
# Inspect which concepts are stored as positive examples of 'is domesticated'.
world.feature_space['is domesticated']['positive']

['guinea_pig',
 'pigeon',
 'duck',
 'horse',
 'rat',
 'goose',
 'lamb',
 'goat',
 'chicken',
 'turkey',
 'cow',
 'sheep',
 'buffalo',
 'llama',
 'canary',
 'mouse',
 'calf',
 'cat',
 'camel',
 'dove',
 'budgie',
 'rabbit',
 'goldfish',
 'dog',
 'donkey',
 'pig']

In [62]:
# Inspect the features recorded for the concept 'guitar'.
world.concept_features['guitar']

{'can be a source of entertainment',
 'can be acoustic',
 'can be bass',
 'can be connected to an amplifier',
 'can be loud',
 'can be made of metal',
 'can be made of plastic',
 'can be made of wood',
 'can be played',
 'can be plucked',
 'can be strummed',
 'can have metal strings',
 'can have straps',
 'can make music',
 'can make sounds',
 'can play different notes',
 'can produce classical music',
 'can require cleaning',
 'can require electricity',
 'can resonate',
 'has a bridge',
 'has a fretboard',
 'has a handle',
 'has a hard surface',
 'has a hole',
 'has a hollow body',
 'has a long neck',
 'has a neck',
 'has knobs',
 'has six strings',
 'has strings',
 'has tuning pegs',
 'is a musical instrument',
 'is a string instrument',
 'is curved in shape',
 'is handheld',
 'is held in hand',
 'is manmade',
 'is played by music bands',
 'is played by musicians',
 'is played with a pick',
 'is played with fingers',
 'is played with hands',
 'is portable',
 'is used in jazz music',


In [28]:
import random

# Seed Python's module-level RNG so the sampling that follows is repeatable.
SEED = 42
random.seed(SEED)

In [29]:
# Draw two entries from `candidates` without replacement; each entry is a
# (property, positive-count) tuple.
pair = random.sample(candidates, k=2)