In [41]:
import json
from nltk.corpus import wordnet as wn

### Quick WordNet Intro

##### WordNet is composed of so-called synsets (sets of synonyms)

##### Use wn.synsets(WORD) to find potential synsets

In [2]:
# List every synset WordNet associates with the word 'dog' (shown as the cell output)
wn.synsets('dog')

[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01'),
 Synset('chase.v.01')]

##### Or, if you already know the synset ID, use wn.synset(SYNSET_ID)

In [60]:
# Look up one specific synset by its ID; the following cells inspect this object
synset = wn.synset('dog.n.01')

##### Synsets have a number of attributes. The most important ones are the name and definition

In [39]:
# Show the synset itself plus its name, definition and usage examples,
# one "label: value" line per attribute.
synset_report = (
    ("Synset: {}", synset),
    ("Name: {}", synset.name()),
    ("Definition: {}", synset.definition()),
    ("Examples: {}", synset.examples()),
)
for template, value in synset_report:
    print(template.format(value))

Synset: Synset('dog.n.01')
Name: dog.n.01
Definition: a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Examples: ['the dog barked all night']


##### All synsets in WordNet lie on a directed acyclic graph.
##### The synsets directly above and below a synset in the graph are the hypernyms and hyponyms of that synset

In [61]:
# Direct neighbours of the synset in the WordNet graph:
# hypernyms are printed first, then hyponyms.
direct_hypernyms = synset.hypernyms()
direct_hyponyms = synset.hyponyms()
print("Hypernyms: {}".format(direct_hypernyms))
print("Hyponyms: {}".format(direct_hyponyms))

Hypernyms: [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
Hyponyms: [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01')]


##### To find all elements below a certain synset we have to scan the hyponyms of that word recursively

In [55]:
def get_hyponyms_recursive(synset):
    """Return every synset below `synset` in the hyponym graph.

    The result lists the direct hyponyms first, followed by the recursive
    results for each direct hyponym in order. No de-duplication is done, so a
    synset that is reachable via several paths appears once per path.

    Args:
        synset: object exposing a ``hyponyms()`` method that returns a list
            of objects of the same kind (e.g. an NLTK WordNet ``Synset``).

    Returns:
        list: all transitive hyponyms of `synset`; empty if it has none.
    """
    direct_hyponyms = synset.hyponyms()
    # Copy so the list handed out by hyponyms() is never mutated.
    all_hyponyms = list(direct_hyponyms)
    for child in direct_hyponyms:
        # Recurse through this function's own name; the previous spelling
        # `get_hyponym_recursive` is not defined and raised a NameError.
        all_hyponyms.extend(get_hyponyms_recursive(child))
    return all_hyponyms

In [62]:
# Collect the full hyponym subtree of the synset, then print it on one line.
recursive_hyponyms = get_hyponyms_recursive(synset)
print("All hyponyms: {}".format(recursive_hyponyms))

All hyponyms: [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01'), Synset('cardigan.n.02'), Synset('pembroke.n.01'), Synset('feist.n.01'), Synset('pariah_dog.n.01'), Synset('liver-spotted_dalmatian.n.01'), Synset('brabancon_griffon.n.01'), Synset('courser.n.03'), Synset('dachshund.n.01'), Synset('hound.n.01'), Synset('rhodesian_ridgeback.n.01'), Synset('sporting_dog.n.01'), Synset('terrier.n.01'), Synset('sausage_dog.n.01'), Synset('afghan_hound.n.01'), Synset('basset.n.01'), Synset('beagle.n.01'), Synset('bloodhound.n.01'), Synset('bluetick.n.01'), Synset('boarhound.n.01'), Synset('coonhound.n.01')

### Example: Use hyponyms and synsets to match COCO and LVIS categories

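You need to download the COCO and LVIS annotations for this example. They are expected in a folder named `datasets` outside of the rvb_devkit directory, i.e. at `../../datasets/coco/annotations/instances_val2017.json` and `../../datasets/lvis/annotations/lvis_v0.5_val.json` relative to the notebook's working directory. The file `coco_to_synset.json` is read from the working directory itself.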

In [43]:
# Load the COCO val2017 instance annotations.
# The context manager closes the file handle once parsing is done.
with open('../../datasets/coco/annotations/instances_val2017.json', 'r') as coco_file:
    coco_data = json.load(coco_file)

In [44]:
# Load the LVIS v0.5 validation annotations.
# The context manager closes the file handle once parsing is done.
with open('../../datasets/lvis/annotations/lvis_v0.5_val.json', 'r') as lvis_file:
    lvis_data = json.load(lvis_file)

In [45]:
# Load the COCO-category -> synset table from the working directory.
# The context manager closes the file handle once parsing is done.
with open('coco_to_synset.json', 'r') as mapping_file:
    coco_to_synset = json.load(mapping_file)

In [46]:
# COCO category records; later cells read each record's 'id' and attach a 'synset' entry
coco_cats = coco_data['categories']

In [47]:
# LVIS category records; later cells read each record's 'synset' and 'id'
lvis_cats = lvis_data['categories']

In [48]:
# Lookup table: COCO category id -> synset name, built from the entries of coco_to_synset
coco_id_to_synset = {
    entry['coco_cat_id']: entry['synset']
    for entry in coco_to_synset.values()
}

In [49]:
# Keep only the COCO categories that have a synset assigned and
# attach that synset to the category record (the record is modified in place).
coco_cats_w_synset = []
for cat in coco_cats:
    cat_id = cat['id']
    if cat_id not in coco_id_to_synset:
        # no synset known for this category -> drop it
        continue
    cat['synset'] = coco_id_to_synset[cat_id]
    coco_cats_w_synset.append(cat)

In [50]:
# Lookup table: LVIS synset name -> LVIS category id
# (if a synset occurs more than once, the last category wins)
lvis_synset_to_id = {}
for lvis_cat in lvis_cats:
    lvis_synset_to_id[lvis_cat['synset']] = lvis_cat['id']

In [56]:
# create mapping: COCO category id -> list of matching LVIS category ids
coco_to_lvis = {}
for cat in coco_cats_w_synset:
    if cat['synset'] == 'stop_sign.n.01':
        # No WordNet lookup is done for this id (presumably it is not a real
        # WordNet synset -- TODO confirm), so only an exact synset match counts.
        # The name set must be reset here: assigning to a different variable
        # left the previous category's hyponyms in place (or raised a NameError
        # when this category came first).
        all_hyponym_names = set()
    else:
        synset = wn.synset(cat['synset'])
        all_hyponyms = get_hyponyms_recursive(synset)
        # A set gives constant-time membership tests in the filter below.
        all_hyponym_names = {h.name() for h in all_hyponyms}
    # An LVIS category matches when its synset is the COCO synset itself
    # or one of its (transitive) hyponyms.
    coco_to_lvis[cat['id']] = [
        lvis_id
        for lvis_synset, lvis_id in lvis_synset_to_id.items()
        if lvis_synset in all_hyponym_names or lvis_synset == cat['synset']
    ]
    # print categories without a match
    if not coco_to_lvis[cat['id']]:
        print(cat)

{'supercategory': 'food', 'id': 58, 'name': 'hot dog', 'synset': 'frank.n.02'}
{'supercategory': 'appliance', 'id': 79, 'name': 'oven', 'synset': 'oven.n.01'}


In [53]:
# show complete results: COCO category id -> list of matched LVIS category ids
coco_to_lvis

{1: [805],
 2: [95],
 3: [9, 180, 211, 303, 340, 604, 658, 702, 876],
 4: [714, 1133],
 5: [4, 443, 607, 947],
 6: [174, 936],
 7: [165, 1135],
 8: [448, 490, 702, 813, 841, 1127, 1134, 1144],
 9: [52, 118, 204, 370, 441, 512, 585, 611, 713, 901, 1191],
 10: [1132],
 11: [452],
 13: [1038],
 14: [779],
 15: [91, 808],
 16: [99,
  247,
  285,
  336,
  393,
  407,
  408,
  415,
  435,
  460,
  513,
  532,
  569,
  586,
  750,
  753,
  775,
  780,
  792,
  798,
  815,
  866,
  945,
  1149,
  1170],
 17: [229, 320],
 18: [163, 250, 358, 382, 867, 871, 962],
 19: [579, 844],
 20: [108, 885, 961],
 21: [81],
 22: [429, 677],
 23: [78, 528, 588],
 24: [1229],
 25: [502],
 27: [34],
 28: [1155],
 31: [35, 280, 971],
 32: [122, 138, 729],
 33: [36],
 34: [480],
 35: [982, 1192],
 36: [993],
 37: [41, 72],
 38: [621],
 39: [58],
 40: [60],
 41: [980],
 42: [1057],
 43: [1099],
 44: [84, 133, 206, 1102, 1184, 1189, 1213],
 46: [468, 1215],
 47: [347, 381, 1088],
 48: [475],
 49: [176, 625, 838, 1