In [1]:
from nltk import word_tokenize
from gensim.models import KeyedVectors
import numpy as np
from pprint import pprint
from cpae.predictors import CpaeEmbedder

In [2]:
# Sense embeddings stored in word2vec text format (hence binary=False).
def_embeds = KeyedVectors.load_word2vec_format('embeddings/wn_camb_cpae.txt', binary=False)

# Trained model archive, loaded through CpaeEmbedder; `cpae.embed_inputs` is
# what the cells below use to embed a definition.
cpae = CpaeEmbedder.from_path('trained_models/wn_camb_cpae/model.tar.gz', 'cpae_embedder')

2021-11-24 08:22:52,602 - INFO - allennlp.common.plugins - Plugin allennlp_models available
2021-11-24 08:22:52,610 - INFO - allennlp.models.archival - loading archive file trained_models/wn_camb_cpae/model.tar.gz
2021-11-24 08:22:52,611 - INFO - allennlp.models.archival - extracting archive file trained_models/wn_camb_cpae/model.tar.gz to temp dir /tmp/tmpdq72w4a7
2021-11-24 08:22:53,084 - INFO - allennlp.common.params - dataset_reader.type = sense_file
2021-11-24 08:22:53,085 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2021-11-24 08:22:53,086 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2021-11-24 08:22:53,087 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2021-11-24 08:22:53,088 - INFO - allennlp.common.params - dataset_reader.tokenizer = whitespace
2021-11-24 08:22:53,089 - INFO - allennlp.common.params - type = whitespace
2021-11-24 08:22:53,090 - INFO - allennlp.common.params 

In [3]:
# The definition has to be tokenized before it is embedded: the tokens are
# re-joined with single spaces to form the model input.
definition = ' '.join(
    word_tokenize('a round fruit with firm, white flesh and a green, red, or yellow skin')
)
inputs = dict(word=None, definition=definition)
apple_embeds = cpae.embed_inputs(inputs)

# Show the 20 stored embeddings closest to the embedded definition.
apple_neighbours = def_embeds.similar_by_vector(np.array(apple_embeds), 20)
pprint(apple_neighbours)

2021-11-22 18:43:08,334 - INFO - gensim.models.keyedvectors - precomputing L2-norms of word weight vectors


[('apple.NOUN.camb.01', 1.0),
 ('cantaloupe.NOUN.camb.02', 0.9209263324737549),
 ('apple.NOUN.camb.02', 0.8969683051109314),
 ('swede.NOUN.camb.01', 0.8939467668533325),
 ('rutabaga.NOUN.camb.01', 0.8939467668533325),
 ('melon.NOUN.camb.01', 0.8920690417289734),
 ('apple.NOUN.wn.01', 0.8886181116104126),
 ('anjou.NOUN.wn.02', 0.8873958587646484),
 ('cantaloupe.NOUN.camb.01', 0.8848063945770264),
 ('pumpkin.NOUN.camb.01', 0.883672297000885),
 ('pumpkin.NOUN.camb.02', 0.8808857202529907),
 ('guava.NOUN.camb.02', 0.8697496652603149),
 ('avocado.NOUN.camb.01', 0.8695813417434692),
 ('honeydew.NOUN.camb.02', 0.86556476354599),
 ('kiwi.NOUN.camb.01', 0.8652853965759277),
 ('peach.NOUN.camb.04', 0.8636364936828613),
 ('cocozelle.NOUN.wn.01', 0.859653115272522),
 ('mango.NOUN.camb.01', 0.8568488359451294),
 ('pomelo.NOUN.camb.01', 0.8556997776031494),
 ('marrow.NOUN.camb.02', 0.8552496433258057)]


In [4]:
import jsonlines

# Index every record of the sense file by its 'sense_id' so that retrieved ids
# can be mapped back to their full record.
sense_file = {}
with jsonlines.open('../../../sense_file.jsonl') as reader:
    for record in reader:
        sense_file[record['sense_id']] = record

def inspect_result(definition, word=None, pos=None, topn=10):
    """Print the stored senses that are most similar to ``definition``.

    The ranking itself is delegated to ``reverse_dict``; each returned sense id
    is then looked up in ``sense_file`` and printed as a small record.

    Args:
        definition: Raw definition text, forwarded unchanged to ``reverse_dict``.
        word: Optional filter; when not None only sense ids whose word field
            equals it are shown. Defaults to None (no filter), mirroring
            ``reverse_dict``.
        pos: Optional filter on the part-of-speech field of the sense id.
            Defaults to None (no filter), mirroring ``reverse_dict``.
        topn: Maximum number of senses to print.

    Raises:
        KeyError: If a retrieved sense id has no entry in ``sense_file``.
    """
    # Keyword arguments make the call independent of reverse_dict's parameter
    # order (which differs from this function's: topn comes second there).
    matches = reverse_dict(definition, topn=topn, word=word, pos=pos)
    for sense_id, sim in matches:
        sense = sense_file[sense_id]
        print(f'*id*: {sense_id}')
        print(f'*source*: {sense["source"]}')
        print(f'*definition*: {sense["definition"]}')
        print(f'*similarity*: {round(sim, 2)}')
        print('\n' + '=' * 50 + '\n')

def reverse_dict(definition, topn=10, word=None, pos=None):
    """Return the stored senses whose embeddings are closest to a definition.

    The definition is tokenized with ``word_tokenize``, re-joined with single
    spaces, embedded via ``embed_def`` and compared against ``def_embeds``.

    Args:
        definition: Raw (untokenized) definition text.
        topn: Maximum number of results to return.
        word: If not None, keep only sense ids whose first field equals it.
        pos: If not None, keep only sense ids whose second field equals it.
            Fields are obtained with ``sense_id.rsplit('.', 3)``, i.e. the last
            three dot-separated parts are split off and the remainder is the word.

    Returns:
        A list of ``(sense_id, similarity)`` pairs, in the order produced by
        ``def_embeds.similar_by_vector``.
    """
    definition = ' '.join(word_tokenize(definition))
    embeds = embed_def(definition)
    if word is None and pos is None:
        # No filter requested: let the index return the top-n directly.
        return def_embeds.similar_by_vector(embeds, topn)

    # Filtering happens after ranking, so every stored vector must be ranked.
    # The count is taken from ``vectors`` instead of ``vocab`` because the
    # ``vocab`` attribute was removed in gensim 4, while ``vectors`` is
    # available in both the 3.x and 4.x KeyedVectors.
    n_stored = len(def_embeds.vectors)
    ranked = def_embeds.similar_by_vector(embeds, n_stored)

    def _matches(sense_id):
        # Extra fields after word and POS are ignored.
        _word, _pos, *_ = sense_id.rsplit('.', 3)
        return (word is None or _word == word) and (pos is None or _pos == pos)

    results = [(sense_id, sim) for sense_id, sim in ranked if _matches(sense_id)]
    return results[:topn]

def embed_def(definition):
    """Embed a definition string with the loaded ``cpae`` model.

    Args:
        definition: Definition text. Callers in this notebook pass text whose
            tokens (from ``word_tokenize``) have been joined with single spaces.

    Returns:
        The result of ``cpae.embed_inputs`` converted to a NumPy array. The
        'word' input is always None; only the definition is supplied.
    """
    vector = cpae.embed_inputs({'word': None, 'definition': definition})
    return np.array(vector)
        

In [5]:
# Rank only the NOUN senses of 'apple' against this definition.
inspect_result(
    'a round fruit with firm, white flesh and a green, red, or yellow skin',
    word='apple',
    pos='NOUN',
)

*id*: apple.NOUN.camb.01
*source*: cambridge
*definition*: a round fruit with firm , white flesh and a green , red , or yellow skin
*similarity*: 1.0


*id*: apple.NOUN.camb.02
*source*: cambridge
*definition*: a round , edible fruit having a red , green , or yellow skin , or the tree on which it grows
*similarity*: 0.9


*id*: apple.NOUN.wn.01
*source*: wordnet
*definition*: fruit with red or yellow or green skin and sweet to tart crisp whitish flesh
*similarity*: 0.89


*id*: apple.NOUN.wn.02
*source*: wordnet
*definition*: native Eurasian tree widely cultivated in many varieties for its firm rounded edible fruits
*similarity*: 0.67




In [6]:
# Rank only the NOUN senses of 'mitre' against this definition.
inspect_result(
    'a tall, pointed hat worn by bishops in official ceremonies',
    word='mitre',
    pos='NOUN',
)

*id*: mitre.NOUN.camb.01
*source*: cambridge
*definition*: a tall , pointed hat worn by bishops in official ceremonies
*similarity*: 1.0


*id*: mitre.NOUN.wn.03
*source*: wordnet
*definition*: a liturgical headdress worn by bishops on formal occasions
*similarity*: 0.72


*id*: mitre.NOUN.wn.02
*source*: wordnet
*definition*: the surface of a beveled end of a piece where a miter joint is made
*similarity*: 0.46


*id*: mitre.NOUN.camb.02
*source*: cambridge
*definition*: a joint made by two pieces of wood that have both been cut at an angle of 45 ° at the joining ends
*similarity*: 0.22


*id*: mitre.NOUN.wn.01
*source*: wordnet
*definition*: joint that forms a corner ; usually both sides are bevelled at a 45 - degree angle to form a 90 - degree corner
*similarity*: 0.15




In [7]:
# Rank only the NOUN senses of 'wait' against this definition.
inspect_result(
    'time during which some action is awaited',
    word='wait',
    pos='NOUN',
)

*id*: wait.NOUN.wn.01
*source*: wordnet
*definition*: time during which some action is awaited
*similarity*: 1.0


*id*: wait.NOUN.camb.01
*source*: cambridge
*definition*: a period of time when you stay in one place until someone comes , or something happens , or until you can do something
*similarity*: 0.46


*id*: wait.NOUN.wn.02
*source*: wordnet
*definition*: the act of waiting ( remaining inactive in one place while expecting something )
*similarity*: 0.36




In [8]:
# Rank only the NOUN senses of 'bank' against this definition.
inspect_result(
    'In gambling , the bank is money that belongs to the owner and can be won by the players.',
    word='bank',
    pos='NOUN',
)

*id*: bank.NOUN.camb.02
*source*: cambridge
*definition*: In gambling , the bank is money that belongs to the owner and can be won by the players .
*similarity*: 1.0


*id*: bank.NOUN.camb.09
*source*: cambridge
*definition*: In a casino , the bank is money that is used to pay the players who win .
*similarity*: 0.81


*id*: bank.NOUN.wn.06
*source*: wordnet
*definition*: the funds held by a gambling house or the dealer in some gambling games
*similarity*: 0.59


*id*: bank.NOUN.camb.13
*source*: cambridge
*definition*: an organization where people and businesses can keep , invest , or borrow money , exchange currencies , etc . , or a building where these services are offered
*similarity*: 0.48


*id*: bank.NOUN.camb.12
*source*: cambridge
*definition*: A bank is also a row of similar objects
*similarity*: 0.48


*id*: bank.NOUN.camb.07
*source*: cambridge
*definition*: an organization that holds money belonging to others , investing and lending it to get more money , or the building i

In [10]:
# Rank only the VERB senses of 'miss' against this definition.
inspect_result(
    'to not notice someone or something',
    word='miss',
    pos='VERB',
)

*id*: miss.VERB.camb.05
*source*: cambridge
*definition*: to not notice someone or something
*similarity*: 1.0


*id*: miss.VERB.camb.04
*source*: cambridge
*definition*: to not see or hear something or someone
*similarity*: 0.76


*id*: miss.VERB.camb.08
*source*: cambridge
*definition*: to notice that something is lost or absent
*similarity*: 0.67


*id*: miss.VERB.camb.03
*source*: cambridge
*definition*: to not go to something
*similarity*: 0.66


*id*: miss.VERB.camb.11
*source*: cambridge
*definition*: to feel sad because you can not see a person or place or do something
*similarity*: 0.63


*id*: miss.VERB.wn.01
*source*: wordnet
*definition*: fail to perceive or to catch with the senses or the mind
*similarity*: 0.59


*id*: miss.VERB.camb.06
*source*: cambridge
*definition*: to feel sad that a person or thing is not present
*similarity*: 0.56


*id*: miss.VERB.wn.06
*source*: wordnet
*definition*: be without
*similarity*: 0.55


*id*: miss.VERB.wn.02
*source*: wordnet
*definit

In [9]:
# No word or POS restriction: the query is ranked against every stored sense.
inspect_result('bishop hat', word=None, pos=None)

*id*: cope.NOUN.wn.02
*source*: wordnet
*definition*: a long cloak ; worn by a priest or bishop on ceremonial occasions
*similarity*: 0.75


*id*: mitre.NOUN.camb.01
*source*: cambridge
*definition*: a tall , pointed hat worn by bishops in official ceremonies
*similarity*: 0.73


*id*: pontifical.NOUN.wn.01
*source*: wordnet
*definition*: the vestments and other insignia of a pontiff ( especially a bishop )
*similarity*: 0.72


*id*: skullcap.NOUN.camb.01
*source*: cambridge
*definition*: a small , round hat that fits closely on the top of the head , worn especially by religious Jewish men or Roman Catholic priests of high rank
*similarity*: 0.7


*id*: coadjutor.NOUN.wn.01
*source*: wordnet
*definition*: an assistant to a bishop
*similarity*: 0.7


*id*: miter.VERB.wn.02
*source*: wordnet
*definition*: confer a miter on ( a bishop )
*similarity*: 0.69


*id*: miter.NOUN.wn.03
*source*: wordnet
*definition*: a liturgical headdress worn by bishops on formal occasions
*similarity*: 0.69
