In [1]:
from src.sensenet.sensenet.sensenet import SenseNet

In [2]:
# Build the SenseNet instance used by every cell below from three files under
# data/v0.0.1/wn_bi-camb/:
#   1. senset_file_bi-camb.jsonl  - the senset file (JSON Lines),
#   2. wn_bi-camb_cpae.txt        - presumably the sense vectors; TODO confirm,
#   3. wn_bi-camb_cpae/model.tar.gz - the model archive (the log output of this
#      cell shows it being loaded through allennlp.models.archival).
# NOTE(review): the paths are relative, so this assumes the kernel's working
# directory is the repository root - confirm.
sensenet = SenseNet.from_path('data/v0.0.1/wn_bi-camb/senset_file_bi-camb.jsonl',
                              'data/v0.0.1/wn_bi-camb/wn_bi-camb_cpae.txt',
                              'data/v0.0.1/wn_bi-camb/wn_bi-camb_cpae/model.tar.gz')

2021-12-02 15:30:08,505 - INFO - allennlp.common.plugins - Plugin allennlp_models available
2021-12-02 15:30:08,514 - INFO - allennlp.models.archival - loading archive file data/v0.0.1/wn_bi-camb/wn_bi-camb_cpae/model.tar.gz
2021-12-02 15:30:08,516 - INFO - allennlp.models.archival - extracting archive file data/v0.0.1/wn_bi-camb/wn_bi-camb_cpae/model.tar.gz to temp dir /tmp/tmpjwq3_kza
2021-12-02 15:30:08,968 - INFO - allennlp.common.params - dataset_reader.type = sense_file
2021-12-02 15:30:08,969 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2021-12-02 15:30:08,970 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2021-12-02 15:30:08,970 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2021-12-02 15:30:08,971 - INFO - allennlp.common.params - dataset_reader.tokenizer = whitespace
2021-12-02 15:30:08,972 - INFO - allennlp.common.params - type = whitespace
2021-12-02 15:30:08,972 - INFO - a

# Get `Senset`s of a Word with POS Tag

In [3]:
# Look up the sensets registered for the word 'apple' with POS tag 'NOUN'
# and print the returned collection.
print(sensenet.sensets('apple', 'NOUN'))

[Senset('apple.NOUN.01'), Senset('apple.NOUN.02')]


# Inspect `Senset`s

In [4]:
def show_sensets(word, pos):
    """Print every senset of a word, with its definitions grouped by source.

    For each senset returned by ``sensenet.sensets(word, pos)`` this prints the
    senset id, a 50-character dashed rule, and then one section per source
    listing the ``definition`` of each sense from that source. A blank line
    follows every source section and every senset.

    Relies on the notebook-level ``sensenet`` object and on
    ``group_senses_by_source``.

    Args:
        word: The word to look up.
        pos: The part-of-speech tag to look up (e.g. ``'NOUN'``).

    Returns:
        None. The result is written to standard output.
    """
    divider = '-' * 50
    for entry in sensenet.sensets(word, pos):
        by_source = group_senses_by_source(entry.senses)
        print(entry.senset_id)
        print(divider)
        for origin, members in by_source.items():
            print(f'{origin}:')
            for member in members:
                print(f'  - {member.definition}')
            # Blank line closes each source section.
            print()
        # Extra blank line separates consecutive sensets.
        print()

def group_senses_by_source(senses):
    """Bucket senses by their ``source`` attribute.

    Args:
        senses: An iterable of objects exposing a ``source`` attribute.

    Returns:
        A plain ``dict`` mapping each source to the list of senses that carry
        it. Keys appear in order of first occurrence, and each list keeps the
        senses in their input order.
    """
    grouped = {}
    for item in senses:
        # setdefault creates the bucket on first sight of a source and
        # returns the existing one afterwards.
        grouped.setdefault(item.source, []).append(item)
    return grouped


In [5]:
# Example: 'apple' as a noun. In the recorded output the fruit sense
# (apple.NOUN.01) has both a wordnet and a cambridge definition, while the
# tree sense (apple.NOUN.02) has only a wordnet one.
show_sensets('apple', 'NOUN')

apple.NOUN.01
--------------------------------------------------
wordnet:
  - fruit with red or yellow or green skin and sweet to tart crisp whitish flesh

cambridge:
  - a round fruit with firm , white flesh and a green , red , or yellow skin


apple.NOUN.02
--------------------------------------------------
wordnet:
  - native Eurasian tree widely cultivated in many varieties for its firm rounded edible fruits




In [6]:
# Example: 'mitre' as a noun. The recorded output shows three sensets;
# mitre.NOUN.02 has only a wordnet definition.
show_sensets('mitre', 'NOUN')

mitre.NOUN.01
--------------------------------------------------
wordnet:
  - joint that forms a corner ; usually both sides are bevelled at a 45 - degree angle to form a 90 - degree corner

cambridge:
  - a joint made by two pieces of wood that have both been cut at an angle of 45 ° at the joining ends


mitre.NOUN.02
--------------------------------------------------
wordnet:
  - the surface of a beveled end of a piece where a miter joint is made


mitre.NOUN.03
--------------------------------------------------
wordnet:
  - a liturgical headdress worn by bishops on formal occasions

cambridge:
  - a tall , pointed hat worn by bishops in official ceremonies




In [7]:
# Example: 'baton' as a noun, a word with many senses (the recorded output
# lists at least five sensets).
show_sensets('baton', 'NOUN')

baton.NOUN.01
--------------------------------------------------
wordnet:
  - a thin tapered rod used by a conductor to lead an orchestra or choir

cambridge:
  - a stick used by a conductor (= person who controls the performance of a group of musicians ) to show the speed of the music


baton.NOUN.02
--------------------------------------------------
wordnet:
  - a short stout club used primarily by policemen

cambridge:
  - a thick , heavy stick used as a weapon by police officers


baton.NOUN.03
--------------------------------------------------
wordnet:
  - a short staff carried by some officials to symbolize an office or an authority


baton.NOUN.04
--------------------------------------------------
wordnet:
  - a hollow metal rod that is wielded or twirled by a drum major or drum majorette

cambridge:
  - a hollow metal stick that a majorette or drum major turns and throws while marching


baton.NOUN.05
--------------------------------------------------
wordnet:
  - a hollow cyli

# Find Similar `Senset`s of a `Senset`

In [8]:
# Fetch a single senset by its id, then print each (senset, similarity) pair
# yielded by its similar_sensets() method. In the recorded output this yields
# ten pairs, listed in descending similarity.
senset = sensenet.senset('apple.NOUN.02')
for similar_senset, similarity in senset.similar_sensets():
    print(similar_senset, similarity)

2021-12-02 15:30:42,238 - INFO - gensim.models.keyedvectors - precomputing L2-norms of word weight vectors


Senset('fig.NOUN.02') 0.7941375374794006
Senset('lemon.NOUN.03') 0.7395809888839722
Senset('radish.NOUN.04') 0.7376625537872314
Senset('turnip.NOUN.01') 0.7320808172225952
Senset('quince.NOUN.01') 0.7314552068710327
Senset('artichoke.NOUN.01') 0.7216981649398804
Senset('mango.NOUN.01') 0.7184211015701294
Senset('onion.NOUN.02') 0.7169082760810852
Senset('carrot.NOUN.02') 0.7150439023971558
Senset('spinach.NOUN.01') 0.7146469354629517


# Reverse Dictionary

In [9]:
# Query the reverse dictionary with a free-text description and print each
# (senset, similarity) pair it yields.
# NOTE: the loop variable rebinds the notebook-level name `senset`.
for senset, similarity in sensenet.reverse_dictionary('bishop hat'):
    print(senset, similarity)

Senset('crosier.NOUN.01') 0.7406036853790283
Senset('cope.NOUN.02') 0.7396151423454285
Senset('mitre.NOUN.03') 0.7235138416290283
Senset('pontifical.NOUN.01') 0.7155019640922546
Senset('archbishop.NOUN.01') 0.6879871487617493
Senset('skullcap.NOUN.01') 0.6758203506469727
Senset('cathedra.NOUN.01') 0.6744264364242554
Senset('richmondena.NOUN.01') 0.6690065860748291
Senset('episcopate.NOUN.01') 0.6648440361022949
Senset('diocese.NOUN.01') 0.6644286513328552
