## Word-Sense Disambiguation

In [None]:
from nltk.corpus import wordnet as wn
from nltk import wsd

In [None]:
# Three example sentences that each contain the word "die"; they are reused
# by the disambiguation cells further down.
X = 'The die is cast.'
Y = 'Roll the die to get a 6.'
Z = 'What is dead may never die.'

In [None]:
# Look up the WordNet synsets for 'die' without any part-of-speech filter.
wn.synsets('die')

In [None]:
# Look up only the noun synsets for 'die'.
wn.synsets('die', pos=wn.NOUN)

In [None]:
# Print the definition of every noun synset of 'die', one per line.
for syn in wn.synsets('die', pos=wn.NOUN):
    print(syn.definition())

In [None]:
# Print the definition of every verb synset of 'die', one per line.
for syn in wn.synsets('die', pos=wn.VERB):
    print(syn.definition())

## Word-Sense Disambiguation with the Lesk Algorithm

In [None]:
# Show sentence X, then run Lesk for 'die' with no POS hint.
# The context is a plain whitespace split, so punctuation stays attached to
# the neighbouring token (e.g. 'cast.').
print(X)
wsd.lesk(X.split(), 'die')

In [None]:
# `_` is not assigned anywhere in this notebook; presumably it is IPython's
# "last output" variable, i.e. the result displayed by the previously run cell.
_.definition()

In [None]:
# Run Lesk on sentence X again, this time passing pos=wn.NOUN, and show the
# definition of the synset it picks.
wsd.lesk(X.split(), 'die', pos=wn.NOUN).definition()

In [None]:
# Show sentence Y, then display the definition Lesk picks for 'die' when no
# POS hint is given.
print(Y)
wsd.lesk(Y.split(), 'die').definition()

In [None]:
# Run Lesk on sentence Y with pos=wn.NOUN and show the chosen definition.
wsd.lesk(Y.split(), 'die', pos=wn.NOUN).definition()

In [None]:
# Show sentence Z, then display the definition Lesk picks for 'die' when no
# POS hint is given.
# Note: Z.split() yields 'die.' (with the full stop) as the last context token.
print(Z)
wsd.lesk(Z.split(), 'die').definition()

In [None]:
# Run Lesk on sentence Z with pos=wn.VERB and show the chosen definition.
wsd.lesk(Z.split(), 'die', pos=wn.VERB).definition()

## Automatic POS Tagging + Lesk with spaCy

In [None]:
!pip install spacy

In [None]:
from spacy.cli import download
from spacy import load
# The model download is left disabled; presumably it is meant to be
# uncommented once when 'en_core_web_sm' is not installed yet.
# download('en_core_web_sm')
# Load the 'en_core_web_sm' pipeline; `nlp` is called on raw sentences in the
# cells below to obtain tokens and their POS tags.
nlp = load('en_core_web_sm')

In [None]:
import warnings

# Translation table from a token's `pos_` tag to the matching WordNet POS
# constant. Proper nouns are looked up as nouns; any tag missing here makes
# lesk() fall back to an unfiltered search.
POS_MAP = dict(VERB=wn.VERB, NOUN=wn.NOUN, PROPN=wn.NOUN)


def lesk(doc, word):
    """Disambiguate ``word`` in ``doc`` with NLTK's Lesk, using the token's POS tag.

    The first token of ``doc`` whose text equals ``word`` exactly
    (case-sensitive) is located. Its ``pos_`` tag is translated through
    ``POS_MAP`` and passed to ``wsd.lesk`` as the ``pos`` filter, with the
    texts of all tokens in ``doc`` serving as the context sentence.

    Args:
        doc: Iterable of tokens that each expose ``text`` and ``pos_``; the
            object itself must expose ``text`` (used in the error message).
            In this notebook it is the result of calling ``nlp(...)``.
        word: Surface form of the word to disambiguate.

    Returns:
        Whatever ``wsd.lesk`` returns. Per the NLTK documentation this is a
        WordNet synset, or ``None`` when there is no candidate synset, so
        callers should check before calling ``.definition()``.

    Raises:
        ValueError: If no token in ``doc`` has text equal to ``word``.

    Warns:
        UserWarning: If the token's POS tag has no entry in ``POS_MAP``; the
            search then runs without a POS filter.
    """
    # Keep the `word` argument untouched and bind the matching token to its
    # own name; `next` stops at the first exact match.
    target = next((token for token in doc if token.text == word), None)
    if target is None:
        raise ValueError(f'Word \"{word}\" does not appear in the document: {doc.text}.')

    # None (rather than False) is the documented "no filter" value of wsd.lesk.
    pos = POS_MAP.get(target.pos_)
    if pos is None:
        warnings.warn(f'POS tag for {target.text} not found in wordnet. Falling back to default Lesk behaviour.')

    context = [token.text for token in doc]
    return wsd.lesk(context, target.text, pos=pos)

In [None]:
# Run the spaCy pipeline over the example sentence so each token carries a
# POS tag that lesk() can use.
doc = nlp('Roll the die to get a 6.')

In [None]:
# Disambiguate 'die' with the POS-aware lesk() helper and display the result.
lesk(doc, 'die')

In [None]:
# Show the definition of the sense chosen by the POS-aware lesk() helper.
lesk(doc, 'die').definition()

In [None]:
# Disambiguate 'google' in "I work at google."; the POS tag the pipeline
# assigns to the token decides which WordNet POS filter (if any) is applied.
lesk(nlp('I work at google.'), 'google').definition()

In [None]:
# Disambiguate 'google' in "I will google it."; the POS tag the pipeline
# assigns to the token decides which WordNet POS filter (if any) is applied.
lesk(nlp('I will google it.'), 'google').definition()