In [1]:
#Word Sense Disambiguation
#Lesk Algorithm
#Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using the definitions of the ambiguous word.
#Given an ambiguous word and the context in which the word occurs, Lesk returns a Synset with the highest number of overlapping words between the context sentence and different definitions from each Synset.

In [4]:
import nltk
from nltk.wsd import lesk

# Lesk looks senses up in WordNet, so the corpus has to be available locally.
nltk.download('wordnet')

# Pre-tokenised context sentence; the final full stop is kept as its own token.
sent = 'Take me to the station and put me on a train .'.split()

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [5]:
# Only noun senses of 'train' are considered as candidates here.
print(lesk(sent, 'train', pos='n'))

Synset('string.n.04')


In [6]:
# No POS restriction: every sense of 'train', nouns and verbs alike, competes.
print(lesk(context_sentence=sent, ambiguous_word='train'))

Synset('train.v.09')


In [7]:
#The definitions for “train” are:

In [8]:
from nltk.corpus import wordnet as wn

# Print each WordNet sense of 'train' followed by its definition, i.e. the
# text that Lesk compares against the context sentence.
train_senses = wn.synsets('train')
for sense in train_senses:
    print(sense, sense.definition())

Synset('train.n.01') public transport provided by a line of railway cars coupled together and drawn by a locomotive
Synset('string.n.04') a sequentially ordered set of things or events or ideas in which each successive member is related to the preceding
Synset('caravan.n.01') a procession (of wagons or mules or camels) traveling together in single file
Synset('train.n.04') a series of consequences wrought by an event
Synset('train.n.05') piece of cloth forming the long back section of a gown that is drawn along the floor
Synset('gearing.n.01') wheelwork consisting of a connected set of rotating gears by which force is transmitted or motion or torque is changed
Synset('train.v.01') create by training and teaching
Synset('train.v.02') undergo training or instruction in preparation for a particular role, function, or profession
Synset('discipline.v.01') develop (children's) behavior by instruction and practice; especially to teach self-control
Synset('prepare.v.05') educate for a future r

In [10]:
#Test disambiguation of the word 'on' when a POS tag is supplied.

In [11]:
# Pair every sense of 'on' with its part-of-speech code.
[(sense, sense.pos()) for sense in wn.synsets('on')]

[(Synset('on.a.01'), 'a'),
 (Synset('on.a.02'), 'a'),
 (Synset('along.r.01'), 'r'),
 (Synset('on.r.02'), 'r'),
 (Synset('on.r.03'), 'r')]

In [13]:
# Rebind the context sentence, this time without the trailing full stop token.
sent = ['Take', 'me', 'to', 'the', 'station', 'and', 'put', 'me', 'on', 'a', 'train']
# Unrestricted lookup: all senses of 'on' are candidates.
lesk(sent, 'on')

Synset('on.r.03')

In [14]:
# Same query, with the candidates limited to senses whose POS code is 'r'.
lesk(sent, 'on', 'r')

Synset('on.r.03')

In [15]:
#Test behavior if there are no matching senses.

In [16]:
# An explicitly empty candidate list leaves Lesk nothing to score, so the call
# yields no sense and the cell displays no output.
# Reuses the tokenised `sent` bound earlier instead of re-splitting the same sentence.
lesk(sent, 'me', synsets=[])

In [17]:
# Without an explicit candidate list, the senses are looked up in WordNet for 'me'.
lesk(context_sentence=sent, ambiguous_word='me')

Synset('maine.n.01')