## Word Sense Disambiguation (WSD, 词义消歧)

In [6]:
# Word Sense Disambiguation (WSD): pick the sense of one ambiguous word from
# the sentence it occurs in, using pywsd's simple_lesk.
from pywsd.lesk import simple_lesk
sent = 'I went to the bank to deposit my money'  # context sentence
ambiguous = 'bank'  # target word whose sense we want
# NOTE(review): pos='n' presumably limits the candidates to noun senses --
# confirm against the pywsd documentation.
answer = simple_lesk(sent, ambiguous, pos='n')
answer  # last expression of the cell, so the chosen Synset is displayed

Synset('depository_financial_institution.n.01')

In [8]:
# Same query without the pos argument; the output shown below is the same synset.
simple_lesk(sent, 'bank')

Synset('depository_financial_institution.n.01')

In [2]:
# Show the definition text of the synset stored in `answer`.
answer.definition()

'a financial institution that accepts deposits and channels the money into lending activities'

In [3]:
# disambiguate() tags a whole sentence at once. Per the output below it yields
# (token, synset) pairs, with None for tokens that were given no sense.
from pywsd import disambiguate
# maxsim is passed as `algorithm=` in later cells; it is not used in this one.
from pywsd.similarity import max_similarity as maxsim
disambiguate('I went to the bank to deposit my money')

[('I', None),
 ('went', Synset('run_low.v.01')),
 ('to', None),
 ('the', None),
 ('bank', Synset('depository_financial_institution.n.01')),
 ('to', None),
 ('deposit', Synset('deposit.v.02')),
 ('my', None),
 ('money', Synset('money.n.03'))]

In [20]:
# Tag the sentence, then render every token that received a sense as
# "token(synset_name)"; tokens whose synset is None are dropped.
rs = disambiguate('I went to the bank to deposit my money')
[f"{token}({sense.name()})" for token, sense in rs if sense]

['went(run_low.v.01)',
 'bank(depository_financial_institution.n.01)',
 'deposit(deposit.v.02)',
 'money(money.n.03)']

In [23]:
# For each token in `rs` that has a sense: (token, synset name, definition).
[(token, sense.name(), sense.definition()) for token, sense in rs if sense]

[('went', 'run_low.v.01', 'to be spent or finished'),
 ('bank',
  'depository_financial_institution.n.01',
  'a financial institution that accepts deposits and channels the money into lending activities'),
 ('deposit', 'deposit.v.02', 'put into a bank account'),
 ('money',
  'money.n.03',
  'the official currency issued by a government or national bank')]

In [26]:
sents='I went to the bank to deposit my money'
def extract_syn(r):
    """Return ``(token, synset_name, synset_definition)`` for one tagged token.

    Written as a ``def`` rather than a lambda bound to a name so that the
    function carries a real ``__name__`` and a docstring.

    Args:
        r: A ``(token, synset)`` pair; ``r[1]`` must provide ``name()`` and
            ``definition()`` (so it must not be ``None``).

    Returns:
        A 3-tuple of the token, ``r[1].name()`` and ``r[1].definition()``.
    """
    return (r[0], r[1].name(), r[1].definition())
# Tag `sents` with the max-similarity algorithm (similarity_option='wup'),
# without lemmas, and summarise each token that received a sense.
[
    extract_syn(pair)
    for pair in disambiguate(sents, algorithm=maxsim,
                             similarity_option='wup', keepLemmas=False)
    if pair[1]
]

[('went',
  'travel.v.01',
  'change location; move, travel, or proceed, also metaphorically'),
 ('bank',
  'bank.n.06',
  'the funds held by a gambling house or the dealer in some gambling games'),
 ('deposit', 'deposit.v.02', 'put into a bank account'),
 ('money', 'money.n.02', 'wealth reckoned in terms of money')]

In [27]:
def extract_sents(text=None):
    """Disambiguate a sentence and summarise every token that received a sense.

    Args:
        text: Sentence to tag. When omitted, the notebook-level ``sents``
            variable is used, which is what the original zero-argument
            version always did.

    Returns:
        A list of ``extract_syn`` results, one per token whose synset is
        truthy (tokens tagged with ``None`` are skipped).
    """
    if text is None:
        # Backward-compatible default: fall back to the global sentence.
        text = sents
    return [extract_syn(r) for r in disambiguate(text) if r[1]]
extract_sents()

[('went', 'run_low.v.01', 'to be spent or finished'),
 ('bank',
  'depository_financial_institution.n.01',
  'a financial institution that accepts deposits and channels the money into lending activities'),
 ('deposit', 'deposit.v.02', 'put into a bank account'),
 ('money',
  'money.n.03',
  'the official currency issued by a government or national bank')]

In [4]:
# With keepLemmas=True the output below shows (token, lemma, synset) triples
# instead of (token, synset) pairs, so the synset is no longer at index 1.
disambiguate('I went to the bank to deposit my money', algorithm=maxsim, similarity_option='wup', keepLemmas=True)

[('I', 'i', None),
 ('went', 'go', Synset('travel.v.01')),
 ('to', 'to', None),
 ('the', 'the', None),
 ('bank', 'bank', Synset('bank.n.06')),
 ('to', 'to', None),
 ('deposit', 'deposit', Synset('deposit.v.02')),
 ('my', 'my', None),
 ('money', 'money', Synset('money.n.02'))]

In [13]:
# Try cosine_lesk on the word 'twenty'. adapted_lesk is imported here as well
# but is not called in the cells visible in this notebook section.
from pywsd.lesk import simple_lesk, cosine_lesk, adapted_lesk
# The result shown below is the noun sense twenty_dollar_bill.n.01.
cosine_lesk('The sheet is twenty centimeters.', 'twenty')

Synset('twenty_dollar_bill.n.01')

In [15]:
# Same sentence, now asking cosine_lesk for the sense of 'sheet'.
cosine_lesk('The sheet is twenty centimeters.', 'sheet')

Synset('sheet.n.03')

In [16]:
# Whole-sentence tagging with the default algorithm; show "token(synset_name)"
# for every token that received a sense.
rs = disambiguate('The sheet is twenty centimeters.')
[f"{token}({sense.name()})" for token, sense in rs if sense]

['sheet(tabloid.n.02)', 'twenty(twenty.s.01)', 'centimeters(centimeter.n.01)']

In [18]:
# Same sentence tagged with the max-similarity algorithm (similarity_option='wup');
# show "token(synset_name)" for every token that received a sense.
rs = disambiguate(
    'The sheet is twenty centimeters.',
    algorithm=maxsim,
    similarity_option='wup',
)
[f"{token}({sense.name()})" for token, sense in rs if sense]

['sheet(sheet.n.02)', 'twenty(twenty.s.01)', 'centimeters(centimeter.n.01)']

In [19]:
# Short sentence with little context; show "token(synset_name)" for every
# token that received a sense.
rs = disambiguate('it is a dog.')
[f"{token}({sense.name()})" for token, sense in rs if sense]

['dog(pawl.n.01)']

In [31]:
import requests
from pprint import pprint
# POST the sentence to the WSD service on localhost:1700. Per the output shown
# below, a successful reply is a JSON list of [token, synset name, definition].
# An explicit timeout is passed because requests otherwise waits indefinitely
# when the service does not answer.
response = requests.post('http://localhost:1700/en/wsd/default',
                         json={'sents': 'The sheet is twenty centimeters.'},
                         timeout=10)
if response.status_code == 200:
    rs = response.json()
    pprint(rs)
else:
    # Report the failure instead of silently producing no output.
    print(f"WSD request failed: HTTP {response.status_code}")

[['sheet', 'tabloid.n.02', 'newspaper with half-size pages'],
 ['twenty',
  'twenty.s.01',
  'denoting a quantity consisting of 20 items or units'],
 ['centimeters',
  'centimeter.n.01',
  'a metric unit of length equal to one hundredth of a meter']]
