# Word representation using NLTK

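This notebook walks through NLTK's WordNet interface: synsets, lemmas, similarity measures, and multilingual lookups.

Reference: [NLTK WordNet HOWTO](https://www.nltk.org/howto/wordnet.html)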

## I. WordNet

In [1]:
# WordNet corpus reader, aliased `wn`; the cells below all go through this object.
from nltk.corpus import wordnet as wn

# Display the reader; its repr shows where the corpus is installed.
wn

<WordNetCorpusReader in '/home/kariminf/Data/NLTK/corpora/wordnet'>

### I.1. Synsets

In [2]:
# Every synset for the word 'fish', across parts of speech (nouns and verbs here).
wn.synsets('fish')

[Synset('fish.n.01'),
 Synset('fish.n.02'),
 Synset('pisces.n.02'),
 Synset('pisces.n.01'),
 Synset('fish.v.01'),
 Synset('fish.v.02')]

In [3]:
# Same lookup, restricted to a single part of speech through `pos` (verbs only).
wn.synsets('fish', pos=wn.VERB)

[Synset('fish.v.01'), Synset('fish.v.02')]

In [4]:
# Fetch one synset directly by its full name, written '<word>.<pos>.<nn>'.
wn.synset('fish.v.01')

Synset('fish.v.01')

In [5]:
from nltk.corpus.reader.wordnet import WordNetError

# A synset name that does not exist raises WordNetError,
# so the lookup is guarded and reported instead of crashing the cell.
try:
    wn.synset('blablabla.v.01')
except WordNetError:
    print('Key not found')

Key not found


### I.2. Synset properties and relations

In [6]:
# 'dog.n.01' is the synset inspected by the rest of this section.
dog = wn.synset('dog.n.01')

# Gloss (dictionary-style definition) of the synset.
dog.definition()

'a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds'

In [7]:
# Part-of-speech tag of the synset ('n' for this noun).
dog.pos()

'n'

In [8]:
# Integer offset of the synset.
# NOTE(review): presumably its identifier/position in the WordNet database — confirm in the NLTK docs.
dog.offset()

2084071

In [9]:
# Example sentences attached to the synset.
dog.examples()

['the dog barked all night']

In [10]:
# Lemmas (word forms) grouped under this synset.
dog.lemmas()

[Lemma('dog.n.01.dog'),
 Lemma('dog.n.01.domestic_dog'),
 Lemma('dog.n.01.Canis_familiaris')]

In [11]:
# Hypernyms: the more general synsets directly above this one.
dog.hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [12]:
# Hyponyms: the more specific synsets directly below this one (kinds of dog).
dog.hyponyms()

[Synset('basenji.n.01'),
 Synset('corgi.n.01'),
 Synset('cur.n.01'),
 Synset('dalmatian.n.02'),
 Synset('great_pyrenees.n.01'),
 Synset('griffon.n.02'),
 Synset('hunting_dog.n.01'),
 Synset('lapdog.n.01'),
 Synset('leonberg.n.01'),
 Synset('mexican_hairless.n.01'),
 Synset('newfoundland.n.01'),
 Synset('pooch.n.01'),
 Synset('poodle.n.01'),
 Synset('pug.n.01'),
 Synset('puppy.n.01'),
 Synset('spitz.n.01'),
 Synset('toy_dog.n.01'),
 Synset('working_dog.n.01')]

In [13]:
# Member holonyms: the groups this synset is a member of.
dog.member_holonyms()

[Synset('canis.n.01'), Synset('pack.n.06')]

In [14]:
# Root hypernyms: the top-most ancestor(s) reached by following hypernyms upward.
dog.root_hypernyms()

[Synset('entity.n.01')]

### I.3. Lemma relations

In [15]:
# Take the first lemma listed for the synset 'reason.n.01'.
reason_lemma = wn.synset('reason.n.01').lemmas()[0]

reason_lemma

Lemma('reason.n.01.reason')

In [16]:
# A lemma can also be fetched directly by its full name, '<synset name>.<lemma>'.
reason_lemma2 = wn.lemma('reason.n.01.reason')

reason_lemma2

Lemma('reason.n.01.reason')

In [17]:
# Back-reference from a lemma to the synset it belongs to.
reason_lemma.synset()

Synset('reason.n.01')

In [18]:
# Count recorded for this lemma.
# NOTE(review): presumably its frequency in WordNet's sense-tagged corpus — confirm in the NLTK docs.
reason_lemma.count()

76

In [19]:
# The lemma 'worker' of synset 'worker.n.01'; reused by the antonym cell further down.
worker = wn.lemma('worker.n.01.worker')

# Lemmas that are derivationally related to this one (here, senses of the verb "work").
worker.derivationally_related_forms()

[Lemma('work.v.02.work'), Lemma('work.v.01.work')]

In [20]:
# Pertainyms ("of" relation): the lemma an adjective pertains to, e.g. vocal -> voice.
wn.lemma('vocal.a.01.vocal').pertainyms()

[Lemma('voice.n.02.voice')]

In [21]:
# Antonyms ("not" relation): lemmas with the opposite meaning, e.g. worker -> nonworker.
worker.antonyms()

[Lemma('nonworker.n.01.nonworker')]

In [22]:
# Entailments ("implies" relation): actions implied by a verb, e.g. eating implies chewing and swallowing.
# Unlike the lemma relations in the cells above, this one is called on a synset.
wn.synset('eat.v.01').entailments()

[Synset('chew.v.01'), Synset('swallow.v.01')]

## II. Operations

In [23]:
# The two synsets compared by the cells in this section.
a_dog = wn.synset('dog.n.01')
a_cat = wn.synset('cat.n.01')

a_dog, a_cat

(Synset('dog.n.01'), Synset('cat.n.01'))

### II.1. Lowest Common Hypernyms

In [24]:
# Most specific synset(s) that are hypernym ancestors of both synsets.
a_dog.lowest_common_hypernyms(a_cat)

[Synset('carnivore.n.01')]

### II.2. Similarity

In [25]:
# Path Similarity: scored from the path linking the two synsets through the "is-a" (hypernym/hyponym) relation
a_dog.path_similarity(a_cat)

0.2

In [26]:
# Leacock-Chodorow Similarity (note: the score is not confined to [0, 1])
a_dog.lch_similarity(a_cat)

2.0281482472922856

In [27]:
# Wu-Palmer Similarity
a_dog.wup_similarity(a_cat)

0.8571428571428571

In [28]:
# Resnik Similarity
# Takes an information-content (IC) table as second argument; the one used here is
# loaded from 'ic-brown.dat' and is reused by the two cells that follow.
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown.dat')
a_dog.res_similarity(a_cat, brown_ic)

7.911666509036577

In [29]:
# Jiang-Conrath Similarity (uses the `brown_ic` table loaded in the Resnik cell)
a_dog.jcn_similarity(a_cat, brown_ic)

0.4497755285516739

In [30]:
# Lin Similarity (uses the `brown_ic` table loaded in the Resnik cell)
a_dog.lin_similarity(a_cat, brown_ic)

0.8768009843733973

## III. Multilingual WordNet

In [31]:
# Language codes known to the reader, sorted alphabetically; these are the values accepted by `lang=`.
sorted(wn.langs())

['als',
 'arb',
 'bul',
 'cat',
 'cmn',
 'dan',
 'ell',
 'eng',
 'eus',
 'fas',
 'fin',
 'fra',
 'glg',
 'heb',
 'hrv',
 'ind',
 'ita',
 'jpn',
 'nno',
 'nob',
 'pol',
 'por',
 'qcn',
 'slv',
 'spa',
 'swe',
 'tha',
 'zsm']

In [32]:
# Look up a non-English word by passing its language code; the results are the same English-named synsets.
wn.synsets('犬', lang='jpn')

[Synset('dog.n.01'), Synset('spy.n.01')]

In [33]:
# The lookup matches the stored spelling exactly: the first, undiacritized form finds nothing,
# while the second form, which carries the diacritic used in the Arabic lemma, finds 'dog.n.01'.
wn.synsets('كلب', lang='arb'), wn.synsets('كلْب', lang='arb')

([], [Synset('dog.n.01')])

In [34]:
# Reverse direction: the Arabic lemma names attached to an English-named synset.
wn.synset('dog.n.01').lemma_names('arb')

['كلْب']

In [35]:
# Arabic lemma names for 'spy.n.01'; multi-word lemmas are joined with '_'.
wn.synset('spy.n.01').lemma_names('arb')

['جاسُوس', 'عمِيْل_سِرِّيّ', 'عيْن']