In [3]:
import nltk
# import wordnet and shorten its name to wn
from nltk.corpus import wordnet as wn

In [6]:
# WordNet has one synset per sense of a word. A synset is named after one of its words,
#    a part-of-speech tag (noun, verb, adjective or adverb) and a number among the synsets of that word
# given the word "dog", list the Synset objects for all of its senses
wn.synsets('dog')

[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01'),
 Synset('chase.v.01')]

In [7]:
# given a synset name, list its lemma names, i.e. the synonymous words that share
#    this sense -- here the first noun sense of "dog"
wn.synset('dog.n.01').lemma_names()

['dog', 'domestic_dog', 'Canis_familiaris']

In [8]:
# given a synset name, list the Lemma objects of the synset (a lemma pairs one word with one synset)
wn.synset('dog.n.01').lemmas()

[Lemma('dog.n.01.dog'),
 Lemma('dog.n.01.domestic_dog'),
 Lemma('dog.n.01.Canis_familiaris')]

In [9]:
# go the other way: look a lemma up by its full name and get the synset it belongs to
wn.lemma('dog.n.01.domestic_dog').synset()

Synset('dog.n.01')

In [10]:
# show the synonyms (lemma names) attached to every sense of "dog"
for sense in wn.synsets('dog'):
    print(sense, ":  ", sense.lemma_names())

Synset('dog.n.01') :   ['dog', 'domestic_dog', 'Canis_familiaris']
Synset('frump.n.01') :   ['frump', 'dog']
Synset('dog.n.03') :   ['dog']
Synset('cad.n.01') :   ['cad', 'bounder', 'blackguard', 'dog', 'hound', 'heel']
Synset('frank.n.02') :   ['frank', 'frankfurter', 'hotdog', 'hot_dog', 'dog', 'wiener', 'wienerwurst', 'weenie']
Synset('pawl.n.01') :   ['pawl', 'detent', 'click', 'dog']
Synset('andiron.n.01') :   ['andiron', 'firedog', 'dog', 'dog-iron']
Synset('chase.v.01') :   ['chase', 'chase_after', 'trail', 'tail', 'tag', 'give_chase', 'dog', 'go_after', 'track']


In [11]:
# get the definition text of the first noun sense of dog, namely the dog.n.01 synset
wn.synset('dog.n.01').definition()

'a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds'

In [12]:
# display the example sentences of the synset (returned as a list of strings)
wn.synset('dog.n.01').examples()

['the dog barked all night']

In [13]:
# print the definition of every sense of "dog", one line per synset
for sense in wn.synsets('dog'):
    print(sense, ":  ", sense.definition())

Synset('dog.n.01') :   a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Synset('frump.n.01') :   a dull unattractive unpleasant girl or woman
Synset('dog.n.03') :   informal term for a man
Synset('cad.n.01') :   someone who is morally reprehensible
Synset('frank.n.02') :   a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
Synset('pawl.n.01') :   a hinged catch that fits into a notch of a ratchet to move a wheel forward or prevent it from moving backward
Synset('andiron.n.01') :   metal supports for logs in a fireplace
Synset('chase.v.01') :   go after with the intent to catch


In [14]:
# full report for every sense of "dog": synonyms, then definition, then examples
for sense in wn.synsets('dog'):
    print(sense, ":  ")
    for detail in (sense.lemma_names(), sense.definition(), sense.examples()):
        print('     ', detail)

Synset('dog.n.01') :  
      ['dog', 'domestic_dog', 'Canis_familiaris']
      a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
      ['the dog barked all night']
Synset('frump.n.01') :  
      ['frump', 'dog']
      a dull unattractive unpleasant girl or woman
      ['she got a reputation as a frump', "she's a real dog"]
Synset('dog.n.03') :  
      ['dog']
      informal term for a man
      ['you lucky dog']
Synset('cad.n.01') :  
      ['cad', 'bounder', 'blackguard', 'dog', 'hound', 'heel']
      someone who is morally reprehensible
      ['you dirty dog']
Synset('frank.n.02') :  
      ['frank', 'frankfurter', 'hotdog', 'hot_dog', 'dog', 'wiener', 'wienerwurst', 'weenie']
      a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
      []
Synset('pawl.n.01') :  
      ['pawl', 'detent', 'click', 'dog']
      a hinged catch that fits into a

In [15]:
##  Lexical relations between synsets in WordNet
# hypernyms: the more general synsets directly above a synset
# dog1 (first noun sense of "dog") is reused by the cells that follow
dog1 = wn.synset('dog.n.01')
dog1.hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [16]:
# hyponyms: the more specific synsets directly below dog1 (kinds of dog)
dog1.hyponyms()

[Synset('basenji.n.01'),
 Synset('corgi.n.01'),
 Synset('cur.n.01'),
 Synset('dalmatian.n.02'),
 Synset('great_pyrenees.n.01'),
 Synset('griffon.n.02'),
 Synset('hunting_dog.n.01'),
 Synset('lapdog.n.01'),
 Synset('leonberg.n.01'),
 Synset('mexican_hairless.n.01'),
 Synset('newfoundland.n.01'),
 Synset('pooch.n.01'),
 Synset('poodle.n.01'),
 Synset('pug.n.01'),
 Synset('puppy.n.01'),
 Synset('spitz.n.01'),
 Synset('toy_dog.n.01'),
 Synset('working_dog.n.01')]

In [17]:
# the most general hypernym(s) of a synset, i.e. the top of its hypernym hierarchy
dog1.root_hypernyms()

[Synset('entity.n.01')]

In [18]:
# from the wordnet browser, we see that dog1 has two more relations;
#    the first is part meronyms: synsets that are parts of a dog
dog1.part_meronyms()

[Synset('flag.n.07')]

In [21]:
# what is flag.n.07?  look it up once and print its lemma names, definition and examples
flag_synset = wn.synset('flag.n.07')
print(flag_synset.lemma_names(), flag_synset.definition(), flag_synset.examples())

['flag'] a conspicuously marked or shaped tail []


In [22]:
# member holonyms: the groups that dog1 is a member of
dog1.member_holonyms()

[Synset('canis.n.01'), Synset('pack.n.06')]

In [23]:
# look at another word, "good": besides adjective senses (tags a and s)
#    it also has noun (n) and adverb (r) senses
wn.synsets('good')

[Synset('good.n.01'),
 Synset('good.n.02'),
 Synset('good.n.03'),
 Synset('commodity.n.01'),
 Synset('good.a.01'),
 Synset('full.s.06'),
 Synset('good.a.03'),
 Synset('estimable.s.02'),
 Synset('beneficial.s.01'),
 Synset('good.s.06'),
 Synset('good.s.07'),
 Synset('adept.s.01'),
 Synset('good.s.09'),
 Synset('dear.s.02'),
 Synset('dependable.s.04'),
 Synset('good.s.12'),
 Synset('good.s.13'),
 Synset('effective.s.04'),
 Synset('good.s.15'),
 Synset('good.s.16'),
 Synset('good.s.17'),
 Synset('good.s.18'),
 Synset('good.s.19'),
 Synset('good.s.20'),
 Synset('good.s.21'),
 Synset('well.r.01'),
 Synset('thoroughly.r.02')]

In [25]:
# find antonyms; since antonyms belong to lemmas, first pick the synset whose lemma we want
good1 = wn.synset('good.a.01')
# display the synonyms (lemma names) of this synset
good1.lemma_names()

['good']

In [26]:
# antonyms() is defined on lemmas (word + sense), not on synsets,
# so list the synset's lemmas and ask the first one for its antonyms
good1_lemmas = good1.lemmas()
print(good1_lemmas)
good1_lemmas[0].antonyms()

[Lemma('good.a.01.good')]


[Lemma('bad.a.01.bad')]

In [27]:
# entailments of verbs: e.g. walking entails stepping
for verb_id in ('walk.v.01', 'eat.v.01'):
    print(wn.synset(verb_id).entailments())

[Synset('step.v.01')]
[Synset('chew.v.01'), Synset('swallow.v.01')]


In [28]:
# paths of a synset are traced by following its hypernyms upward;
# these are the direct hypernyms of dog1 again, as the starting points
dog1.hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [29]:
# hypernym_paths() lists every path between the root concept "entity" and the synset;
# paths is reused by the cells that follow
paths=dog1.hypernym_paths()
# number of such paths
print(len(paths) )
# look at the first path (ordered from the root down to dog1)
paths[0]

2


[Synset('entity.n.01'),
 Synset('physical_entity.n.01'),
 Synset('object.n.01'),
 Synset('whole.n.02'),
 Synset('living_thing.n.01'),
 Synset('organism.n.01'),
 Synset('animal.n.01'),
 Synset('chordate.n.01'),
 Synset('vertebrate.n.01'),
 Synset('mammal.n.01'),
 Synset('placental.n.01'),
 Synset('carnivore.n.01'),
 Synset('canine.n.02'),
 Synset('dog.n.01')]

In [30]:
# same first path, but keeping only the synset names so it is easier to read
[node.name() for node in paths[0]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'living_thing.n.01',
 'organism.n.01',
 'animal.n.01',
 'chordate.n.01',
 'vertebrate.n.01',
 'mammal.n.01',
 'placental.n.01',
 'carnivore.n.01',
 'canine.n.02',
 'dog.n.01']

In [31]:
# synset names along the second path
[node.name() for node in paths[1]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'living_thing.n.01',
 'organism.n.01',
 'animal.n.01',
 'domestic_animal.n.01',
 'dog.n.01']

In [32]:
# Word similarity

# three different kinds of whale, looked up by synset name
right, minke, orca = (
    wn.synset(whale_id)
    for whale_id in ('right_whale.n.01', 'minke_whale.n.01', 'orca.n.01')
)

In [33]:
# print the hypernym paths (root first) of each of the three whales
for whale in (right, minke, orca):
    print(whale.hypernym_paths())

[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('organism.n.01'), Synset('animal.n.01'), Synset('chordate.n.01'), Synset('vertebrate.n.01'), Synset('mammal.n.01'), Synset('placental.n.01'), Synset('aquatic_mammal.n.01'), Synset('cetacean.n.01'), Synset('whale.n.02'), Synset('baleen_whale.n.01'), Synset('right_whale.n.01')]]
[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('organism.n.01'), Synset('animal.n.01'), Synset('chordate.n.01'), Synset('vertebrate.n.01'), Synset('mammal.n.01'), Synset('placental.n.01'), Synset('aquatic_mammal.n.01'), Synset('cetacean.n.01'), Synset('whale.n.02'), Synset('baleen_whale.n.01'), Synset('rorqual.n.01'), Synset('lesser_rorqual.n.01')]]
[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('or

In [34]:
# lowest common hypernym = the most specific ancestor two synsets share;
# compare the right whale with the minke, then with the orca
for other in (minke, orca):
    print(right.lowest_common_hypernyms(other))

[Synset('baleen_whale.n.01')]
[Synset('whale.n.02')]


In [35]:
# min_depth gives the length of the shortest hypernym path from a synset to the
# top of the hierarchy, so the root "entity" itself has depth 0
for node in (right, wn.synset('baleen_whale.n.01'), wn.synset('entity.n.01')):
    print(node.min_depth())

15
14
0


In [36]:
# path_similarity gives a score between 0 and 1 based on the shortest is-a path
# connecting the two synsets; closer relatives score higher
for other in (minke, orca):
    print(right.path_similarity(other))

0.25
0.16666666666666666


In [37]:
# two more synsets to compare against the right whale
tortoise = wn.synset('tortoise.n.01')
novel = wn.synset('novel.n.01')
# note how general the lowest common hypernym becomes for less related concepts
for other in (tortoise, novel):
    print(right.lowest_common_hypernyms(other))

[Synset('vertebrate.n.01')]
[Synset('entity.n.01')]


In [38]:
# path similarity of the right whale to the tortoise and to the novel
for other in (tortoise, novel):
    print(right.path_similarity(other))

0.07692307692307693
0.043478260869565216


In [39]:
# print the built-in help for the WordNet corpus reader object (the output is long)
help(wn)

Help on WordNetCorpusReader in module nltk.corpus.reader.wordnet object:

class WordNetCorpusReader(nltk.corpus.reader.api.CorpusReader)
 |  A corpus reader used to access wordnet or its variants.
 |  
 |  Method resolution order:
 |      WordNetCorpusReader
 |      nltk.corpus.reader.api.CorpusReader
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, root, omw_reader)
 |      Construct a new wordnet corpus reader, with the given root
 |      directory.
 |  
 |  all_lemma_names(self, pos=None, lang='eng')
 |      Return all lemma names for all synsets for the given
 |      part of speech tag and language or languages. If pos is
 |      not specified, all synsets for all parts of speech will
 |      be used.
 |  
 |  all_synsets(self, pos=None)
 |      Iterate over all synsets with a given part of speech tag.
 |      If no pos is specified, all synsets for all parts of speech
 |      will be loaded.
 |  
 |  citation(self, lang='omw')
 |      Return the conte

In [40]:
# first load information content (IC) data derived from a general corpus (the Brown file);
# brown_ic is passed to the IC-based similarity measure in the next cell
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown.dat')

In [41]:
# Resnik similarity: scored from the information content (brown_ic) of the
# lowest common hypernym; higher means more similar
for other in (orca, tortoise, novel):
    print(right.res_similarity(other, brown_ic))

10.939239388446227
5.2175784741185165
-0.0


In [45]:
## SentiWordNet
from nltk.corpus import sentiwordnet as swn
# each synset is scored for how positive, negative and objective its meaning is

In [43]:
# SentiWordNet is indexed by the same synsets as WordNet:
# both lookups of "breakdown" list the same senses
word = 'breakdown'
print(list(swn.senti_synsets(word)))
print(wn.synsets(word))

[SentiSynset('dislocation.n.02'), SentiSynset('breakdown.n.02'), SentiSynset('breakdown.n.03'), SentiSynset('breakdown.n.04')]
[Synset('dislocation.n.02'), Synset('breakdown.n.02'), Synset('breakdown.n.03'), Synset('breakdown.n.04')]


In [46]:
# printing a SentiSynset shows its positive and negative scores;
# breakdown3 is reused in the next cell
breakdown3 = swn.senti_synset('breakdown.n.03')
print (breakdown3)

<breakdown.n.03: PosScore=0.0 NegScore=0.25>


In [47]:
# each score also has its own accessor: positive, negative, then objective
print(breakdown3.pos_score(), breakdown3.neg_score(), breakdown3.obj_score(), sep='\n')

0.0
0.25
0.75


In [49]:
# some more exploration of sentiment scores: the first noun sense of "dog"
dogswn1 = swn.senti_synset('dog.n.01')
print(dogswn1, dogswn1.obj_score(), sep='\n')

<dog.n.01: PosScore=0.0 NegScore=0.0>
1.0


In [50]:
# sentiment scores for the first adjective sense of "good"
goodswn1 = swn.senti_synset('good.a.01')
print(goodswn1, goodswn1.obj_score(), sep='\n')

<good.a.01: PosScore=0.75 NegScore=0.0>
0.25


In [51]:
# not every synset in WordNet has been scored for sentiment in SentiWordNet,
#   but the most recent version has scored a lot more, so there is no unscored example to show here;
#   this lookup for "exuberant" succeeds and prints its scores
print(wn.synsets('exuberant'))
ex3 = swn.senti_synset('exuberant.s.03')
print(ex3)

[Synset('ebullient.s.01'), Synset('excessive.s.02'), Synset('exuberant.s.03')]
<exuberant.s.03: PosScore=0.0 NegScore=0.25>
