Natural Language Processing with Python – Analyzing Text with the Natural Language Toolkit
Steven Bird, Ewan Klein, and Edward Loper
http://www.nltk.org/book/
Chapter 2.

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import nltk

# 2. Accessing Text Corpora and Lexical Resources

## 2.5 WordNet

### 2.5.1 Senses and Synonyms

In [5]:
from nltk.corpus import wordnet as wn

In [6]:
# Find every synset (sense) in which the word 'motorcar' appears; there is only one.
wn.synsets('motorcar')
# [Synset('car.n.01')]

[Synset('car.n.01')]

In [7]:
# List the words (lemma names) grouped together under the 'car.n.01' synset.
wn.synset('car.n.01').lemma_names()
# ['car', 'auto', 'automobile', 'machine', 'motorcar']

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [8]:
# Show the definition string attached to 'car.n.01'.
wn.synset('car.n.01').definition()
# 'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

In [10]:
# Show the example sentences attached to 'car.n.01' (a list; here it has one entry).
wn.synset('car.n.01').examples()
# ['he needs a car to get to work']

['he needs a car to get to work']

In [11]:
# Get the Lemma objects of 'car.n.01'; each one is shown as '<synset name>.<lemma name>'.
wn.synset('car.n.01').lemmas()
# [Lemma('car.n.01.car'), Lemma('car.n.01.auto'), Lemma('car.n.01.automobile'),
# Lemma('car.n.01.machine'), Lemma('car.n.01.motorcar')]

[Lemma('car.n.01.car'),
 Lemma('car.n.01.auto'),
 Lemma('car.n.01.automobile'),
 Lemma('car.n.01.machine'),
 Lemma('car.n.01.motorcar')]

In [12]:
# Look up a single lemma directly by its full '<synset name>.<lemma name>' identifier.
wn.lemma('car.n.01.automobile')
# Lemma('car.n.01.automobile')

Lemma('car.n.01.automobile')

In [13]:
# Navigate from a lemma back to the synset it belongs to.
wn.lemma('car.n.01.automobile').synset()
# Synset('car.n.01')

Synset('car.n.01')

In [14]:
# Extract just the word form of a lemma, without the synset prefix.
wn.lemma('car.n.01.automobile').name()
# 'automobile'

'automobile'

In [15]:
# Unlike 'motorcar', the word 'car' is ambiguous: it appears in five synsets.
wn.synsets('car')
# [Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'),
# Synset('cable_car.n.01')]

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

In [22]:
# Walk through every sense of 'car' and print the words grouped under each one,
# one sense per output line.
car_synsets = wn.synsets('car')
for synset in car_synsets:
    lemma_names = synset.lemma_names()
    print(lemma_names)

# ['car', 'auto', 'automobile', 'machine', 'motorcar']
# ['car', 'railcar', 'railway_car', 'railroad_car']
# ['car', 'gondola']
# ['car', 'elevator_car']
# ['cable_car', 'car']

['car', 'auto', 'automobile', 'machine', 'motorcar']
['car', 'railcar', 'railway_car', 'railroad_car']
['car', 'gondola']
['car', 'elevator_car']
['cable_car', 'car']


In [23]:
# Fetch every lemma whose word form is 'car': one for each synset that contains the word.
wn.lemmas('car')
# [Lemma('car.n.01.car'), Lemma('car.n.02.car'), Lemma('car.n.03.car'),
# Lemma('car.n.04.car'), Lemma('cable_car.n.01.car')]

[Lemma('car.n.01.car'),
 Lemma('car.n.02.car'),
 Lemma('car.n.03.car'),
 Lemma('car.n.04.car'),
 Lemma('cable_car.n.01.car')]

In [82]:
# Print one line per sense of 'dish', formatted as "<synset name>: <definition>".
dish_synsets = wn.synsets('dish')
for synset in dish_synsets:
    label = synset.name() + ':'
    print(label, synset.definition())

dish.n.01: a piece of dishware normally used as a container for holding or serving food
dish.n.02: a particular item of prepared food
dish.n.03: the quantity that a dish will hold
smasher.n.02: a very attractive or seductive looking woman
dish.n.05: directional antenna consisting of a parabolic reflector for microwave or radio frequency radiation
cup_of_tea.n.01: an activity that you like or at which you are superior
serve.v.06: provide (usually but not necessarily food)
dish.v.02: make concave; shape like a dish


### 2.5.2 The WordNet Hierarchy

In [83]:
# Keep a handle on the 'car.n.01' synset; the hierarchy queries in this section start from it.
motorcar = wn.synset('car.n.01')

In [84]:
# Hyponyms are the more specific synsets immediately below motorcar (kinds of car).
types_of_motorcar = motorcar.hyponyms()

In [85]:
# Peek at the first hyponym in the returned list.
types_of_motorcar[0]
# Synset('ambulance.n.01')

Synset('ambulance.n.01')

In [86]:
# Count how many hyponym synsets motorcar has.
len(types_of_motorcar)
# 31

31

In [87]:
# Flatten the lemma names of every hyponym synset into one alphabetically sorted list
# (uppercase entries sort first because the comparison is by code point).
sorted(name for hyponym in types_of_motorcar for name in hyponym.lemma_names())
# ['Model_T', 'S.U.V.', 'SUV', 'Stanley_Steamer', 'ambulance', 'beach_waggon',
# 'beach_wagon', 'bus', 'cab', 'compact', 'compact_car', 'convertible',
# 'coupe', 'cruiser', 'electric', 'electric_automobile', 'electric_car',
# 'estate_car', 'gas_guzzler', 'hack', 'hardtop', 'hatchback', 'heap',
# 'horseless_carriage', 'hot-rod', 'hot_rod', 'jalopy', 'jeep', 'landrover',
# 'limo', 'limousine', 'loaner', 'minicar', 'minivan', 'pace_car', 'patrol_car',
# 'phaeton', 'police_car', 'police_cruiser', 'prowl_car', 'race_car', 'racer',
# 'racing_car', 'roadster', 'runabout', 'saloon', 'secondhand_car', 'sedan',
# 'sport_car', 'sport_utility', 'sport_utility_vehicle', 'sports_car', 'squad_car',
# 'station_waggon', 'station_wagon', 'stock_car', 'subcompact', 'subcompact_car',
# 'taxi', 'taxicab', 'tourer', 'touring_car', 'two-seater', 'used-car', 'waggon',
# 'wagon']

['Model_T',
 'S.U.V.',
 'SUV',
 'Stanley_Steamer',
 'ambulance',
 'beach_waggon',
 'beach_wagon',
 'bus',
 'cab',
 'compact',
 'compact_car',
 'convertible',
 'coupe',
 'cruiser',
 'electric',
 'electric_automobile',
 'electric_car',
 'estate_car',
 'gas_guzzler',
 'hack',
 'hardtop',
 'hatchback',
 'heap',
 'horseless_carriage',
 'hot-rod',
 'hot_rod',
 'jalopy',
 'jeep',
 'landrover',
 'limo',
 'limousine',
 'loaner',
 'minicar',
 'minivan',
 'pace_car',
 'patrol_car',
 'phaeton',
 'police_car',
 'police_cruiser',
 'prowl_car',
 'race_car',
 'racer',
 'racing_car',
 'roadster',
 'runabout',
 'saloon',
 'secondhand_car',
 'sedan',
 'sport_car',
 'sport_utility',
 'sport_utility_vehicle',
 'sports_car',
 'squad_car',
 'station_waggon',
 'station_wagon',
 'stock_car',
 'subcompact',
 'subcompact_car',
 'taxi',
 'taxicab',
 'tourer',
 'touring_car',
 'two-seater',
 'used-car',
 'waggon',
 'wagon']

In [88]:
# Hypernyms go the other way: the more general synset(s) immediately above motorcar.
motorcar.hypernyms()
# [Synset('motor_vehicle.n.01')]

[Synset('motor_vehicle.n.01')]

In [89]:
# Collect every chain of hypernyms that links motorcar to the root of the hierarchy.
paths = motorcar.hypernym_paths()

In [90]:
# More than one path means some synset along the way has more than one hypernym.
len(paths)
# 2

2

In [91]:
# First path, listed root-first; it reaches wheeled_vehicle.n.01 via container.n.01.
[synset.name() for synset in paths[0]]
# ['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01',
# 'instrumentality.n.03', 'container.n.01', 'wheeled_vehicle.n.01',
# 'self-propelled_vehicle.n.01', 'motor_vehicle.n.01', 'car.n.01']

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'container.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [92]:
# Second path: same endpoints, but it reaches wheeled_vehicle.n.01 via
# conveyance.n.03 -> vehicle.n.01, which makes it one step longer.
[synset.name() for synset in paths[1]]
# ['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01',
# 'instrumentality.n.03', 'conveyance.n.03', 'vehicle.n.01', 'wheeled_vehicle.n.01',
# 'self-propelled_vehicle.n.01', 'motor_vehicle.n.01', 'car.n.01']

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'conveyance.n.03',
 'vehicle.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [93]:
# The most general hypernym(s) reachable from motorcar, i.e. where its paths start.
motorcar.root_hypernyms()
# [Synset('entity.n.01')]

[Synset('entity.n.01')]

### 2.5.3 More Lexical Relations

In [94]:
# Part meronyms: the components a tree has.
wn.synset('tree.n.01').part_meronyms()
# [Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'),
# Synset('stump.n.01'), Synset('trunk.n.01')]

[Synset('burl.n.02'),
 Synset('crown.n.07'),
 Synset('limb.n.02'),
 Synset('stump.n.01'),
 Synset('trunk.n.01')]

In [95]:
# Substance meronyms: the materials a tree is made of.
wn.synset('tree.n.01').substance_meronyms()
# [Synset('heartwood.n.01'), Synset('sapwood.n.01')]

[Synset('heartwood.n.01'), Synset('sapwood.n.01')]

In [96]:
# Member meronyms: a tree is not a collection, so nothing is recorded as a member of it.
wn.synset('tree.n.01').member_meronyms()
# []

[]

In [97]:
# Member holonyms: the collections that a tree is a member of.
wn.synset('tree.n.01').member_holonyms()  # tree is contained in forest as a member
# [Synset('forest.n.01')]

[Synset('forest.n.01')]

In [99]:
# Print one line per noun sense of 'mint', formatted as "<synset name>: <definition>".
# Passing wn.NOUN restricts the lookup to noun synsets.
mint_nouns = wn.synsets('mint', wn.NOUN)
for synset in mint_nouns:
    label = synset.name() + ':'
    print(label, synset.definition())

# batch.n.02: (often followed by `of') a large number or amount or extent
# mint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and
#            small mauve flowers
# mint.n.03: any member of the mint family of plants
# mint.n.04: the leaves of a mint plant used fresh or candied
# mint.n.05: a candy that is flavored with a mint oil
# mint.n.06: a plant where money is coined by authority of the government

batch.n.02: (often followed by `of') a large number or amount or extent
mint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and small mauve flowers
mint.n.03: any member of the mint family of plants
mint.n.04: the leaves of a mint plant used fresh or candied
mint.n.05: a candy that is flavored with a mint oil
mint.n.06: a plant where money is coined by authority of the government


In [100]:
# Part holonyms: what mint.n.04 (the leaves) is a part of -- the plant, mint.n.02.
wn.synset('mint.n.04').part_holonyms()
# [Synset('mint.n.02')]

[Synset('mint.n.02')]

In [101]:
# Substance holonyms: what mint.n.04 (the leaves) is a substance of -- the candy, mint.n.05.
wn.synset('mint.n.04').substance_holonyms()
# [Synset('mint.n.05')]

[Synset('mint.n.05')]

In [102]:
# Entailments relate verbs: the actions listed are implied by the queried verb.
wn.synset('walk.v.01').entailments()  # walking involves stepping
# [Synset('step.v.01')]

[Synset('step.v.01')]

In [103]:
# A verb can entail several actions: eating involves both chewing and swallowing.
wn.synset('eat.v.01').entailments()
# [Synset('chew.v.01'), Synset('swallow.v.01')]

[Synset('chew.v.01'), Synset('swallow.v.01')]

In [104]:
# The third verb sense of 'tease' entails both arousing and disappointing.
wn.synset('tease.v.03').entailments()
# [Synset('arouse.v.07'), Synset('disappoint.v.01')]

[Synset('arouse.v.07'), Synset('disappoint.v.01')]

In [105]:
# Antonyms are looked up on a Lemma (note wn.lemma, not wn.synset). Noun example.
wn.lemma('supply.n.02.supply').antonyms()
# [Lemma('demand.n.02.demand')]

[Lemma('demand.n.02.demand')]

In [106]:
# Verb example of an antonym lookup.
wn.lemma('rush.v.01.rush').antonyms()
# [Lemma('linger.v.04.linger')]

[Lemma('linger.v.04.linger')]

In [107]:
# Adjective example: a lemma can have more than one antonym.
# The expected output below follows the order this notebook actually recorded.
wn.lemma('horizontal.a.01.horizontal').antonyms()
# [Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')]

[Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')]

In [108]:
# Adverb example (part-of-speech tag 'r') of an antonym lookup.
wn.lemma('staccato.r.01.staccato').antonyms()
# [Lemma('legato.r.01.legato')]

[Lemma('legato.r.01.legato')]

### 2.5.4 Semantic Similarity

In [109]:
# Reference synset: every comparison in this section is made against the right whale.
right = wn.synset('right_whale.n.01')

In [110]:
# Comparison synset: orca.
orca = wn.synset('orca.n.01')

In [111]:
# Comparison synset: minke whale.
minke = wn.synset('minke_whale.n.01')

In [112]:
# Comparison synset: tortoise.
tortoise = wn.synset('tortoise.n.01')

In [113]:
# Comparison synset: novel, a concept unrelated to animals.
novel = wn.synset('novel.n.01')

In [114]:
# Most specific synset that is a hypernym of both the right whale and the minke whale.
right.lowest_common_hypernyms(minke)
# [Synset('baleen_whale.n.01')]

[Synset('baleen_whale.n.01')]

In [115]:
# Right whale vs. orca: the shared hypernym is the more general 'whale'.
right.lowest_common_hypernyms(orca)
# [Synset('whale.n.02')]

[Synset('whale.n.02')]

In [116]:
# Right whale vs. tortoise: the two only meet at 'vertebrate'.
right.lowest_common_hypernyms(tortoise)
# [Synset('vertebrate.n.01')]

[Synset('vertebrate.n.01')]

In [117]:
# Right whale vs. novel: nothing in common below the root synset 'entity'.
right.lowest_common_hypernyms(novel)
# [Synset('entity.n.01')]

[Synset('entity.n.01')]

In [123]:
# min_depth: length of the shortest hypernym path from the root to this synset.
wn.synset('baleen_whale.n.01').min_depth()
# 14

14

In [124]:
# 'whale' is one level shallower than 'baleen_whale'.
wn.synset('whale.n.02').min_depth()
# 13

13

In [125]:
# 'vertebrate' is a much more general concept, so it sits at a shallower depth.
wn.synset('vertebrate.n.01').min_depth()
# 8

8

In [126]:
# The root synset has depth 0.
wn.synset('entity.n.01').min_depth()
# 0

0

In [127]:
# Path-based similarity score between the right whale and the minke whale.
right.path_similarity(minke)
# 0.25

0.25

In [128]:
# Right whale vs. orca: a lower score than for the minke whale.
right.path_similarity(orca)
# 0.16666666666666666

0.16666666666666666

In [129]:
# Right whale vs. tortoise: the score drops further.
right.path_similarity(tortoise)
# 0.07692307692307693

0.07692307692307693

In [130]:
# Right whale vs. novel: the lowest score of the four comparisons.
right.path_similarity(novel)
# 0.043478260869565216

0.043478260869565216