# WordNet

In [1]:
from nltk.corpus import wordnet as wn

The `synsets()` function returns a list of synsets, each representing one sense of the given word.

In [2]:
wn.synsets('car')

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

Each synset has its own name, and you can get the synset object directly by specifying that name.

In [3]:
wn.synset('car.n.01')

Synset('car.n.01')

A synset is a set of lemmas; a lemma is a word disambiguated to one particular sense.

In [4]:
wn.synset('car.n.01').lemmas()

[Lemma('car.n.01.car'),
 Lemma('car.n.01.auto'),
 Lemma('car.n.01.automobile'),
 Lemma('car.n.01.machine'),
 Lemma('car.n.01.motorcar')]

Each lemma also has its own name, so you can get a lemma object by specifying that name.

In [5]:
wn.lemma('car.n.01.automobile')

Lemma('car.n.01.automobile')

A lemma is essentially a pair of a synset and a word; the word serves as the lemma's name.

In [6]:
wn.lemma('car.n.01.automobile').synset()

Synset('car.n.01')

In [7]:
wn.lemma('car.n.01.automobile').name()

'automobile'

There is a convenient method, `lemma_names()`, that returns the list of lemma names of a synset.

In [8]:
wn.synset('car.n.01').lemma_names()

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [9]:
[synset.lemma_names() for synset in wn.synsets('car')]

[['car', 'auto', 'automobile', 'machine', 'motorcar'],
 ['car', 'railcar', 'railway_car', 'railroad_car'],
 ['car', 'gondola'],
 ['car', 'elevator_car'],
 ['cable_car', 'car']]

WordNet is a dictionary, so each synset has a definition and, sometimes, examples of usage.

In [10]:
wn.synset('car.n.01').definition()

'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

In [11]:
[synset.definition() for synset in wn.synsets('car')]

['a motor vehicle with four wheels; usually propelled by an internal combustion engine',
 'a wheeled vehicle adapted to the rails of railroad',
 'the compartment that is suspended from an airship and that carries personnel and the cargo and the power plant',
 'where passengers ride up and down',
 'a conveyance for passengers or freight on a cable railway']

In [12]:
[synset.examples() for synset in wn.synsets('car')]

[['he needs a car to get to work'],
 ['three cars had jumped the rails'],
 [],
 ['the car was on the top floor'],
 ['they took a cable car to the top of the mountain']]

## Hyponyms

In [13]:
# Keep a handle on the 'car.n.01' synset; the cells in this section and in
# the Hypernyms section query its relations through this variable.
motorcar = wn.synset('car.n.01')

In [14]:
# hyponyms() gives the more specific synsets under this one
# (e.g. ambulance.n.01, as the next cells show).
types_of_motorcar = motorcar.hyponyms() 

In [15]:
types_of_motorcar[0]

Synset('ambulance.n.01')

In [16]:
[synset.lemma_names() for synset in types_of_motorcar]

[['ambulance'],
 ['beach_wagon',
  'station_wagon',
  'wagon',
  'estate_car',
  'beach_waggon',
  'station_waggon',
  'waggon'],
 ['bus', 'jalopy', 'heap'],
 ['cab', 'hack', 'taxi', 'taxicab'],
 ['compact', 'compact_car'],
 ['convertible'],
 ['coupe'],
 ['cruiser',
  'police_cruiser',
  'patrol_car',
  'police_car',
  'prowl_car',
  'squad_car'],
 ['electric', 'electric_automobile', 'electric_car'],
 ['gas_guzzler'],
 ['hardtop'],
 ['hatchback'],
 ['horseless_carriage'],
 ['hot_rod', 'hot-rod'],
 ['jeep', 'landrover'],
 ['limousine', 'limo'],
 ['loaner'],
 ['minicar'],
 ['minivan'],
 ['Model_T'],
 ['pace_car'],
 ['racer', 'race_car', 'racing_car'],
 ['roadster', 'runabout', 'two-seater'],
 ['sedan', 'saloon'],
 ['sport_utility', 'sport_utility_vehicle', 'S.U.V.', 'SUV'],
 ['sports_car', 'sport_car'],
 ['Stanley_Steamer'],
 ['stock_car'],
 ['subcompact', 'subcompact_car'],
 ['touring_car', 'phaeton', 'tourer'],
 ['used-car', 'secondhand_car']]

## Hypernyms

In [17]:
motorcar.hypernyms()

[Synset('motor_vehicle.n.01')]

In [18]:
# Each path is a list of synsets running from the root (entity.n.01) down to
# car.n.01; a synset can have more than one such path, as shown below.
paths = motorcar.hypernym_paths()

In [19]:
len(paths)

2

In [20]:
paths[0]

[Synset('entity.n.01'),
 Synset('physical_entity.n.01'),
 Synset('object.n.01'),
 Synset('whole.n.02'),
 Synset('artifact.n.01'),
 Synset('instrumentality.n.03'),
 Synset('container.n.01'),
 Synset('wheeled_vehicle.n.01'),
 Synset('self-propelled_vehicle.n.01'),
 Synset('motor_vehicle.n.01'),
 Synset('car.n.01')]

In [21]:
paths[1]

[Synset('entity.n.01'),
 Synset('physical_entity.n.01'),
 Synset('object.n.01'),
 Synset('whole.n.02'),
 Synset('artifact.n.01'),
 Synset('instrumentality.n.03'),
 Synset('conveyance.n.03'),
 Synset('vehicle.n.01'),
 Synset('wheeled_vehicle.n.01'),
 Synset('self-propelled_vehicle.n.01'),
 Synset('motor_vehicle.n.01'),
 Synset('car.n.01')]

In [22]:
motorcar.root_hypernyms()

[Synset('entity.n.01')]

## Meronyms and Holonyms

In [23]:
# Part meronyms: the components a tree has (trunk, crown, limb, ...).
wn.synset('tree.n.01').part_meronyms()

[Synset('burl.n.02'),
 Synset('crown.n.07'),
 Synset('limb.n.02'),
 Synset('stump.n.01'),
 Synset('trunk.n.01')]

In [24]:
# Substance meronyms: the materials a tree is made of (heartwood, sapwood).
wn.synset('tree.n.01').substance_meronyms()

[Synset('heartwood.n.01'), Synset('sapwood.n.01')]

In [25]:
# Member holonyms go the other way: the groups a tree is a member of (a forest).
wn.synset('tree.n.01').member_holonyms()

[Synset('forest.n.01')]

## Entailments

In [26]:
# Entailments relate verb synsets: the result below shows that walking
# entails stepping.
wn.synset('walk.v.01').entailments()

[Synset('step.v.01')]

In [27]:
wn.synset('eat.v.01').entailments()

[Synset('chew.v.01'), Synset('swallow.v.01')]

## Antonyms

In [28]:
# Antonyms are looked up here on a Lemma (a specific word within a synset)
# rather than on a Synset, and the result is a list of Lemma objects.
wn.lemma('supply.n.02.supply').antonyms()

[Lemma('demand.n.02.demand')]

In [29]:
wn.lemma('horizontal.a.01.horizontal').antonyms()

[Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')]

# Semantic Similarities based on WordNet

In [30]:
# Three noun synsets reused by every similarity measure in this section:
# nickel is compared once against dime and once against money.
nickel = wn.synset('nickel.n.02')
dime = wn.synset('dime.n.01')
money = wn.synset('money.n.01')

In [31]:
nickel.lowest_common_hypernyms(dime)

[Synset('coin.n.01')]

In [32]:
nickel.lowest_common_hypernyms(money)

[Synset('medium_of_exchange.n.01')]

In [33]:
# Depth of coin.n.01, the lowest common hypernym found for nickel/dime above.
# NOTE(review): min_depth() is presumably the length of the shortest hypernym
# path from the root to the synset — confirm against the NLTK documentation.
wn.synset('coin.n.01').min_depth()

8

In [34]:
wn.synset('medium_of_exchange.n.01').min_depth()

5

### Path length similarity

In [35]:
nickel.path_similarity(dime)

0.3333333333333333

In [36]:
nickel.path_similarity(money)

0.16666666666666666

### Resnik similarity

In [37]:
# Load information-content (IC) data from the 'ic-brown.dat' file; the
# res_similarity() and lin_similarity() calls below take it as their second
# argument.
# NOTE(review): presumably requires the WordNet IC data to be installed
# beforehand (e.g. nltk.download('wordnet_ic')) — confirm on a fresh environment.
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown.dat')

In [38]:
nickel.res_similarity(dime, brown_ic)

7.455288045755159

In [39]:
nickel.res_similarity(money, brown_ic)

6.254931881899411

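### Lin similarity

Lin similarity normalizes the information content of the lowest common hypernym by the information content of the two synsets themselves: $\mathrm{sim}_{\mathrm{Lin}}(a, b) = \dfrac{2 \cdot IC(\mathrm{lcs}(a, b))}{IC(a) + IC(b)}$. Note that in the results below this ranks `money` marginally closer to `nickel` than `dime` is, the opposite of the path-length and Resnik measures.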

In [40]:
nickel.lin_similarity(dime, brown_ic)

0.6073758971525297

In [41]:
nickel.lin_similarity(money, brown_ic)

0.6191356626837683