# POS_and_Word_Meaning

In [3]:
from nltk.tokenize import sent_tokenize, word_tokenize

In [4]:
mytext = "Cyprus, officially the Republic of Cyprus, is an island country in the Eastern Mediterranean and the third largest and third most populous island in the Mediterranean. Cyprus is located south of Turkey, west of Syria and Lebanon, northwest of Israel, north of Egypt, and southeast of Greece. Cyprus is a major tourist destination in the Mediterranean. With an advanced, high-income economy and a very high Human Development Index, the Republic of Cyprus has been a member of the Commonwealth since 1961 and was a founding member of the Non-Aligned Movement until it joined the European Union on 1 May 2004. On 1 January 2008, the Republic of Cyprus joined the eurozone."

In [5]:
# Split the sample paragraph into a list of sentence strings.
sentences = sent_tokenize(mytext)
sentences

['Cyprus, officially the Republic of Cyprus, is an island country in the Eastern Mediterranean and the third largest and third most populous island in the Mediterranean.',
 'Cyprus is located south of Turkey, west of Syria and Lebanon, northwest of Israel, north of Egypt, and southeast of Greece.',
 'Cyprus is a major tourist destination in the Mediterranean.',
 'With an advanced, high-income economy and a very high Human Development Index, the Republic of Cyprus has been a member of the Commonwealth since 1961 and was a founding member of the Non-Aligned Movement until it joined the European Union on 1 May 2004.',
 'On 1 January 2008, the Republic of Cyprus joined the eurozone.']

In [6]:
# Word-tokenize the third sentence (index 2):
# "Cyprus is a major tourist destination in the Mediterranean."
# The trailing full stop comes back as a token of its own.
tokens = word_tokenize(sentences[2])
tokens

['Cyprus',
 'is',
 'a',
 'major',
 'tourist',
 'destination',
 'in',
 'the',
 'Mediterranean',
 '.']

## POS Tagging

In [7]:
from nltk import pos_tag
# Pair every token with a part-of-speech tag, giving a list of (token, tag) tuples.
tags = pos_tag(tokens)
tags

[('Cyprus', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('major', 'JJ'),
 ('tourist', 'NN'),
 ('destination', 'NN'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('Mediterranean', 'NNP'),
 ('.', '.')]

In [9]:
# Print only the tokens whose tag is 'NNP'
# (the names Cyprus and Mediterranean in this sentence).
for word, tag in tags:
    if tag != 'NNP':
        continue
    print(word)

Cyprus
Mediterranean


#### To access documentation for tags, for example for NN:

In [10]:
import nltk.help
# Print the UPenn tag-set help entry (description plus example words) for the tag 'NN'.
nltk.help.upenn_tagset('NN')

NN: noun, common, singular or mass
    common-carrier cabbage knuckle-duster Casino afghan shed thermostat
    investment slide humour falloff slick wind hyena override subhumanity
    machinist ...


## Word senses (for homonyms)

**Homonyms are two or more words that share the same spelling or pronunciation but have different meanings and origins.**

**WordNet is a lexical database for the English language in the form of a semantic graph.**

**WordNet groups English words into sets of synonyms called synsets, provides short definitions and usage examples, and records a number of relations among these synonym sets or their members.**

**NLTK provides an interface to the WordNet API.**

In [11]:
from nltk.corpus import wordnet as wn

In [12]:
wn.synsets('human')

[Synset('homo.n.02'),
 Synset('human.a.01'),
 Synset('human.a.02'),
 Synset('human.a.03')]

In [13]:
wn.synsets('human')[0].definition()

'any living or extinct member of the family Hominidae characterized by superior intelligence, articulate speech, and erect carriage'

In [14]:
wn.synsets('human')[1].definition()

'characteristic of humanity'

In [15]:
wn.synsets('human')[2].definition()

'relating to a person'

In [16]:
wn.synsets('human')[3].definition()

'having human form or attributes as opposed to those of animals or divine beings'

In [17]:
# pos=wn.NOUN keeps only the noun senses, so index 0 is homo.n.02.
# The capitalised query 'Human' still resolves to that synset.
human = wn.synsets('Human', pos=wn.NOUN)[0]
human

Synset('homo.n.02')

In [22]:
human.hypernyms()[0].definition()

'a primate of the family Hominidae'

In [23]:
human.hyponyms()

[Synset('homo_erectus.n.01'),
 Synset('homo_habilis.n.01'),
 Synset('homo_sapiens.n.01'),
 Synset('homo_soloensis.n.01'),
 Synset('neandertal_man.n.01'),
 Synset('rhodesian_man.n.01'),
 Synset('world.n.08')]

In [24]:
human.hyponyms()[0].definition()

'extinct species of primitive hominid with upright stature but small brain'

In [26]:
# Take the first sense listed for 'bicycle' and show its definition.
bike = wn.synsets('bicycle')[0]
bike.definition()

'a wheeled vehicle that has two wheels and is moved by foot pedals'

In [31]:
bike.hyponyms()

[Synset('bicycle-built-for-two.n.01'),
 Synset('mountain_bike.n.01'),
 Synset('ordinary.n.04'),
 Synset('push-bike.n.01'),
 Synset('safety_bicycle.n.01'),
 Synset('velocipede.n.01')]

In [27]:
# Index 1 picks the second sense of 'girl', female_child.n.01
# ("a youthful female person"), rather than the first sense, girl.n.01.
girl = wn.synsets('girl')[1]
girl.definition()

'a youthful female person'

In [32]:
girl.hyponyms()

[Synset('campfire_girl.n.01'),
 Synset('farm_girl.n.01'),
 Synset('flower_girl.n.02'),
 Synset('moppet.n.01'),
 Synset('schoolgirl.n.01'),
 Synset('scout.n.02')]

In [28]:
# The Wu-Palmer metric (WUP) measures how similar two senses are, based on
# distance in the WordNet graph. Many other similarity metrics exist too.

bike.wup_similarity(human)

0.34782608695652173

In [29]:
girl.wup_similarity(human)

0.5217391304347826

In [33]:
wn.synsets('girl')

[Synset('girl.n.01'),
 Synset('female_child.n.01'),
 Synset('daughter.n.01'),
 Synset('girlfriend.n.02'),
 Synset('girl.n.05')]

In [38]:
wn.synsets('girl')[0].lemmas()

[Lemma('girl.n.01.girl'),
 Lemma('girl.n.01.miss'),
 Lemma('girl.n.01.missy'),
 Lemma('girl.n.01.young_lady'),
 Lemma('girl.n.01.young_woman'),
 Lemma('girl.n.01.fille')]

In [40]:
wn.synsets('girl')[0].lemmas()[0].name()

'girl'

In [41]:
# A lemma is the dictionary (base) form of a word, as opposed to its
# various inflected forms.
#
# Gather the lemma names of every sense of 'girl'. Duplicates are kept:
# a name is repeated once for each sense it belongs to.
synonyms = [
    lemma.name()
    for sense in wn.synsets('girl')
    for lemma in sense.lemmas()
]
synonyms

['girl',
 'miss',
 'missy',
 'young_lady',
 'young_woman',
 'fille',
 'female_child',
 'girl',
 'little_girl',
 'daughter',
 'girl',
 'girlfriend',
 'girl',
 'lady_friend',
 'girl']

In [42]:
# Collect one antonym name per lemma across every sense of "girl".
# Lemmas with no antonym are skipped; where a lemma has several antonyms,
# only the first is recorded.
antonyms = []
for syn in wn.synsets("girl"):
    for lemma in syn.lemmas():
        # Look the antonyms up once and reuse the result for both the
        # emptiness check and the append.
        opposites = lemma.antonyms()
        if opposites:
            antonyms.append(opposites[0].name())
antonyms

['male_child', 'boy', 'son', 'boy']