# NLP With Python

In [1]:
import nltk

In [2]:
from nltk.tokenize import sent_tokenize, word_tokenize

# Sample passage used by the tokenization and stopword cells.
text = "Rain, Rain go away. Come again another Day"

# Split the passage into a list of sentence strings and show it.
sents = sent_tokenize(text)
print(sents)

['Rain, Rain go away.', 'Come again another Day']


In [3]:
# Tokenize each sentence separately, giving one list of tokens per sentence.
words = list(map(word_tokenize, sents))
print(words)

[['Rain', ',', 'Rain', 'go', 'away', '.'], ['Come', 'again', 'another', 'Day']]


### Removing Stopwords

In [4]:
from string import punctuation

from nltk.corpus import stopwords

# Set of tokens to discard: the NLTK English stopword list plus every
# individual ASCII punctuation character.
customStopWords = {*stopwords.words('english'), *punctuation}

In [5]:
# Drop stopwords and punctuation from the tokenized text.
# The stopword entries are lowercase, so each token is lower-cased for the
# membership test only; otherwise capitalized stopwords (e.g. a sentence-initial
# "The") would slip through. Tokens that survive keep their original casing.
wordsWOStopwords = [
    word
    for word in word_tokenize(text)
    if word.lower() not in customStopWords
]
print(wordsWOStopwords)

['Rain', 'Rain', 'go', 'away', 'Come', 'another', 'Day']


### Bigrams

In [6]:
# Import only the names this cell uses rather than `import *`, so it is clear
# where each name comes from and the notebook namespace is not flooded with
# everything nltk.collocations happens to expose.
from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

# Association-measure object; it is created here but not used by this cell.
bigram_measures = BigramAssocMeasures()

# Count adjacent word pairs in the stopword-free token list.
finder = BigramCollocationFinder.from_words(wordsWOStopwords)

# Display every (bigram, count) pair, ordered by bigram.
sorted(finder.ngram_fd.items())

[(('Come', 'another'), 1),
 (('Rain', 'Rain'), 1),
 (('Rain', 'go'), 1),
 (('another', 'Day'), 1),
 (('away', 'Come'), 1),
 (('go', 'away'), 1)]

### Stemming

In [7]:
from nltk.stem.lancaster import LancasterStemmer

# Sentence containing several inflections of "close" to compare their stems.
text2 = "Sam closed on closing night when she was in the mood to close."

# Stem every token of the sentence with the Lancaster stemmer.
st = LancasterStemmer()
stemmedWords = list(map(st.stem, word_tokenize(text2)))
print(stemmedWords)

['sam', 'clos', 'on', 'clos', 'night', 'when', 'she', 'was', 'in', 'the', 'mood', 'to', 'clos', '.']


### Parts of speech

In [8]:
# Tag each token of text2 with its part of speech; the bare final expression
# is what the cell displays.
text2_tokens = word_tokenize(text2)
nltk.pos_tag(text2_tokens)

[('Sam', 'NNP'),
 ('closed', 'VBD'),
 ('on', 'IN'),
 ('closing', 'NN'),
 ('night', 'NN'),
 ('when', 'WRB'),
 ('she', 'PRP'),
 ('was', 'VBD'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('mood', 'NN'),
 ('to', 'TO'),
 ('close', 'VB'),
 ('.', '.')]

### Contextualizing with WordNet

In [9]:
from nltk.corpus import wordnet as wn

# Print every WordNet synset for the word "bass" next to its definition.
for ss in wn.synsets('bass'):
    gloss = ss.definition()
    print(ss, gloss)

Synset('bass.n.01') the lowest part of the musical range
Synset('bass.n.02') the lowest part in polyphonic music
Synset('bass.n.03') an adult male singer with the lowest voice
Synset('sea_bass.n.01') the lean flesh of a saltwater fish of the family Serranidae
Synset('freshwater_bass.n.01') any of various North American freshwater fish with lean flesh (especially of the genus Micropterus)
Synset('bass.n.06') the lowest adult male singing voice
Synset('bass.n.07') the member with the lowest range of a family of musical instruments
Synset('bass.n.08') nontechnical name for any of numerous edible marine and freshwater spiny-finned fishes
Synset('bass.s.01') having or denoting a low vocal or instrumental range


In [16]:
from nltk.wsd import lesk

# Ask the Lesk algorithm which WordNet sense of 'cool' best fits this sentence,
# then show the chosen synset together with its definition.
person_tokens = word_tokenize("She is a really cool person")
sense1 = lesk(person_tokens, 'cool')
print(sense1, sense1.definition())

Synset('cool.v.01') make cool or cooler


In [17]:
# Same word, different context: disambiguate 'cool' against a sentence about
# water and show the synset Lesk selects with its definition.
water_tokens = word_tokenize("it is a cool glass of water")
sense1 = lesk(water_tokens, 'cool')
print(sense1, sense1.definition())

Synset('cool.s.05') (used of a number or sum) without exaggeration or qualification
