In [2]:
import nltk
nltk.download("wordnet") #One time download
nltk.download("averaged_perceptron_tagger")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\asbaj\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\asbaj\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
str="I can do this all day"
tokens=nltk.word_tokenize(str)
print(tokens)

['I', 'can', 'do', 'this', 'all', 'day']


In [4]:
str1="Stay down. Final Warning."
sent_tok=nltk.sent_tokenize(str1)
print(sent_tok)



In [5]:
from nltk.stem.porter import PorterStemmer
porter_stemmer = PorterStemmer()
word_data="It originated from the idea that there are readers who prefer learning new skills from the comforts of their drawing rooms"
#First tokenize the sentence
tokens=nltk.word_tokenize(word_data)
for w in tokens:
    print("{} : {}".format(w,porter_stemmer.stem(w)))

It : It
originated : origin
from : from
the : the
idea : idea
that : that
there : there
are : are
readers : reader
who : who
prefer : prefer
learning : learn
new : new
skills : skill
from : from
the : the
comforts : comfort
of : of
their : their
drawing : draw
rooms : room


In [6]:
words=["program","programs","programmed","programming"]
for w in words:
    print("{} : {}".format(w,porter_stemmer.stem(w)))

program : program
programs : program
programmed : program
programming : program


In [7]:
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer=WordNetLemmatizer()
word_data="It originated from the idea that there are readers who prefer learning new skills from the comforts of their drawing rooms"
tokens=nltk.word_tokenize(word_data)
for w in tokens:
       print ("{} : {}".format(w,wordnet_lemmatizer.lemmatize(w)))

It : It
originated : originated
from : from
the : the
idea : idea
that : that
there : there
are : are
readers : reader
who : who
prefer : prefer
learning : learning
new : new
skills : skill
from : from
the : the
comforts : comfort
of : of
their : their
drawing : drawing
rooms : room


In [8]:
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize,sent_tokenize
stop_words=set(stopwords.words('english'))
txt="Joe waited for the train.The train was late.Mary and Samantha took the bus.I looked for Mary and Samantha at the bus station."
tokenized=sent_tokenize(txt)
for i in tokenized:
    wordlist=word_tokenize(i)
    wordlist=[w for w in wordlist if not w in stop_words]
    tagged=nltk.pos_tag(wordlist)
    print(tagged)

[('Joe', 'NNP'), ('waited', 'VBD'), ('train.The', 'JJ'), ('train', 'NN'), ('late.Mary', 'JJ'), ('Samantha', 'NNP'), ('took', 'VBD'), ('bus.I', 'NN'), ('looked', 'VBD'), ('Mary', 'NNP'), ('Samantha', 'NNP'), ('bus', 'NN'), ('station', 'NN'), ('.', '.')]


In [9]:
from nltk import Nonterminal, nonterminals, Production, CFG
nt1 = Nonterminal('NP')
nt2 = Nonterminal('VP')
nt1.symbol()

'NP'

In [10]:
nt1 == Nonterminal('NP')

True

In [11]:
nt1 == nt2

False

In [12]:
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')
prod1 = Production(S, [NP, VP])
prod2 = Production(NP, [DT, NP])

In [13]:
prod1.lhs()

S

In [14]:
prod1.rhs()

(NP, VP)

In [15]:
prod1 == Production(S, [NP, VP])

True

In [16]:
prod1 == prod2

False

In [17]:
grammar = CFG.fromstring("""
... S -> NP VP
... PP -> P NP
... NP -> 'the' N | N PP | 'the' N PP
... VP -> V NP | V PP | V NP PP
... N -> 'cat'
... N -> 'dog'
... N -> 'rug'
... V -> 'chased'
... V -> 'sat'
... P -> 'in'
... P -> 'on'
... """)

In [18]:
#Recursive descent parser
from nltk.parse import RecursiveDescentParser
rd = RecursiveDescentParser(grammar)

In [19]:
sentence1 = 'the cat chased the dog'.split()
sentence2 = 'the cat chased the dog on the rug'.split()

In [20]:
for t in rd.parse(sentence1):
...     print(t)

(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))


In [21]:
for t in rd.parse(sentence2):
...     print(t)

(S
  (NP the (N cat))
  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
(S
  (NP the (N cat))
  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))


In [22]:
# shift reduce parser
from nltk.parse import ShiftReduceParser
sr = ShiftReduceParser(grammar)
sentence1 = 'the cat chased the dog'.split()
sentence2 = 'the cat chased the dog on the rug'.split()

In [23]:
for t in sr.parse(sentence1):
...     print(t)

(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))


In [24]:
for t in sr.parse(sentence2):
...     print(t)

In [25]:
nltk.parse.chart.demo(2, print_times=False, trace=1,
...                       sent='I saw a dog', numparses=1)

* Sentence:
I saw a dog
['I', 'saw', 'a', 'dog']

* Strategy: Bottom-up

|.    I    .   saw   .    a    .   dog   .|
|[---------]         .         .         .| [0:1] 'I'
|.         [---------]         .         .| [1:2] 'saw'
|.         .         [---------]         .| [2:3] 'a'
|.         .         .         [---------]| [3:4] 'dog'
|>         .         .         .         .| [0:0] NP -> * 'I'
|[---------]         .         .         .| [0:1] NP -> 'I' *
|>         .         .         .         .| [0:0] S  -> * NP VP
|>         .         .         .         .| [0:0] NP -> * NP PP
|[--------->         .         .         .| [0:1] S  -> NP * VP
|[--------->         .         .         .| [0:1] NP -> NP * PP
|.         >         .         .         .| [1:1] Verb -> * 'saw'
|.         [---------]         .         .| [1:2] Verb -> 'saw' *
|.         >         .         .         .| [1:1] VP -> * Verb NP
|.         >         .         .         .| [1:1] VP -> * Verb
|.         [--------->

In [26]:
nltk.parse.chart.demo(2, print_times=False, trace=0,
...                       sent='I saw John with a dog', numparses=2)

* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Bottom-up

Nr edges in chart: 53
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))



In [27]:
nltk.parse.chart.demo(1, print_times=False, trace=0,
...                       sent='I saw John with a dog', numparses=2)

* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Top-down

Nr edges in chart: 48
(S
  (NP I)
  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
(S
  (NP I)
  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))



In [27]:
nltk.parse.chart.demo(5, print_times=False, trace=1,
...                       sent='I saw John with a dog', numparses=2)

* Sentence:
I saw John with a dog
['I', 'saw', 'John', 'with', 'a', 'dog']

* Strategy: Stepping (top-down vs bottom-up)

*** SWITCH TO TOP DOWN
|[------]      .      .      .      .      .| [0:1] 'I'
|.      [------]      .      .      .      .| [1:2] 'saw'
|.      .      [------]      .      .      .| [2:3] 'John'
|.      .      .      [------]      .      .| [3:4] 'with'
|.      .      .      .      [------]      .| [4:5] 'a'
|.      .      .      .      .      [------]| [5:6] 'dog'
|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
|.      >      .      .      .      .      .| [1