In [1]:
import nltk

In [2]:
#Stemming

from nltk.stem import PorterStemmer

ps = PorterStemmer()
words = ["running", "ran", "doing", "fairly"]
# Apply stemming
stemmed_words = [ps.stem(word) for word in words]
print("Stemmed Words:", stemmed_words)

Stemmed Words: ['run', 'ran', 'do', 'fairli']


In [3]:
#Tokenization

from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.tokenize import WordPunctTokenizer

tokens = word_tokenize('This is the first NLP class')
print("Word tokens: ",tokens)
text = "A hungry fox once looked everywhere for food. He couldn’t find anything until he stumbled upon a farmer’s wall."
s_tokens = sent_tokenize(text)
print("Sentence tokens: ",s_tokens)
tokenizer = WordPunctTokenizer()
p_tokens = tokenizer.tokenize("I can't allow you to go home early")
print("Punctuation Tokenizer: ",p_tokens)

Word tokens:  ['This', 'is', 'the', 'first', 'NLP', 'class']
Sentence tokens:  ['A hungry fox once looked everywhere for food.', 'He couldn’t find anything until he stumbled upon a farmer’s wall.']
Punctuation Tokenizer:  ['I', 'can', "'", 't', 'allow', 'you', 'to', 'go', 'home', 'early']


In [4]:
#Stop words

from nltk.corpus import stopwords

words = "There is a pen on the table"
tokens = word_tokenize(words)
stop_words = set(stopwords.words('english'))
filtered = [word for word in tokens if word not in stop_words]
print(filtered)

['There', 'pen', 'table']


Wordnet

In [5]:
#Wordnet

from nltk.corpus import wordnet as wn
syn = wn.synsets('fish')[0]
print("Name: ",syn.name())
print("Definition: ",syn.definition())
print("Examples: ",syn.examples())

Name:  fish.n.01
Definition:  any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills
Examples:  ['the shark is a large fish', 'in the living room there was a tank of colorful fish']


In [6]:
# Named Entity Recognition (NER)

from nltk import ne_chunk, pos_tag

text = "Barack Obama was the 44th President of the United States."
# Tokenize words and perform POS tagging
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
# Perform Named Entity Recognition
entities = ne_chunk(pos_tags)
print(entities)

(S
  (PERSON Barack/NNP)
  (PERSON Obama/NNP)
  was/VBD
  the/DT
  44th/JJ
  President/NNP
  of/IN
  the/DT
  (GPE United/NNP States/NNPS)
  ./.)


In [22]:
#Parse Tree

from nltk import CFG, ChartParser

grammar = CFG.fromstring("""
    S -> NP VP
    NP -> AT NNS
    VP -> VBD NP
    PP -> P NP
    AT -> 'the'
    NNS -> 'children' | 'cake' 
    VBD -> 'ate'
    P -> 'on'
""")

parser = ChartParser(grammar)
sentence = "the children ate the cake".split()
for tree in parser.parse(sentence):
    tree.pretty_print()
    tree.draw()

                  S              
      ____________|___            
     |                VP         
     |             ___|___        
     NP           |       NP     
  ___|_____       |    ___|___    
 AT       NNS    VBD  AT     NNS 
 |         |      |   |       |   
the     children ate the     cake



In [17]:
grammar = CFG.fromstring("""
    S -> NP VP
    NP -> AT NNS
    VP -> V NP
    PP -> P NP
    AT -> 'the'
    NP -> 'sushi' | 'chopsticks' |'Mark'
    V -> 'eat'
    P -> 'on' | 'with'
""")

parser = ChartParser(grammar)
sentence = "Mark eat sushi with chopsticks".split()
for tree in parser.parse(sentence):
    tree.pretty_print()

In [21]:
from nltk import pos_tag, word_tokenize, RegexpParser
  
# Example text
sample_text = "eat sushi with chopsticks"
  
# Find all parts of speech in above sentence
tagged = pos_tag(word_tokenize(sample_text))
  
#Extract all parts of speech from any text
chunker = RegexpParser("""
                       NP: {<DT>?<JJ>*<NN>}    #To extract Noun Phrases
                       P: {<IN>}               #To extract Prepositions
                       V: {<V.*>}              #To extract Verbs
                       PP: {<p> <NP>}          #To extract Prepositional Phrases
                       VP: {<VP> <NP|PP>*}      #To extract Verb Phrases
                       """)
 
# Print all parts of speech in above sentence
output = chunker.parse(tagged)
print("After Extracting\n", output)

After Extracting
 (S (NP eat/NN) (NP sushi/NN) (P with/IN) chopsticks/NNS)


In [23]:
import nltk
from nltk import CFG

grammar = CFG.fromstring("""
S -> NP VP
NP -> Det N | Det N PP
VP -> V NP | V NP PP
PP -> P NP
Det -> 'the' | 'a'
N -> 'cat' | 'dog' | 'mat' | 'telescope'
V -> 'saw' | 'ate'
P -> 'on' | 'with'
""")

parser = nltk.ChartParser(grammar)

sentence = "the cat saw the dog with the telescope".split()

parses = list(parser.parse(sentence))

if len(parses) > 1:
    print("Ambiguity detected. Multiple parses found.")
    for tree in parses:
        tree.pretty_print()
else:
    print("No ambiguity detected.")


Ambiguity detected. Multiple parses found.
                 S                                
      ___________|_______                          
     |                   VP                       
     |        ___________|________                 
     |       |       |            PP              
     |       |       |        ____|___             
     NP      |       NP      |        NP          
  ___|___    |    ___|___    |     ___|______      
Det      N   V  Det      N   P   Det         N    
 |       |   |   |       |   |    |          |     
the     cat saw the     dog with the     telescope

                 S                            
      ___________|___                          
     |               VP                       
     |        _______|___                      
     |       |           NP                   
     |       |    _______|____                 
     |       |   |   |        PP              
     |       |   |   |    ____|___             
     NP   