In [3]:
import nltk

In [6]:
from nltk.tokenize import sent_tokenize, word_tokenize

# Sample commentary; the \u00a0 escapes are non-breaking spaces kept from the source text.
text = "Fifty up for Jacob Bethell. Short, on off, Jacob Bethell\u00a0pulls it through mid-wicket for one.\u00a0England beat New Zealand by 8 wickets!"

# Sentence-level split first, then word-level split, printing each result as it is produced.
for tokenize in (sent_tokenize, word_tokenize):
    print(tokenize(text))


['Fifty up for Jacob Bethell.', 'Short, on off, Jacob Bethell\xa0pulls it through mid-wicket for one.', 'England beat New Zealand by 8 wickets!']
['Fifty', 'up', 'for', 'Jacob', 'Bethell', '.', 'Short', ',', 'on', 'off', ',', 'Jacob', 'Bethell', 'pulls', 'it', 'through', 'mid-wicket', 'for', 'one', '.', 'England', 'beat', 'New', 'Zealand', 'by', '8', 'wickets', '!']


In [7]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Sample commentary; the \u00a0 escapes are non-breaking spaces kept from the source text.
text = "Fifty up for Jacob Bethell. Short, on off, Jacob Bethell\u00a0pulls it through mid-wicket for one.\u00a0England beat New Zealand by 8 wickets!"
words = word_tokenize(text)

# Load the English stopword list exactly once and keep it as a set.
# Calling stopwords.words('english') inside the comprehension condition would
# rebuild the list for every token and scan it linearly each time.
english_stopwords = set(stopwords.words('english'))

# Compare case-insensitively, but keep each surviving token in its original casing.
filtered = [w for w in words if w.lower() not in english_stopwords]
print(filtered)


['Fifty', 'Jacob', 'Bethell', '.', 'Short', ',', ',', 'Jacob', 'Bethell', 'pulls', 'mid-wicket', 'one', '.', 'England', 'beat', 'New', 'Zealand', '8', 'wickets', '!']


In [8]:
from nltk.stem import PorterStemmer

# Words to reduce to their Porter stems.
words = ["running", "runner", "easily", "fairly"]
stemmer = PorterStemmer()

# Show each word next to its stem, one pair per line.
for word in words:
    print(f"{word} -> {stemmer.stem(word)}")

running -> run
runner -> runner
easily -> easili
fairly -> fairli


In [9]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

# Each pair is (word, part-of-speech hint passed as `pos`): 'v' for the first, 'a' for the second.
for word, pos_hint in (("running", 'v'), ("better", 'a')):
    print(lemmatizer.lemmatize(word, pos=pos_hint))


run
good


In [16]:
from nltk import pos_tag, ne_chunk
from nltk.tokenize import word_tokenize
import nltk


# Three-stage pipeline: tokenize -> part-of-speech tag -> named-entity chunk.
# Each intermediate result is printed as soon as it is available.
# NOTE(review): `text` is not assigned in this cell; it is whatever an earlier
# cell left in the kernel (the printed tokens match the cricket commentary
# string) — confirm this cell is always run after that assignment.
tokens = word_tokenize(text)
print(tokens)
# pos_tag yields (token, tag) pairs, as the printed list shows.
tags = pos_tag(tokens)
print(tags)
# ne_chunk wraps recognised entities in labelled subtrees (e.g. PERSON, GPE in the output).
tree = ne_chunk(tags)
print(tree)

['Fifty', 'up', 'for', 'Jacob', 'Bethell', '.', 'Short', ',', 'on', 'off', ',', 'Jacob', 'Bethell', 'pulls', 'it', 'through', 'mid-wicket', 'for', 'one', '.', 'England', 'beat', 'New', 'Zealand', 'by', '8', 'wickets', '!']
[('Fifty', 'NNP'), ('up', 'RP'), ('for', 'IN'), ('Jacob', 'NNP'), ('Bethell', 'NNP'), ('.', '.'), ('Short', 'NNP'), (',', ','), ('on', 'IN'), ('off', 'IN'), (',', ','), ('Jacob', 'NNP'), ('Bethell', 'NNP'), ('pulls', 'VBZ'), ('it', 'PRP'), ('through', 'IN'), ('mid-wicket', 'NN'), ('for', 'IN'), ('one', 'CD'), ('.', '.'), ('England', 'NNP'), ('beat', 'VBD'), ('New', 'NNP'), ('Zealand', 'NNP'), ('by', 'IN'), ('8', 'CD'), ('wickets', 'NNS'), ('!', '.')]
(S
  (GPE Fifty/NNP)
  up/RP
  for/IN
  (PERSON Jacob/NNP Bethell/NNP)
  ./.
  (PERSON Short/NNP)
  ,/,
  on/IN
  off/IN
  ,/,
  (PERSON Jacob/NNP Bethell/NNP)
  pulls/VBZ
  it/PRP
  through/IN
  mid-wicket/NN
  for/IN
  one/CD
  ./.
  (PERSON England/NNP)
  beat/VBD
  (GPE New/NNP Zealand/NNP)
  by/IN
  8/CD
  wickets/NNS
  !/.)

In [17]:
# Hand-tagged (word, tag) pairs, so no tokenizer or tagger is needed in this cell.
sentence = [("a", "DT"), ("big", "JJ"), ("house", "NN")]

# NP rule: an optional <DT>, any number of <JJ>, then a single <NN>.
grammar = "NP: {<DT>?<JJ>*<NN>}"
parser = nltk.RegexpParser(grammar)

parsed = parser.parse(sentence)
print(parsed)


(S (NP a/DT big/JJ house/NN))


In [18]:
from nltk.corpus import wordnet

# Every WordNet synset that contains the word "good".
syns = wordnet.synsets("good")

# Only the first lemma of each synset is listed, so the same name can repeat.
first_lemma_names = [synset.lemmas()[0].name() for synset in syns]
print("Synonyms:", first_lemma_names)

# Definition and usage examples come from the first synset only.
first_synset = syns[0]
print("Definition:", first_synset.definition())
print("Example:", first_synset.examples())


Synonyms: ['good', 'good', 'good', 'commodity', 'good', 'full', 'good', 'estimable', 'beneficial', 'good', 'good', 'adept', 'good', 'dear', 'dependable', 'good', 'good', 'effective', 'good', 'good', 'good', 'good', 'good', 'good', 'good', 'well', 'thoroughly']
Definition: benefit
Example: ['for your own good', "what's the good of worrying?"]


In [19]:
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures

# Tokenize a short sample sentence and index its adjacent word pairs.
words = word_tokenize("This is a sample sentence to test high school collocations.")
finder = BigramCollocationFinder.from_words(words)

# Rank bigrams by likelihood ratio and keep the five best-scoring pairs.
score_fn = BigramAssocMeasures.likelihood_ratio
top_bigrams = finder.nbest(score_fn, 5)
print(top_bigrams)


[('This', 'is'), ('a', 'sample'), ('collocations', '.'), ('high', 'school'), ('is', 'a')]


In [20]:
import nltk
from nltk import word_tokenize, pos_tag, RegexpParser

# Sentence whose noun phrases will be extracted.
text = "The big red apple fell from the tree."

# Chunk rule for NP (noun phrase): optional <DT>, any number of <JJ>, then one <NN>.
chunk_grammar = r"NP: {<DT>?<JJ>*<NN>}"
chunk_parser = RegexpParser(chunk_grammar)

# Split into tokens and attach a part-of-speech tag to each one.
tokens = word_tokenize(text)
tags = pos_tag(tokens)

# Group the tagged tokens according to the rule and show the resulting tree.
chunk_tree = chunk_parser.parse(tags)
print(chunk_tree)


(S
  (NP The/DT big/JJ red/JJ apple/NN)
  fell/VBD
  from/IN
  (NP the/DT tree/NN)
  ./.)


In [21]:
# Second commentary sample; \u00a0 is a non-breaking space kept from the source text.
comm = "Scores are level now! Short and wide, Jacob Bethell\u00a0slaps it through backward point for a brace. He moves to 49 now!"

# Sentence split, shown before the word split is computed.
comm_sentences = sent_tokenize(comm)
print(comm_sentences)

# Word-level split of the same string.
comm_words = word_tokenize(comm)
print(comm_words)

['Scores are level now!', 'Short and wide, Jacob Bethell\xa0slaps it through backward point for a brace.', 'He moves to 49 now!']
['Scores', 'are', 'level', 'now', '!', 'Short', 'and', 'wide', ',', 'Jacob', 'Bethell', 'slaps', 'it', 'through', 'backward', 'point', 'for', 'a', 'brace', '.', 'He', 'moves', 'to', '49', 'now', '!']


In [22]:
# Third commentary sample; \u00a0 is a non-breaking space kept from the source text.
comm1 = "SIX! All the way! Shorter one from Nathan Smith, on middle and leg, Jacob Bethell\u00a0swivels and pulls it magnificently over deep square leg as the ball sails into the stands."

# Apply the sentence tokenizer, then the word tokenizer, printing each result in turn.
for splitter in (sent_tokenize, word_tokenize):
    print(splitter(comm1))

['SIX!', 'All the way!', 'Shorter one from Nathan Smith, on middle and leg, Jacob Bethell\xa0swivels and pulls it magnificently over deep square leg as the ball sails into the stands.']
['SIX', '!', 'All', 'the', 'way', '!', 'Shorter', 'one', 'from', 'Nathan', 'Smith', ',', 'on', 'middle', 'and', 'leg', ',', 'Jacob', 'Bethell', 'swivels', 'and', 'pulls', 'it', 'magnificently', 'over', 'deep', 'square', 'leg', 'as', 'the', 'ball', 'sails', 'into', 'the', 'stands', '.']
