In [1]:
import nltk

In [2]:
from nltk import word_tokenize
from nltk import pos_tag

In [3]:
# Sample sentence used for the part-of-speech tagging demo.
# NOTE(review): the name `string` would shadow the stdlib `string` module if it is ever imported in this notebook.
string = "I was watching movies"

In [4]:
# Split the sentence into tokens first, then tag each token with its part of speech.
tokens = word_tokenize(string)
print(pos_tag(tokens))

[('I', 'PRP'), ('was', 'VBD'), ('watching', 'VBG'), ('movies', 'NNS')]


In [5]:
# PRP: Personal pronoun
# VBD: Verb, past tense
# VBG: Verb, gerund or present participle
# NNS: Noun plural

In [6]:
# Retrieving all nouns: start by tokenizing and POS-tagging a sample sentence.
s = 'My favourite scientist is Carl Sagan'
sample_tokens = word_tokenize(s)
tagged = pos_tag(sample_tokens)

In [7]:
# The Penn Treebank tagset has four noun tags: NN (singular or mass),
# NNS (plural), NNP (proper singular) and NNPS (proper plural).
# Matching only NN and NNP would silently drop plural nouns, so accept all four.
allnoun = [word for word, pos in tagged if pos in {'NN', 'NNS', 'NNP', 'NNPS'}]
allnoun

['scientist', 'Carl', 'Sagan']

## Stanford tagger

### The Brown Corpus was the first million-word electronic corpus of English, created in 1961 at Brown University. This corpus contains text from 500 sources, and the sources have been categorized by genre, such as news, editorial, and so on.

In [9]:
from nltk.corpus import brown

In [10]:
# List the genre categories the Brown corpus is divided into.
brown.categories()

['adventure',
 'belles_lettres',
 'editorial',
 'fiction',
 'government',
 'hobbies',
 'humor',
 'learned',
 'lore',
 'mystery',
 'news',
 'religion',
 'reviews',
 'romance',
 'science_fiction']

In [11]:
# Keep only the POS tag of every (word, tag) pair in the 'news' category.
news_tagged_words = brown.tagged_words(categories='news')
tags = [pos for _, pos in news_tagged_words]

In [12]:
import operator

# Tally how many times each tag occurs.
freq = nltk.FreqDist(tags)
# Order the (tag, count) pairs by ascending count, so the ten most
# frequent tags are the last ten entries.
tags_freq = list(freq.items())
tags_freq.sort(key=operator.itemgetter(1))
tags_freq[-10:]

[('VBD', 2524),
 ('CC', 2664),
 ('JJ', 4392),
 ('.', 4452),
 ('NNS', 5066),
 (',', 5133),
 ('NP', 6866),
 ('AT', 8893),
 ('IN', 10616),
 ('NN', 13162)]

## Named Entity Recognition (NER)

In [23]:
from nltk import ne_chunk

In [24]:
# The ne_chunk function recognizes people (names), places (locations),
# and organizations.

In [25]:
# Sample sentence used for the named entity recognition demo.
text = "Stephen Hawking teach maths at the Oxford University in England"

In [26]:
# POS-tag the tokens, then chunk named entities.
# binary=False keeps the entity type label on each chunk.
pos_tagged_text = nltk.pos_tag(word_tokenize(text))
print(ne_chunk(pos_tagged_text, binary=False))

(S
  (PERSON Stephen/NNP)
  Hawking/NNP
  teach/VB
  maths/NNS
  at/IN
  the/DT
  (ORGANIZATION Oxford/NNP University/NNP)
  in/IN
  (GPE England/NNP))


In [27]:
# With binary=True, ne_chunk only marks each chunk as a named entity (NE)
# without saying which kind of entity it is.
ne_tree_binary = ne_chunk(nltk.pos_tag(word_tokenize(text)), binary=True)
print(ne_tree_binary)

(S
  (NE Stephen/NNP)
  Hawking/NNP
  teach/VB
  maths/NNS
  at/IN
  the/DT
  (NE Oxford/NNP University/NNP)
  in/IN
  (NE England/NNP))


## References: 

https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

http://www.nltk.org/book/ch02.html

https://nlp.stanford.edu/software/

http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford

https://en.wikipedia.org/wiki/Brown_Corpus

https://nlp.stanford.edu/software/CRF-NER.html