# POS Tagging

## with nltk

In [1]:
import nltk

# Fetch the tokenizer and tagger resources that the tagging cells rely on.
for resource in ('punkt', 'universal_tagset'):
    nltk.download(resource)
# Left as the cell's final expression so its return value is still displayed.
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [2]:
# Split the example question into word tokens; `tokens` is reused by the
# universal-tagset cell that follows.
question = "Can you buy me a red chili pepper for grocery?"
tokens = nltk.word_tokenize(question)

# No `tagset` argument is passed, so the tagger's native tag names are shown.
native_tags = nltk.pos_tag(tokens)
print("Part of Speech : ", native_tags)

Part of Speech :  [('Can', 'MD'), ('you', 'PRP'), ('buy', 'VB'), ('me', 'PRP'), ('a', 'DT'), ('red', 'JJ'), ('chili', 'NN'), ('pepper', 'NN'), ('for', 'IN'), ('grocery', 'NN'), ('?', '.')]


In [3]:
# Tag the same `tokens` again, this time requesting the "universal" tagset.
universal_tags = nltk.pos_tag(tokens, tagset="universal")
print("Part of Speech : ", universal_tags)

Part of Speech :  [('Can', 'VERB'), ('you', 'PRON'), ('buy', 'VERB'), ('me', 'PRON'), ('a', 'DET'), ('red', 'ADJ'), ('chili', 'NOUN'), ('pepper', 'NOUN'), ('for', 'ADP'), ('grocery', 'NOUN'), ('?', '.')]


## with spaCy

In [4]:
import spacy

# Load the `en_core_web_sm` pipeline and run it over a short example sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("I want an early upgrade")

# Print every token next to its part-of-speech label, one pair per line.
for token in doc:
    print(token.text, token.pos_)

I PRON
want VERB
an DET
early ADJ
upgrade NOUN


# Named Entity Recognition (NER)

## with nltk

In [6]:
import nltk

# Fetch the chunker package and the `words` corpus used for entity chunking.
for resource in ('maxent_ne_chunker', 'words'):
    nltk.download(resource)

from nltk.chunk import ne_chunk

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


In [32]:
# Example sentence to run through the NLTK entity chunker.
sentence = "Legendary scientist Albert Einstein is born in Ulm, Germany."

# The chunker is fed POS-tagged tokens, so tokenize and tag first.
tokens = nltk.word_tokenize(sentence)
tagged_tokens = nltk.pos_tag(tokens)

# Build the chunk tree; `ne_chunk` is the name imported from `nltk.chunk`
# in the setup cell of this section.
entities = ne_chunk(tagged_tokens)

print(entities)

(S
  Legendary/JJ
  scientist/NN
  (PERSON Albert/NNP Einstein/NNP)
  is/VBZ
  born/VBN
  in/IN
  (GPE Ulm/NNP)
  ,/,
  (GPE Germany/NNP)
  ./.)


In [33]:
# Display the productions of the chunk tree built in the previous cell:
# one rule for the top-level sentence node plus one per entity subtree.
entities.productions()

[S -> ('Legendary', 'JJ') ('scientist', 'NN') PERSON ('is', 'VBZ') ('born', 'VBN') ('in', 'IN') GPE (',', ',') GPE ('.', '.'),
 PERSON -> ('Albert', 'NNP') ('Einstein', 'NNP'),
 GPE -> ('Ulm', 'NNP'),
 GPE -> ('Germany', 'NNP')]

## with spaCy

In [26]:
import spacy
from spacy import displacy
from collections import Counter

# Load the `en_core_web_sm` pipeline for the entity-recognition cell below.
# NOTE(review): the POS-tagging section already assigns this same pipeline to
# `nlp`; this reload looks redundant — confirm before removing it.
nlp = spacy.load("en_core_web_sm")

In [29]:
# Run the pipeline on an example sentence.
# NOTE(review): "proffessor" is misspelled in the input text; it is left
# untouched here because editing it changes what the model is given.
sentence = nlp("Michael Jordan is a proffessor at Berkeley")

# Pair each detected entity's text with its label and print the list.
entity_pairs = [(ent.text, ent.label_) for ent in sentence.ents]
print(entity_pairs)

[('Michael Jordan', 'PERSON'), ('Berkeley', 'GPE')]
