# Named Entity Recognition (NER) in NLTK Library

Reference tutorial: https://machinelearningknowledge.ai/beginners-guide-to-named-entity-recognition-ner-in-nltk-library-python/

In [1]:
# Importing libraries
import nltk
from nltk import word_tokenize,pos_tag
import spacy

### Example 1

The example text is first tokenized and POS-tagged. We then pass the tagged tokens to the NLTK function `ne_chunk()`, which uses a pretrained model to recognize named entities from POS-tagged input.

In [2]:
# Sample sentence. NOTE: it contains a typographic apostrophe (’); in the
# tagged output this shows up as two separate tokens, '’' and 's'.
text = "NASA awarded Elon Musk’s SpaceX a $2.9 billion contract to build the lunar lander."

# Step 1: split the sentence into word tokens.
tokens = word_tokenize(text)

# Step 2: attach a part-of-speech tag to each token and show the (token, tag) pairs.
tag = pos_tag(tokens)
print(tag)

# Step 3: chunk the tagged tokens into a tree whose subtrees are the
# recognized named entities, then print that tree.
ne_tree = nltk.ne_chunk(tag)
print(ne_tree)

[('NASA', 'NNP'), ('awarded', 'VBD'), ('Elon', 'NNP'), ('Musk', 'NNP'), ('’', 'NNP'), ('s', 'VBD'), ('SpaceX', 'NNP'), ('a', 'DT'), ('$', '$'), ('2.9', 'CD'), ('billion', 'CD'), ('contract', 'NN'), ('to', 'TO'), ('build', 'VB'), ('the', 'DT'), ('lunar', 'NN'), ('lander', 'NN'), ('.', '.')]
(S
  (ORGANIZATION NASA/NNP)
  awarded/VBD
  (PERSON Elon/NNP Musk/NNP)
  ’/NNP
  s/VBD
  (ORGANIZATION SpaceX/NNP)
  a/DT
  $/$
  2.9/CD
  billion/CD
  contract/NN
  to/TO
  build/VB
  the/DT
  lunar/NN
  lander/NN
  ./.)


### Example 2

Another example, this time using the already POS-tagged sentences of the Treebank corpus provided by the NLTK library, so no tokenization or tagging step is needed.

In [3]:
# Tagged sentences from NLTK's treebank corpus: each sentence is a list of
# (token, POS tag) pairs, so it can be fed to ne_chunk() directly.
sent = nltk.corpus.treebank.tagged_sents()

# Show the first tagged sentence, followed by its named-entity tree.
first_sentence = sent[0]
print(first_sentence)
print(nltk.ne_chunk(first_sentence))

[('Pierre', 'NNP'), ('Vinken', 'NNP'), (',', ','), ('61', 'CD'), ('years', 'NNS'), ('old', 'JJ'), (',', ','), ('will', 'MD'), ('join', 'VB'), ('the', 'DT'), ('board', 'NN'), ('as', 'IN'), ('a', 'DT'), ('nonexecutive', 'JJ'), ('director', 'NN'), ('Nov.', 'NNP'), ('29', 'CD'), ('.', '.')]
(S
  (PERSON Pierre/NNP)
  (ORGANIZATION Vinken/NNP)
  ,/,
  61/CD
  years/NNS
  old/JJ
  ,/,
  will/MD
  join/VB
  the/DT
  board/NN
  as/IN
  a/DT
  nonexecutive/JJ
  director/NN
  Nov./NNP
  29/CD
  ./.)


### Example 3

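NER using spaCy. For each token we print its text, its IOB tag (`B` = beginning of an entity, `I` = inside an entity, `O` = outside any entity), and its entity type.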

In [13]:
# Load the "en_core_web_sm" spaCy pipeline and run it over the same
# sentence used in the NLTK example.
nlp = spacy.load("en_core_web_sm")
doc = nlp(
    "NASA awarded Elon Musk’s SpaceX a $2.9 billion contract to build the lunar lander."
)
print("Sentence:", doc, "\n")

# One line per token: text, IOB entity flag, entity label.
# The label is an empty string for tokens outside any entity.
for token in doc:
    print(token.text, token.ent_iob_, token.ent_type_)

Sentence: NASA awarded Elon Musk’s SpaceX a $2.9 billion contract to build the lunar lander. 

NASA B ORG
awarded O 
Elon B PERSON
Musk I PERSON
’s I PERSON
SpaceX O 
a O 
$ B MONEY
2.9 I MONEY
billion I MONEY
contract O 
to O 
build O 
the O 
lunar O 
lander O 
. O 


## Exercise 2

Repeating the previous examples with a new text sample:

### Example 1

In [17]:
# New sample sentence, written as two adjacent string literals that Python
# concatenates into a single string.
text = (
    "Bengio and Elon Musk, along with more than 1,000 other experts and industry executives, "
    "had already cited potential risks to society in April."
)

# Step 1: split the sentence into word tokens.
tokens = word_tokenize(text)

# Step 2: attach a part-of-speech tag to each token and show the (token, tag) pairs.
tag = pos_tag(tokens)
print(tag)

# Step 3: chunk the tagged tokens into a named-entity tree and print it.
ne_tree = nltk.ne_chunk(tag)
print(ne_tree)

[('Bengio', 'NNP'), ('and', 'CC'), ('Elon', 'NNP'), ('Musk', 'NNP'), (',', ','), ('along', 'IN'), ('with', 'IN'), ('more', 'JJR'), ('than', 'IN'), ('1,000', 'CD'), ('other', 'JJ'), ('experts', 'NNS'), ('and', 'CC'), ('industry', 'NN'), ('executives', 'NNS'), (',', ','), ('had', 'VBD'), ('already', 'RB'), ('cited', 'VBN'), ('potential', 'JJ'), ('risks', 'NNS'), ('to', 'TO'), ('society', 'VB'), ('in', 'IN'), ('April', 'NNP'), ('.', '.')]
(S
  (GPE Bengio/NNP)
  and/CC
  (PERSON Elon/NNP Musk/NNP)
  ,/,
  along/IN
  with/IN
  more/JJR
  than/IN
  1,000/CD
  other/JJ
  experts/NNS
  and/CC
  industry/NN
  executives/NNS
  ,/,
  had/VBD
  already/RB
  cited/VBN
  potential/JJ
  risks/NNS
  to/TO
  society/VB
  in/IN
  April/NNP
  ./.)


### Example 2

In [15]:
# Tagged sentences from NLTK's treebank corpus: each sentence is a list of
# (token, POS tag) pairs, so it can be fed to ne_chunk() directly.
sent = nltk.corpus.treebank.tagged_sents()

# Show the second tagged sentence (index 1), followed by its named-entity tree.
second_sentence = sent[1]
print(second_sentence)
print(nltk.ne_chunk(second_sentence))

[('Mr.', 'NNP'), ('Vinken', 'NNP'), ('is', 'VBZ'), ('chairman', 'NN'), ('of', 'IN'), ('Elsevier', 'NNP'), ('N.V.', 'NNP'), (',', ','), ('the', 'DT'), ('Dutch', 'NNP'), ('publishing', 'VBG'), ('group', 'NN'), ('.', '.')]
(S
  (PERSON Mr./NNP)
  (PERSON Vinken/NNP)
  is/VBZ
  chairman/NN
  of/IN
  (ORGANIZATION Elsevier/NNP)
  N.V./NNP
  ,/,
  the/DT
  (GPE Dutch/NNP)
  publishing/VBG
  group/NN
  ./.)


### Example 3

In [18]:
# Load the "en_core_web_sm" spaCy pipeline and run it over the same
# sentence used in the NLTK example of this exercise.
nlp = spacy.load("en_core_web_sm")
doc = nlp(
    "Bengio and Elon Musk, along with more than 1,000 other experts and industry executives, "
    "had already cited potential risks to society in April."
)
print("Sentence:", doc, "\n")

# One line per token: text, IOB entity flag, entity label.
# The label is an empty string for tokens outside any entity.
for token in doc:
    print(token.text, token.ent_iob_, token.ent_type_)

Sentence: Bengio and Elon Musk, along with more than 1,000 other experts and industry executives, had already cited potential risks to society in April. 

Bengio B PERSON
and O 
Elon B PERSON
Musk I PERSON
, O 
along O 
with O 
more B CARDINAL
than I CARDINAL
1,000 I CARDINAL
other O 
experts O 
and O 
industry O 
executives O 
, O 
had O 
already O 
cited O 
potential O 
risks O 
to O 
society O 
in O 
April B DATE
. O 
