# Named Entity Recognition (NER)

In [47]:
# import packages
import spacy

#displacy
from spacy import displacy

#Span
from spacy.tokens import Span

In [2]:
# Load the pre-trained "en_core_web_sm" pipeline; the resulting `nlp` object
# is what the following cells call to process text.
nlp=spacy.load("en_core_web_sm")

In [3]:
# Names of the components in the loaded pipeline
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [12]:
# Run the pipeline on a sample sentence.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# One line per detected entity: text | label | plain-English meaning of the label.
for ent in doc.ents:
    print(ent, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


###### Displacy

displaCy is spaCy's built-in visualizer, used to render processed documents — in particular named entities and sentence structure.

In [18]:
# Render `doc` with the entity visualizer (style='ent').
displacy.render(doc , style='ent')

In [23]:
# Capitalize "Twitter" and append "Inc": in its lowercase form the model
# did not recognize it as a company.
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")

# Entity text | label | plain-English meaning of the label.
for ent in doc.ents:
    print(ent, ent.label_, spacy.explain(ent.label_), sep="  |  ")

# Show the same entities with the entity visualizer.
displacy.render(doc, style="ent")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [27]:
# Show the labels listed for the pipeline's 'ner' component.
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [36]:
# A sentence mentioning a person, a company and a year.
# NOTE(review): the double space before "Inc" looks accidental — confirm
# before changing it, since the recorded output reflects it.
doc = nlp("Michael Bloomberg founded Bloomberg  Inc in 1982")

# Entity text | label | plain-English meaning of the label.
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Michael Bloomberg  |  PERSON  |  People, including fictional
Bloomberg  Inc  |  ORG  |  Companies, agencies, institutions, etc.
1982  |  DATE  |  Absolute or relative dates or periods


In [39]:
# Same acquisition sentence, this time without the "Inc" suffixes.
# In the recorded run "Twitter" comes back as PRODUCT rather than ORG.
doc = nlp("Tesla is going to acquire Twitter for $45 billion")

for ent in doc.ents:
    print(ent.text, ent.label_, sep="  |  ")

Tesla  |  ORG
Twitter  |  PRODUCT
$45 billion  |  MONEY


In [43]:
# Indexing a Doc with a single integer gives a Token.
type(doc[0])

spacy.tokens.token.Token

In [44]:
# NOTE(review): this cell repeats the previous `type(doc[0])` check and could be removed.
type(doc[0])

spacy.tokens.token.Token

In [45]:
# Slicing a Doc selects a run of consecutive tokens (here tokens 2, 3 and 4).
doc[2:5]

going to acquire

In [46]:
# A slice of a Doc is a Span.
type(doc[2:5])

spacy.tokens.span.Span

###### Span

A `Span` represents a contiguous sequence of tokens in a document (for example, a slice of a `Doc`).

In [64]:
# Back to the lowercase "twitter" sentence: in the recorded run the model
# does not tag it as an entity, so it is labelled by hand further down.
doc = nlp("Tesla is going to acquire twitter for $45 billion")

# Entity text | label | plain-English meaning of the label.
for ent in doc.ents:
    print(ent, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [69]:
# Build one-token spans by token index and label both as organizations:
# token 0 is "Tesla", token 5 is "twitter".
s1 = Span(doc, 0, 1, label="ORG")
s2 = Span(doc, 5, 6, label="ORG")

# Register the spans as entities on the doc. With default="unmodified" the
# entities already present elsewhere in the doc (e.g. "$45 billion") are kept.
doc.set_ents([s1, s2], default="unmodified")

In [71]:
# List the doc's entities again to confirm the manual labels were applied.
for ent in doc.ents:
    print(ent, ent.label_, sep="  |  ")

Tesla  |  ORG
twitter  |  ORG
$45 billion  |  MONEY
