# Named Entity Recognition (NER):
NER is an NLP technique that locates named entities in text (e.g., people, places, organizations, dates) and assigns each one a category label.
It is useful for information extraction, organizing data, improving search, supporting sentiment analysis, and improving language understanding more broadly.

In [74]:
import spacy

# Load spaCy's small English pipeline; `nlp` is the callable used to process text below.
nlp = spacy.load("en_core_web_sm")


In [75]:
# Display the names of the components in the loaded pipeline, in processing order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [76]:
# Run the pipeline on a short sentence; the resulting Doc carries the predicted entities.
doc = nlp(" Bangladesh is small country, its population growth increased by 1% ")

In [77]:
# Print one line per entity: its text, its label, and spaCy's description of that label.
for entity in doc.ents:
    tag = entity.label_
    print(entity.text, tag, spacy.explain(tag), sep="  |  ")

Bangladesh  |  GPE  |  Countries, cities, states
1%  |  PERCENT  |  Percentage, including "%"


In [78]:
# A longer passage mentioning an organization, places, money and dates.
# The adjacent string literals are concatenated into a single text before processing.
doc2 = nlp(
    "Apple Inc. is an American multinational technology company "
    "headquartered in Cupertino, California. "
    "Apple is the world's largest technology company by revenue, "
    "with US$394.3 billion in 2022 revenue. "
    "As of March 2023, Apple is the world's biggest company by market capitalization"
)

In [79]:
# Print one line per entity in doc2: its text, its label, and spaCy's description of that label.
for entity in doc2.ents:
    tag = entity.label_
    print(entity.text, tag, spacy.explain(tag), sep="  |  ")

Apple Inc.  |  ORG  |  Companies, agencies, institutions, etc.
American  |  NORP  |  Nationalities or religious or political groups
Cupertino  |  GPE  |  Countries, cities, states
California  |  GPE  |  Countries, cities, states
Apple  |  ORG  |  Companies, agencies, institutions, etc.
US$394.3 billion  |  MONEY  |  Monetary values, including unit
2022  |  DATE  |  Absolute or relative dates or periods
March 2023  |  DATE  |  Absolute or relative dates or periods
Apple  |  ORG  |  Companies, agencies, institutions, etc.


In [80]:
from spacy import displacy

In [81]:
# Visualize doc2's entities with displaCy's entity style, rendered inline in the notebook.
displacy.render(doc2, style="ent", jupyter=True)

In [82]:
# Display the labels registered for the pipeline's "ner" component.
nlp.pipe_labels["ner"]

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [83]:
# Another example: a sentence containing a person, a nationality and a team name.
doc3 = nlp(
    "Shakib Al Hasan is a Bangladeshi cricketer and current captain of "
    "the Bangladesh national cricket team in all formats."
)

In [84]:
# Print one line per entity in doc3: its text, its label, and spaCy's description of that label.
for entity in doc3.ents:
    tag = entity.label_
    print(entity.text, tag, spacy.explain(tag), sep="  |  ")

Shakib Al Hasan  |  PERSON  |  People, including fictional
Bangladeshi  |  NORP  |  Nationalities or religious or political groups
Bangladesh  |  ORG  |  Companies, agencies, institutions, etc.


In [85]:
# Here we can see that some entities are not recognized correctly.
# The model tagged only "Bangladesh" as ORG and missed the full organization name
# "Bangladesh national cricket team", so the entity is corrected manually below.


from spacy.tokens import Span

In [88]:

# Manually annotate the full team name as a single ORG entity.
# The span is located from the phrase itself rather than from hard-coded token
# indices, so it stays correct if the sentence or the tokenization changes.
team_name = "Bangladesh national cricket team"
team_start = doc3.text.index(team_name)  # raises ValueError if the phrase is absent
s1 = doc3.char_span(team_start, team_start + len(team_name), label="ORG")
if s1 is None:
    # char_span returns None when the character offsets do not fall on token boundaries.
    raise ValueError(f"{team_name!r} does not align with token boundaries in doc3")

# default="unmodified" keeps every entity outside the new span as it was predicted.
doc3.set_ents([s1], default="unmodified")

In [89]:
# Print doc3's entities again after the manual correction: text, label, and label description.
for entity in doc3.ents:
    tag = entity.label_
    print(entity.text, tag, spacy.explain(tag), sep="  |  ")

Shakib Al Hasan  |  PERSON  |  People, including fictional
Bangladeshi  |  NORP  |  Nationalities or religious or political groups
Bangladesh national cricket team  |  ORG  |  Companies, agencies, institutions, etc.
