In [1]:
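# One-time setup: uncomment and run the next line if the 'en_core_web_sm' model is not installed yet.
#!python -m spacy download en_core_web_sm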

In [2]:
import spacy
from spacy import displacy

In [3]:
# Load the small English pipeline once; the later cells reuse `nlp`.
nlp = spacy.load('en_core_web_sm')

# Sample sentence containing people, a date, a time, an amount and a place.
sample_sentence = 'Mark Zukerber will meet Rishi on Monday 6th June 2024, 10 am for $3 Trillion deal at Mumbai'
sent = nlp(sample_sentence)

# Last expression of the cell: show the detected entity spans.
sent.ents

(Mark Zukerber, Rishi, Monday 6th June 2024, 10 am, $3 Trillion, Mumbai)

In [4]:
# List each entity of the sample sentence next to the label the model predicted.
for entity in sent.ents:
    entity_text, entity_label = entity.text, entity.label_
    print(entity_text, '-->', entity_label)

Mark Zukerber --> PERSON
Rishi --> PERSON
Monday 6th June 2024 --> DATE
10 am --> TIME
$3 Trillion --> MONEY
Mumbai --> GPE


In [5]:
# ISRO sample paragraph, written as adjacent literals (joined by Python at
# compile time) so the single-line text stays readable in the cell.
raw_text = (
    'ISRO was previously known as the Indian National Committee for Space Research (INCOSPAR), '
    'set up under Jawaharlal Nehru on the suggestions of Dr. Vikram Sarabhai in 1962 '
    'recognising the need for space research.'
)
raw_text

'ISRO was previously known as the Indian National Committee for Space Research (INCOSPAR), set up under Jawaharlal Nehru on the suggestions of Dr. Vikram Sarabhai in 1962 recognising the need for space research.'

In [6]:
# Run the pipeline over the ISRO paragraph.
sent1 = nlp(raw_text)

# NOTE: in the recorded output the ORG span stops at "(INCOSPAR" without the
# closing parenthesis, and "Jawaharlal" is tagged GPE with "Nehru" left out.
for span in sent1.ents:
    span_text, span_label = span.text, span.label_
    print(span_text, '-->', span_label)

the Indian National Committee for Space Research (INCOSPAR --> ORG
Jawaharlal --> GPE
Vikram Sarabhai --> PERSON
1962 --> DATE


In [7]:
# Alaska sample paragraph spanning five lines. Every piece except the last ends
# with an explicit " \n" so the trailing space before each line break is
# visible instead of hiding at the end of a source line.
raw_text2 = (
    "Alaska is the largest U.S. state by area, comprising more total area than the next three largest states of Texas, \n"
    "California and Montana combined, and is the seventh-largest subnational division in the world. \n"
    "It is the third-least populous and most sparsely populated U.S. state, but is, with a population of 736,081 as of 2020, \n"
    "the continent's most populous territory located mostly north of the 60th parallel, \n"
    "with more than quadruple the combined populations of Northern Canada and Greenland."
)

raw_text2

"Alaska is the largest U.S. state by area, comprising more total area than the next three largest states of Texas, \nCalifornia and Montana combined, and is the seventh-largest subnational division in the world. \nIt is the third-least populous and most sparsely populated U.S. state, but is, with a population of 736,081 as of 2020, \nthe continent's most populous territory located mostly north of the 60th parallel, \nwith more than quadruple the combined populations of Northern Canada and Greenland."

In [8]:
# Run the pipeline over the Alaska paragraph.
sent2 = nlp(raw_text2)

# NOTE: in the recorded output "Northern Canada" comes back as ORG while the
# other place names are tagged GPE.
for span in sent2.ents:
    span_text, span_label = span.text, span.label_
    print(span_text, '-->', span_label)

Alaska --> GPE
U.S. --> GPE
three --> CARDINAL
Texas --> GPE
California --> GPE
Montana --> GPE
seventh --> ORDINAL
third --> ORDINAL
U.S. --> GPE
736,081 --> CARDINAL
2020 --> DATE
60th --> ORDINAL
Northern Canada --> ORG
Greenland --> GPE


In [9]:
# Look up the description of the GPE label that appears in the entity listings.
spacy.explain('GPE')

'Countries, cities, states'

In [10]:
# Look up the description of the CARDINAL label that appears in the entity listings.
spacy.explain('CARDINAL')

'Numerals that do not fall under another type'

In [11]:
# Look up the description of the ORDINAL label that appears in the entity listings.
spacy.explain('ORDINAL')

'"first", "second", etc.'

In [12]:
# NORP does not appear in the entity listings shown here; it is looked up for reference.
spacy.explain('NORP')

'Nationalities or religious or political groups'

## Display the NER results interactively

In [13]:
# Visualise the sample sentence with displaCy's entity ('ent') style.
# jupyter=True explicitly enables notebook rendering instead of relying on auto-detection.
displacy.render(sent, style='ent', jupyter=True)

In [14]:
# Visualise the ISRO paragraph with displaCy's entity ('ent') style.
# jupyter=True explicitly enables notebook rendering instead of relying on auto-detection.
displacy.render(sent1, style='ent', jupyter=True)

In [15]:
# Visualise the Alaska paragraph with displaCy's entity ('ent') style.
# jupyter=True explicitly enables notebook rendering instead of relying on auto-detection.
displacy.render(sent2, style='ent', jupyter=True)

In [16]:
# ICC sample paragraph. The text starts with a line break, and most lines carry
# a trailing space before their "\n"; both are spelled out explicitly here so
# they are visible in the source.
icc_text = (
    "\n"
    "The Chairman heads the board of directors and on June 26, 2014, Narayanaswami Srinivasan, the former president of BCCI, \n"
    "was announced as the first chairman of the council.\n"
    "The role of ICC president became a largely honorary position after the establishment of the chairman role and other changes made to the ICC constitution in 2014. \n"
    "It has been claimed that the 2014 changes have handed control to the 'Big Three' nations of England, India and Australia. \n"
    "The last ICC president was Zaheer Abbas , who was appointed in June 2015 following the resignation of Mustafa Kamal in April 2015. \n"
    "When the post of ICC president was abolished in April 2016, Shashank Manohar, \n"
    "who replaced Srinivasan in October 2015, became the first independent elected chairman of the ICC."
)
icc_text

"\nThe Chairman heads the board of directors and on June 26, 2014, Narayanaswami Srinivasan, the former president of BCCI, \nwas announced as the first chairman of the council.\nThe role of ICC president became a largely honorary position after the establishment of the chairman role and other changes made to the ICC constitution in 2014. \nIt has been claimed that the 2014 changes have handed control to the 'Big Three' nations of England, India and Australia. \nThe last ICC president was Zaheer Abbas , who was appointed in June 2015 following the resignation of Mustafa Kamal in April 2015. \nWhen the post of ICC president was abolished in April 2016, Shashank Manohar, \nwho replaced Srinivasan in October 2015, became the first independent elected chairman of the ICC."

In [17]:
# Run the pipeline over the ICC paragraph.
sent4 = nlp(icc_text)

# NOTE: in the recorded output the standalone "Srinivasan" is tagged ORG, while
# the full name "Narayanaswami Srinivasan" is tagged PERSON.
for span in sent4.ents:
    span_text, span_label = span.text, span.label_
    print(span_text, '-->', span_label)

June 26, 2014 --> DATE
Narayanaswami Srinivasan --> PERSON
BCCI --> ORG
first --> ORDINAL
ICC --> ORG
ICC --> ORG
2014 --> DATE
2014 --> DATE
England --> GPE
India --> GPE
Australia --> GPE
ICC --> ORG
Zaheer Abbas --> PERSON
June 2015 --> DATE
Mustafa Kamal --> PERSON
April 2015 --> DATE
ICC --> ORG
April 2016 --> DATE
Shashank Manohar --> PERSON
Srinivasan --> ORG
October 2015 --> DATE
first --> ORDINAL
ICC --> ORG


In [18]:
# Keep only the entities the model labelled PERSON in the ICC paragraph.
# The filter trusts the predicted labels, so a name tagged otherwise (the
# standalone "Srinivasan" is ORG in the recorded output) is not listed.
people = (entity for entity in sent4.ents if entity.label_ == 'PERSON')
for person in people:
    print(person.text, '-->', person.label_)

Narayanaswami Srinivasan --> PERSON
Zaheer Abbas --> PERSON
Mustafa Kamal --> PERSON
Shashank Manohar --> PERSON
