In [1]:
# Install SpaCy (if not already installed)
!pip install spacy

# Download the small English model
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     -- ------------------------------------- 0.8/12.8 MB 4.9 MB/s eta 0:00:03
     ---- ----------------------------------- 1.3/12.8 MB 4.3 MB/s eta 0:00:03
     ---- ----------------------------------- 1.6/12.8 MB 2.7 MB/s eta 0:00:05
     ----- ---------------------------------- 1.8/12.8 MB 2.1 MB/s eta 0:00:06
     ----- ---------------------------------- 1.8/12.8 MB 2.1 MB/s eta 0:00:06
     ------ --------------------------------- 2.1/12.8 MB 1.7 MB/s eta 0:00:07
     ------- -------------------------------- 2.4/12.8 MB 1.6 MB/s eta 0:00:07
     -------- ------------------------------- 2.6/12.8 MB 1.5 MB/s eta 0:00:07
     --------- ------------------------------ 2.9/12.8 MB 1.5 MB/s eta 0:00:07
     --------- --------------------------

In [2]:
import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")


In [3]:
# IPL 2025 match details
text = """
The Delhi government will launch a sterilisation and vaccination drive campaign under the Animal Birth Control (ABC) Rules 2023 after the Supreme Court orders on stray dogs. The Supreme Court ordered that the stray dogs must be released after sterilisation and immunisation, except those infected with rabies or exhibiting aggressive behaviour.
"""


In [4]:
# Process the text using the SpaCy NLP pipeline
doc = nlp(text)
doc


The Delhi government will launch a sterilisation and vaccination drive campaign under the Animal Birth Control (ABC) Rules 2023 after the Supreme Court orders on stray dogs. The Supreme Court ordered that the stray dogs must be released after sterilisation and immunisation, except those infected with rabies or exhibiting aggressive behaviour.

In [5]:
print("Named Entities in the IPL 2025 Text:\n")
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


Named Entities in the IPL 2025 Text:

Delhi -> GPE
the Animal Birth Control -> ORG
ABC -> ORG
2023 -> DATE
the Supreme Court -> ORG
The Supreme Court -> ORG


In [6]:
from spacy import displacy

# Visualizing the named entities
displacy.render(doc, style="ent", jupyter=True)


In [9]:
# Extracting only team names (ORG) and players (PERSON)
teams = [ent.text for ent in doc.ents if ent.label_ == "ORG"]
players = [ent.text for ent in doc.ents if ent.label_ == "GPE"]

print("Teams:", teams)
print("Players:", players)


Teams: ['the Animal Birth Control', 'ABC', 'the Supreme Court', 'The Supreme Court']
Players: ['Delhi']


In [7]:
# Extracting match details (quantities and dates)
scores = [ent.text for ent in doc.ents if ent.label_ == "QUANTITY"]
date = [ent.text for ent in doc.ents if ent.label_ == "DATE"]

print("Scores and Quantities:", scores)
print("Match Date:", date)


Scores and Quantities: []
Match Date: ['2023']


In [10]:
# Adding custom entity
from spacy.tokens import Span

# Adding custom entity for the sponsor
sponsor = "the Supreme Court"
span = Span(doc, len(doc)-1, len(doc), label="SPONSOR")
doc.ents = list(doc.ents) + [span]

# Displaying entities including the new one
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


Delhi -> GPE
the Animal Birth Control -> ORG
ABC -> ORG
2023 -> DATE
the Supreme Court -> ORG
The Supreme Court -> ORG

 -> SPONSOR


Key Takeaways:
NER Pipeline: Pre-trained models identify entities based on context.

Visualization: Use SpaCyâ€™s displacy to visualize named entities.

Filtering: Extract specific types of entities (e.g., teams, players).

Customization: Add new entities to the NER pipeline.

In [6]:
import spacy  

# Load the small English model
nlp = spacy.load("en_core_web_sm")

# Finance-related text
text = "Microsoft's revenue surged by 15% in Q4 2024, reaching $3.5 billion."

# Processing the text
doc = nlp(text)

# Extracting and displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


Microsoft -> ORG
15% -> PERCENT
Q4 2024 -> DATE
$3.5 billion -> MONEY


In [7]:
# Healthcare-related text
text = "Dr. Emily diagnosed Mark with asthma and prescribed Albuterol."

# Processing the text
doc = nlp(text)

# Displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


Emily -> PERSON
Mark -> PERSON
Albuterol -> GPE


In [12]:
# Legal-related text
text = "The Supreme Court ruled in favor of case 2023-CV-456, citing the Privacy Act of 1980."

# Processing the text
doc = nlp(text)

# Displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


The Supreme Court -> ORG
2023-CV-456 -> DATE
the Privacy Act -> LAW
1980 -> DATE


In [8]:
# E-commerce-related text
text = "The Apple iPhone 15 Pro is now available on Amazon for $1199."

# Processing the text
doc = nlp(text)

# Displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


15 -> CARDINAL
Amazon -> ORG
1199 -> MONEY


In [14]:
# Customer feedback text
text = "I am frustrated with the camera quality of my Samsung Galaxy S24."

# Processing the text
doc = nlp(text)

# Displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


Samsung Galaxy S24 -> ORG


In [15]:
# Real estate-related text
text = "John bought a 3-bedroom apartment in San Francisco for $850,000 on March 15, 2024."

# Processing the text
doc = nlp(text)

# Displaying named entities
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")


John -> PERSON
3 -> CARDINAL
San Francisco -> GPE
850,000 -> MONEY
March 15, 2024 -> DATE


In [16]:
# Install SpaCy (if not already installed)
!pip install spacy

# Install the visualizer (if needed)
!pip install spacy-streamlit


Collecting spacy-streamlit
  Downloading spacy_streamlit-1.0.6-py3-none-any.whl.metadata (22 kB)
Downloading spacy_streamlit-1.0.6-py3-none-any.whl (12 kB)
Installing collected packages: spacy-streamlit
Successfully installed spacy-streamlit-1.0.6


In [17]:
import spacy
from spacy import displacy
from spacy.tokens import Span
from spacy.pipeline import EntityRuler

# Load the small English model
nlp = spacy.load("en_core_web_sm")


In [18]:
text = """
In the IPL 2025 final, Mumbai Indians defeated Royal Challengers Bangalore by 7 wickets. 
Hardik Pandya scored 92 runs off 47 balls, while Virat Kohli's 105 off 60 was in vain. 
The match took place at Narendra Modi Stadium, Ahmedabad, on May 29, 2025.
The title sponsor for this season was Jio, with Tata Motors as the primary partner.
"""


In [19]:
# Process the text
doc = nlp(text)


In [20]:
print("\nNER Entities with Labels:\n")
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")



NER Entities with Labels:

2025 -> DATE
Mumbai -> GPE
Indians -> NORP
Royal Challengers Bangalore -> ORG
7 -> CARDINAL
Hardik Pandya -> PERSON
92 -> CARDINAL
47 -> CARDINAL
Virat Kohli's -> ORG
105 -> CARDINAL
60 -> CARDINAL
Narendra Modi Stadium, -> FAC
Ahmedabad -> GPE
May 29, 2025 -> DATE
this season -> DATE
Jio -> PERSON
Tata Motors -> ORG


In [21]:
displacy.render(doc, style="ent", jupyter=True)


In [22]:
# Adding custom rules
ruler = nlp.add_pipe("entity_ruler", before="ner")

patterns = [
    {"label": "EVENT", "pattern": "IPL Trophy"},
    {"label": "SPONSOR", "pattern": "Jio"},
    {"label": "SPONSOR", "pattern": "Tata Motors"}
]

# Adding patterns to the ruler
ruler.add_patterns(patterns)

# Reprocessing the text
doc = nlp(text)

# Display the new entities
print("\nCustom Entities with Labels:\n")
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")



Custom Entities with Labels:

2025 -> DATE
Mumbai -> GPE
Indians -> NORP
Royal Challengers Bangalore -> ORG
7 -> CARDINAL
Hardik Pandya -> PERSON
92 -> CARDINAL
47 -> CARDINAL
Virat Kohli's -> ORG
105 -> CARDINAL
60 -> CARDINAL
Narendra Modi Stadium, -> FAC
Ahmedabad -> GPE
May 29, 2025 -> DATE
this season -> DATE
Jio -> SPONSOR
Tata Motors -> SPONSOR


In [23]:
# Manually adding stadium entity
span = Span(doc, 15, 18, label="STADIUM")
doc.ents = list(doc.ents) + [span]

# Display new entity
print("\nWith Added Stadium Entity:\n")
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")



With Added Stadium Entity:

2025 -> DATE
Mumbai -> GPE
Indians -> NORP
Royal Challengers Bangalore -> ORG
7 -> CARDINAL
wickets. 
 -> STADIUM
Hardik Pandya -> PERSON
92 -> CARDINAL
47 -> CARDINAL
Virat Kohli's -> ORG
105 -> CARDINAL
60 -> CARDINAL
Narendra Modi Stadium, -> FAC
Ahmedabad -> GPE
May 29, 2025 -> DATE
this season -> DATE
Jio -> SPONSOR
Tata Motors -> SPONSOR


In [24]:
# Filtering by specific entity types
players = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
teams = [ent.text for ent in doc.ents if ent.label_ == "ORG"]

print("\nPlayers:", players)
print("Teams:", teams)



Players: ['Hardik Pandya']
Teams: ['Royal Challengers Bangalore', "Virat Kohli's"]


In [25]:
scores = [ent.text for ent in doc.ents if ent.label_ == "QUANTITY"]
date = [ent.text for ent in doc.ents if ent.label_ == "DATE"]
stadium = [ent.text for ent in doc.ents if ent.label_ == "STADIUM"]

print("\nScores:", scores)
print("Match Date:", date)
print("Stadium:", stadium)



Scores: []
Match Date: ['2025', 'May 29, 2025', 'this season']
Stadium: ['wickets. \n']


In [26]:
custom_labels = ["STADIUM", "SPONSOR"]
colors = {"STADIUM": "orange", "SPONSOR": "blue"}

# Custom display
options = {"ents": custom_labels, "colors": colors}
displacy.render(doc, style="ent", options=options, jupyter=True)
