In [None]:
# Step 1: Install spaCy (if not already installed)
!pip install -U spacy



In [None]:
# Step 2: Download spaCy English language model
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Step 3: Bring in spaCy (plus its displaCy visualizer) and build the pipeline
import spacy
from spacy import displacy

# Package name of the English pipeline downloaded in the previous step.
MODEL_NAME = "en_core_web_sm"

# Load the pipeline once; later cells call `nlp(...)` on raw text.
nlp = spacy.load(MODEL_NAME)

In [None]:
# Step 4: Sample corpus: five short news / Wikipedia-style sentences.
# Replace these with lines from a real dataset if desired.
texts = [
    "Apple Inc. is planning to open a new office in Seattle in 2024.",
    "Barack Obama was born on August 4, 1961 in Honolulu, Hawaii.",
    (
        "Google was founded by Larry Page and Sergey Brin "
        "while they were Ph.D. students at Stanford University."
    ),
    "The World Health Organization declared COVID-19 a pandemic on March 11, 2020.",
    "Elon Musk's SpaceX successfully launched the Falcon Heavy rocket from Cape Canaveral.",
]

In [None]:
# Step 5: Run Named Entity Recognition over every text and list what was found
for doc_number, text in enumerate(texts, start=1):
    header = f"\n--- Document {doc_number} ---"
    print(header)
    doc = nlp(text)
    # One output line per entity: the matched span, then its label in parentheses.
    entity_lines = [f"{ent.text} ({ent.label_})" for ent in doc.ents]
    for line in entity_lines:
        print(line)


--- Document 1 ---
Apple Inc. (ORG)
Seattle (GPE)
2024 (DATE)

--- Document 2 ---
Barack Obama (PERSON)
August 4, 1961 (DATE)
Honolulu (GPE)
Hawaii (GPE)

--- Document 3 ---
Google (ORG)
Larry Page (PERSON)
Sergey Brin (PERSON)
Ph.D. (WORK_OF_ART)
Stanford University (ORG)

--- Document 4 ---
The World Health Organization (ORG)
March 11, 2020 (DATE)

--- Document 5 ---
Elon Musk's (PERSON)
Falcon Heavy (ORG)
Cape Canaveral (GPE)


In [None]:
# Step 6: Visualize named entities using displaCy
# Import from the public IPython.display module: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14 and was removed
# in later releases, so the old path breaks on an up-to-date kernel.
from IPython.display import HTML, display

In [None]:
# Pick a single text to visualize (index 3 is the WHO / COVID-19 sentence)
example_text = texts[3]
doc = nlp(example_text)

In [None]:
# Draw the entity highlights inline in the notebook output (Jupyter / Colab)
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)