In [2]:
#IMPORT LIBRARIES

import spacy
from spacy import displacy                     #for visualizing NER
from spacy import tokenizer
from IPython.display import HTML, display
import re                                      #for text cleaning

In [3]:
#LOAD SPACY MODEL (small English model)
# The pipeline is loaded once and bound to `nlp`; later cells call this same
# object on both the raw and the cleaned text, so both NER runs use one model.
# NOTE(review): assumes the `en_core_web_sm` package is already installed in
# the kernel's environment — confirm before a fresh Restart & Run All.

nlp = spacy.load('en_core_web_sm')             #contains tokenizer + POS + NER + parser

In [6]:
#RAW TEXT INPUT (Wikipedia-like Apple history)
# One literal per sentence; adjacent string literals are concatenated by the
# parser, so `apple_text` is still a single one-line string. Every literal
# except the last ends with the space that separates it from the next sentence.

apple_text = (
    "Apple Inc. was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne in Los Altos, California. "
    "The company revolutionized personal computing with the Apple II in 1977 and the Macintosh in 1984. "
    "In 2001, Apple launched the iPod, followed by the iPhone in 2007, which transformed the smartphone industry. "
    "Tim Cook became CEO in August 2011 after Steve Jobs resigned due to health issues. "
    "Under Cook's leadership, Apple became the first publicly traded U.S. company to reach a $1 trillion market cap in 2018 and briefly exceeded $4 trillion in 2025. "
    "As of December 2025, Apple is valued at over $3.5 trillion, making it one of the world's most valuable companies. "
    "The company is known for products like the iPhone, iPad, Mac, Apple Watch, and services including Apple Music, iCloud, and Apple TV+. "
    "In 2025, Apple continues to focus on AI with Apple Intelligence features and faces challenges in the competitive AI space."
)

# Banner and text on separate lines, exactly as two consecutive prints would emit them.
print("===== RAW TEXT =====", apple_text, sep="\n")

===== RAW TEXT =====
Apple Inc. was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne in Los Altos, California. The company revolutionized personal computing with the Apple II in 1977 and the Macintosh in 1984. In 2001, Apple launched the iPod, followed by the iPhone in 2007, which transformed the smartphone industry. Tim Cook became CEO in August 2011 after Steve Jobs resigned due to health issues. Under Cook's leadership, Apple became the first publicly traded U.S. company to reach a $1 trillion market cap in 2018 and briefly exceeded $4 trillion in 2025. As of December 2025, Apple is valued at over $3.5 trillion, making it one of the world's most valuable companies. The company is known for products like the iPhone, iPad, Mac, Apple Watch, and services including Apple Music, iCloud, and Apple TV+. In 2025, Apple continues to focus on AI with Apple Intelligence features and faces challenges in the competitive AI space.


In [7]:
#RUN NER ON RAW TEXT
# Runs the loaded pipeline on the untouched string. The result is kept in
# `spacy_doc`; the next cells read its `.ents` and pass it to displacy.

spacy_doc = nlp(apple_text)

In [8]:
# List each entity found in the raw text as "<text> <label>".
# Entities can span several tokens (e.g. "April 1, 1976"), so the loop
# variable is named `ent` rather than `word`.
for ent in spacy_doc.ents:
    print(ent.text, ent.label_)

Apple Inc. ORG
April 1, 1976 DATE
Steve Jobs PERSON
Steve Wozniak PERSON
Ronald Wayne PERSON
Los Altos GPE
California GPE
the Apple II LAW
1977 DATE
Macintosh ORG
1984 DATE
2001 DATE
Apple ORG
iPod ORG
iPhone ORG
2007 DATE
Tim Cook PERSON
August 2011 DATE
Steve Jobs PERSON
Cook PERSON
Apple ORG
first ORDINAL
U.S. GPE
$1 trillion MONEY
2018 DATE
$4 trillion MONEY
2025 DATE
December 2025 DATE
Apple ORG
over $3.5 trillion MONEY
iPhone ORG
iPad ORG
Mac PERSON
Apple Watch ORG
Apple Music ORG
Apple ORG
2025 DATE
Apple ORG
AI GPE
Apple Intelligence ORG
AI GPE


In [10]:
#VISUALIZE RAW NER USING DISPLACY
# With jupyter=False the result of render() is captured in `html` and shown
# explicitly through IPython's HTML wrapper instead of being auto-rendered.
print("\n===== VISUALIZATION: RAW TEXT =====")
html = displacy.render(
    spacy_doc,
    style="ent",
    jupyter=False,
)
display(HTML(html))


===== VISUALIZATION: RAW TEXT =====


In [12]:
#CLEAN TEXT (LOWERCASE + REMOVE PUNCTUATION)
# NOTE: experiment only — NER works best on raw text.
# Every character that is neither a word character nor whitespace is deleted
# (not replaced by a space), then the result is lowercased. This also drops
# "$", "." and "'", so "$3.5 trillion" becomes "35 trillion", "U.S." becomes
# "us" and "Cook's" becomes "cooks".

apple_text_clean = re.sub(
    pattern=r'[^\w\s]',
    repl='',
    string=apple_text,
).lower()

# Blank line, banner, then the cleaned text — same output as two separate prints.
print("\n===== CLEANED TEXT =====", apple_text_clean, sep="\n")


===== CLEANED TEXT =====
apple inc was founded on april 1 1976 by steve jobs steve wozniak and ronald wayne in los altos california the company revolutionized personal computing with the apple ii in 1977 and the macintosh in 1984 in 2001 apple launched the ipod followed by the iphone in 2007 which transformed the smartphone industry tim cook became ceo in august 2011 after steve jobs resigned due to health issues under cooks leadership apple became the first publicly traded us company to reach a 1 trillion market cap in 2018 and briefly exceeded 4 trillion in 2025 as of december 2025 apple is valued at over 35 trillion making it one of the worlds most valuable companies the company is known for products like the iphone ipad mac apple watch and services including apple music icloud and apple tv in 2025 apple continues to focus on ai with apple intelligence features and faces challenges in the competitive ai space


In [14]:
#RUN NER ON CLEANED TEXT
# Same pipeline as for the raw text, applied to the lowercased,
# punctuation-free string so the two entity lists can be compared.
spacy_doc_clean = nlp(apple_text_clean)

# One "<text> → <label>" line per entity recognized in the cleaned text.
print("\n===== NAMED ENTITIES (CLEANED TEXT) =====")
for ent in spacy_doc_clean.ents:
    print(
        ent.text,
        "→",
        ent.label_,
    )


===== NAMED ENTITIES (CLEANED TEXT) =====
apple inc → ORG
april 1 1976 → DATE
steve jobs → PERSON
steve wozniak → PERSON
ronald wayne → PERSON
los altos → GPE
california → GPE
1977 → DATE
macintosh → ORG
1984 → DATE
2001 → DATE
2007 → DATE
tim cook → PERSON
august 2011 → DATE
steve jobs → PERSON
first → ORDINAL
1 trillion → CARDINAL
2018 → DATE
4 trillion → MONEY
2025 → DATE
december 2025 → DATE
apple → ORG
2025 → DATE


In [15]:
#VISUALIZE CLEANED TEXT NER
# With jupyter=False the result of render() is captured in `html_clean` and
# shown explicitly through IPython's HTML wrapper instead of being auto-rendered.
print("\n===== VISUALIZATION: CLEANED TEXT =====")
html_clean = displacy.render(
    spacy_doc_clean,
    style="ent",
    jupyter=False,
)
display(HTML(html_clean))


===== VISUALIZATION: CLEANED TEXT =====
