In [1]:
import spacy
from spacy import displacy

In [2]:
# Shared spaCy pipeline for the whole notebook; process_text reads this
# module-level object and temporarily adds/removes components on it.
nlp = spacy.load("en_core_web_sm")

In [23]:
def process_text(text, custom_patterns=None, visualize=True):
    """
    Extract named entities from a text, print them, and optionally render them.

    The shared module-level ``nlp`` pipeline is only modified for the duration
    of the call: when custom patterns are supplied, a temporary
    ``custom_ruler`` component is inserted before ``ner`` and is always removed
    again afterwards (even if processing raises), so the patterns never leak
    into later uses of ``nlp`` elsewhere in the notebook.

    Args:
        text (str): The text to process.
        custom_patterns (list): Entity-ruler patterns (optional), e.g.
            ``[{"label": "PRODUCT", "pattern": "iPhone 15 Pro"}]``.
        visualize (bool): Render the entities with displaCy (default True).
            Pass False when not running in Jupyter/Colab.

    Returns:
        None
    """
    # Drop a ruler left behind by an interrupted or older run so that
    # add_pipe below cannot fail on a duplicate component name.
    if "custom_ruler" in nlp.pipe_names:
        nlp.remove_pipe("custom_ruler")

    try:
        if custom_patterns:
            # Placed before "ner" so the rule-based matches are set first.
            ruler = nlp.add_pipe("entity_ruler", before="ner", name="custom_ruler")
            ruler.add_patterns(custom_patterns)

        # Process the text while the (optional) ruler is installed
        doc = nlp(text)
    finally:
        # Restore the shared pipeline; the Doc keeps its entities regardless.
        if "custom_ruler" in nlp.pipe_names:
            nlp.remove_pipe("custom_ruler")

    # Snapshot (text, label) pairs once; both listings below reuse them.
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    # Print the entities
    print("\nExtracted Entities:")
    for ent_text, ent_label in entities:
        print(f"Entity: {ent_text}, Label: {ent_label}")

    # Filter and print specific entities
    print("\nFiltered Entities (PERSON, ORG, GPE):")
    for ent_text, ent_label in entities:
        if ent_label in {"PERSON", "ORG", "GPE"}:
            print(f"{ent_label}: {ent_text}")

    # Visualize entities (skipped when visualize=False, e.g. outside Jupyter)
    if visualize:
        print("\nEntity Visualization:")
        displacy.render(doc, style="ent", jupyter=True)

In [24]:
# First sample passage (SpaceX / Starship announcement) passed to process_text.
text1 = """
Elon Musk, the CEO of SpaceX, announced the launch of the Starship rocket from Cape Canaveral.
This project, funded by NASA, aims to revolutionize space exploration. Musk stated that Tesla's
Gigafactory in Austin, Texas, will contribute to the project.
"""

In [25]:
# Second sample passage (Apple iPhone 15 Pro event) passed to process_text.
text2 = """
Apple Inc. revealed its latest iPhone 15 Pro at an event held in Cupertino, California.
CEO Tim Cook emphasized the advanced features of the device, including the A17 Bionic chip.
The device is expected to compete with Samsung's Galaxy series in the global market.
"""

In [26]:
# Patterns handed to the entity ruler through process_text; each entry pairs
# a literal phrase with the label it should be tagged with.
custom_patterns = [
    {"label": "PROJECT", "pattern": "Starship"},
    {"label": "PRODUCT", "pattern": "Gigafactory"},
    {"label": "PRODUCT", "pattern": "iPhone 15 Pro"},
]

In [27]:
# Run the extraction on the first sample with the custom patterns applied.
print("Processing Text 1:")
process_text(text1, custom_patterns)

Processing Text 1:

Extracted Entities:
Entity: Elon Musk, Label: PERSON
Entity: SpaceX, Label: ORG
Entity: Starship, Label: PROJECT
Entity: Cape Canaveral, Label: GPE
Entity: NASA, Label: ORG
Entity: Musk, Label: PERSON
Entity: Tesla, Label: ORG
Entity: Gigafactory, Label: PRODUCT
Entity: Austin, Label: GPE
Entity: Texas, Label: GPE

Filtered Entities (PERSON, ORG, GPE):
PERSON: Elon Musk
ORG: SpaceX
GPE: Cape Canaveral
ORG: NASA
PERSON: Musk
ORG: Tesla
GPE: Austin
GPE: Texas

Entity Visualization:


In [28]:
# Run the extraction on the second sample with the same custom patterns.
print("\nProcessing Text 2:")
process_text(text2, custom_patterns)


Processing Text 2:

Extracted Entities:
Entity: Apple Inc., Label: ORG
Entity: iPhone 15 Pro, Label: PRODUCT
Entity: Cupertino, Label: GPE
Entity: California, Label: GPE
Entity: Tim Cook, Label: PERSON
Entity: A17 Bionic, Label: PRODUCT
Entity: Samsung, Label: ORG
Entity: Galaxy, Label: ORG

Filtered Entities (PERSON, ORG, GPE):
ORG: Apple Inc.
GPE: Cupertino
GPE: California
PERSON: Tim Cook
ORG: Samsung
ORG: Galaxy

Entity Visualization:
