In [None]:
import spacy
from spacy import displacy
import pandas as pd


In [None]:
# Instantiate the small pretrained English pipeline; later cells share this `nlp` object.
nlp = spacy.load(name="en_core_web_sm")
print("spaCy Model Loaded Successfully!")


In [None]:
# Sample text (two sentences separated by a newline; the first line ends with a
# space, written explicitly here so an editor cannot silently strip it).
text = (
    "Elon Musk, the CEO of Tesla and SpaceX, announced that a new rocket launch will take place in Cape Canaveral, Florida on 25th December 2024. \n"
    "He also met with Tim Cook, the CEO of Apple, to discuss AI advancements."
)

# Run the full pipeline over the sample text.
doc = nlp(text)

# List every recognized entity with its label and character offsets into `text`.
print("Named Entities, Entity Types, and Positions:")
for ent in doc.ents:
    begin, finish = ent.start_char, ent.end_char
    print(f"{ent.text} - {ent.label_} - Start: {begin}, End: {finish}")

# Render the same entities inline as highlighted spans.
displacy.render(doc, style="ent", jupyter=True)


In [None]:
# Entity labels and their descriptions.
# `spacy.explain` is a lookup *function* (label -> description or None), not a
# dict, so it has no `.items()`. The labels themselves come from the NER
# component of the loaded pipeline; they are plain names such as "ORG" or "GPE"
# and carry no "ent_" prefix.
print("Entity Labels and Descriptions:")
for label in sorted(nlp.get_pipe("ner").labels):
    # Falls back to printing "None" for labels missing from spaCy's glossary
    # (for example custom labels added by training).
    print(f"{label}: {spacy.explain(label)}")


In [None]:
# Add a custom entity to recognize specific terms
from spacy.pipeline import EntityRuler

# Create an EntityRuler and add custom patterns.
# Re-running this cell must not fail: adding a second component named
# "entity_ruler" raises, so drop any ruler left over from a previous run first.
# `component_names` (unlike `pipe_names`) also lists disabled components.
if "entity_ruler" in nlp.component_names:
    nlp.remove_pipe("entity_ruler")

# Placed before "ner" so rule-based matches are set first and the statistical
# model works around them instead of overwriting them.
ruler = nlp.add_pipe("entity_ruler", before="ner")

patterns = [
    {"label": "PRODUCT", "pattern": "Tesla Model Y"},
    {"label": "TECHNOLOGY", "pattern": "ChatGPT"},
    {"label": "EVENT", "pattern": "World AI Conference"}
]
ruler.add_patterns(patterns)

# Test the custom NER (same text as a two-line string; the first line ends with
# a space, written explicitly so it cannot be stripped by an editor).
custom_text = (
    "The Tesla Model Y is now the best-selling vehicle in the US. OpenAI's ChatGPT will be showcased \n"
    "at the World AI Conference in January 2024."
)

custom_doc = nlp(custom_text)

# Display custom entities
print("Custom Entities:")
for ent in custom_doc.ents:
    print(f"{ent.text} - {ent.label_}")


In [None]:
import random
from spacy.training import Example

# Prepare training data
def _annotate(sentence, *mentions):
    """Build one ``(text, {"entities": [...]})`` training tuple.

    Each mention is a ``(phrase, label)`` pair. The character span is computed
    from the first occurrence of ``phrase`` in ``sentence`` instead of being
    typed by hand, so the offsets cannot drift out of sync with the text.

    Raises:
        ValueError: if a phrase does not occur in the sentence.
    """
    entities = []
    for phrase, label in mentions:
        start = sentence.index(phrase)
        entities.append((start, start + len(phrase), label))
    return sentence, {"entities": entities}


# The previous hand-written offsets for "California", "Cupertino" and "GPT-4"
# did not match the text (one even ran past the end of its sentence), so those
# annotations could not be aligned to tokens.
TRAIN_DATA = [
    _annotate("Google is a tech giant based in California.", ("Google", "ORG"), ("California", "GPE")),
    _annotate("Apple's new iPhone was released in Cupertino.", ("Apple", "ORG"), ("Cupertino", "GPE")),
    _annotate("OpenAI created GPT-4 for advanced AI tasks.", ("OpenAI", "ORG"), ("GPT-4", "PRODUCT")),
]

# Get the NER component
ner = nlp.get_pipe("ner")

# Make sure the labels used in TRAIN_DATA are known to the component
ner.add_label("GPE")
ner.add_label("PRODUCT")

# Train with only NER enabled. The context manager re-enables the other
# components when the block exits, so the test below (and any model saved
# afterwards) uses the complete pipeline rather than a stripped-down one.
with nlp.select_pipes(enable=["ner"]):
    optimizer = nlp.resume_training()
    for epoch in range(10):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # `train_text` (not `text`) so the sample text defined earlier in the
        # notebook is not overwritten by this loop.
        for train_text, annotations in TRAIN_DATA:
            example = Example.from_dict(nlp.make_doc(train_text), annotations)
            # Pass the optimizer explicitly so the one returned by
            # resume_training() is the one applied.
            nlp.update([example], sgd=optimizer, drop=0.5, losses=losses)
        print(f"Epoch {epoch + 1}, Losses: {losses}")

# Test the retrained model
test_text = "Apple released a new MacBook in California."
test_doc = nlp(test_text)
print("\nTest NER Results After Training:")
for ent in test_doc.ents:
    print(f"{ent.text} - {ent.label_}")


In [None]:
# Persist the current pipeline to a directory next to the notebook.
output_dir = "./custom_ner_model"
nlp.to_disk(path=output_dir)
print(f"Model saved to {output_dir}")

# Read the pipeline back from that directory into a separate object.
loaded_nlp = spacy.load(name=output_dir)
print("Custom Model Loaded Successfully!")
