# Using spaCy for NER

Pre-trained spaCy pipelines (including the `en_core_web_sm` model used below) are listed at https://spacy.io/models/

In [4]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     - -------------------------------------- 0.5/12.8 MB 4.2 MB/s eta 0:00:03
     ---- ----------------------------------- 1.3/12.8 MB 3.9 MB/s eta 0:00:03
     ------ --------------------------------- 2.1/12.8 MB 3.9 MB/s eta 0:00:03
     --------- ------------------------------ 2.9/12.8 MB 4.0 MB/s eta 0:00:03
     ----------- ---------------------------- 3.7/12.8 MB 4.0 MB/s eta 0:00:03
     ------------- -------------------------- 4.5/12.8 MB 3.9 MB/s eta 0:00:03
     ----------------- ---------------------- 5.5/12.8 MB 3.9 MB/s eta 0:00:02
     ------------------ --------------------- 6.0/12.8 MB 4.0 MB/s eta 0:00:02
     ---------------------- ----------------- 7.1/12.8 MB 3.9 MB/s eta 0:00:02
     ------------------------ -----------

In [5]:
import spacy

# Text whose named entities we want to inspect
sentence = "Mario Rossi works at Fiat Chrysler Automobiles in Turin."

# Pre-trained small English pipeline; calling it on the text returns a Doc
nlp = spacy.load("en_core_web_sm")
doc = nlp(sentence)

# One line per entity found by the pipeline: "<text>: <label>"
for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")


Mario Rossi: PERSON
Fiat Chrysler Automobiles: ORG
Turin: GPE


# Custom Training 

In [6]:
import spacy
from spacy.training import Example

# Training examples in spaCy's (text, {"entities": [(start, end, label), ...]})
# format. Offsets are character positions with an exclusive end, and they must
# coincide with token boundaries:
#   text[0:11]  == "Mario Rossi"
#   text[21:46] == "Fiat Chrysler Automobiles"
#   text[50:55] == "Turin"
TRAIN_DATA = [
    ("Mario Rossi works at Fiat Chrysler Automobiles in Turin.",
     {"entities": [(0, 11, "PERSON"), (21, 46, "ORG"), (50, 55, "GPE")]}),
    # Add more training examples here
]

# Load the pre-trained model and grab its NER component
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe("ner")

# Register every label that occurs in the training data. The three labels used
# here are already produced by the pre-trained model (see the output of the
# first example), so this only matters once new label names are added.
for _, annotations in TRAIN_DATA:
    for _, _, label in annotations["entities"]:
        ner.add_label(label)

# Prepare the training data
examples = []
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    # Fail loudly on offsets that do not line up with token boundaries.
    # Doc.char_span returns None for such spans; without this check spaCy only
    # warns and treats the entity as missing, so nothing is learned from it.
    for start, end, label in annotations["entities"]:
        if doc.char_span(start, end, label=label) is None:
            raise ValueError(
                f"Entity offsets ({start}, {end}, {label!r}) select "
                f"{text[start:end]!r}, which is not aligned to token "
                f"boundaries in {text!r}"
            )
    examples.append(Example.from_dict(doc, annotations))

# Train the model: a single update pass over the examples.
# resume_training() returns an optimizer for continuing from the existing
# weights. Only the NER component is updated; the other components have no
# annotations in TRAIN_DATA.
# NOTE(review): enabling only "ner" assumes the component does not listen to the
# shared tok2vec pipe (true for en_core_web_sm per the spaCy model docs) - confirm
# before switching to a different base model.
losses = {}
optimizer = nlp.resume_training()
with nlp.select_pipes(enable="ner"):
    nlp.update(examples, sgd=optimizer, losses=losses)

# Save the model. This must happen after the select_pipes block has exited,
# otherwise the temporarily disabled components would be saved as disabled.
nlp.to_disk("custom_ner_model")




# Load the custom model

In [7]:
import spacy

# Text to run through the fine-tuned pipeline
sentence = "Mario Rossi works at Fiat Chrysler Automobiles in Turin."

# Read the pipeline back from the "custom_ner_model" directory and apply it
nlp_custom = spacy.load("custom_ner_model")
doc_custom = nlp_custom(sentence)

# One line per entity found by the custom pipeline: "<text>: <label>"
for ent in doc_custom.ents:
    print(f"{ent.text}: {ent.label_}")


Mario Rossi: PERSON
Fiat Chrysler Automobiles: ORG
Turin: GPE
