In [1]:
!pip install -qq spacy



In [2]:
import spacy
from spacy.training.example import Example
import random

In [5]:
TRAIN_DATA = [
    ("Apple is looking at buying U.K. startup for $1 billion", {"entities": [(0, 5, "ORG"), (27, 30, "GPE"), (44, 54, "MONEY")]}),
    ("San Francisco considers banning sidewalk delivery robots", {"entities": [(0, 13, "GPE")]}),
    ("London is a big city in the United Kingdom.", {"entities": [(0, 6, "GPE"), (31, 48, "GPE")]})
]


In [6]:
nlp = spacy.blank("en")  # create blank Language class
ner = nlp.add_pipe("ner")  # add NER to the pipeline


In [7]:
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

In [8]:
optimizer = nlp.begin_training()

# Loop for the number of training iterations
for itn in range(30):
    random.shuffle(TRAIN_DATA)
    losses = {}
    # Batch up the examples using spaCy's minibatch
    for batch in spacy.util.minibatch(TRAIN_DATA, size=2):
        for text, annotations in batch:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=0.5, losses=losses)
    print(f"Losses at iteration {itn}: {losses}")




Losses at iteration 0: {'ner': 22.2813860476017}
Losses at iteration 1: {'ner': 20.26408190280199}
Losses at iteration 2: {'ner': 18.04890089482069}
Losses at iteration 3: {'ner': 15.047378847375512}
Losses at iteration 4: {'ner': 10.883829958736897}
Losses at iteration 5: {'ner': 8.52976240310818}
Losses at iteration 6: {'ner': 7.148952103918418}
Losses at iteration 7: {'ner': 6.273150556691689}
Losses at iteration 8: {'ner': 6.238875547969656}
Losses at iteration 9: {'ner': 6.912819771964678}
Losses at iteration 10: {'ner': 6.385145119866522}
Losses at iteration 11: {'ner': 6.017273740705264}
Losses at iteration 12: {'ner': 4.669669389770959}
Losses at iteration 13: {'ner': 5.0690452271198865}
Losses at iteration 14: {'ner': 5.1290121271530325}
Losses at iteration 15: {'ner': 5.614411006268583}
Losses at iteration 16: {'ner': 4.752944039349586}
Losses at iteration 17: {'ner': 13.858567287407169}
Losses at iteration 18: {'ner': 4.540569777230758}
Losses at iteration 19: {'ner': 5.4565

In [9]:
nlp.to_disk("./custom_ner_model")

In [10]:
nlp = spacy.load("custom_ner_model")

# Test the model
doc = nlp("Apple is planning to buy a startup in San Francisco for $2 million")
for ent in doc.ents:
    print(ent.text, ent.label_)

Apple GPE
San Francisco GPE
$2 million MONEY
