In [4]:
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

# Load blank model
nlp = spacy.blank("en")

TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]


# Create NER pipeline
if "ner" not in nlp.pipe_names:
    ner = nlp.add_pipe("ner")
else:
    ner = nlp.get_pipe("ner")

# Add labels
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# Initialize the model
optimizer = nlp.begin_training()

# Train the model
for i in range(100):
    losses = {}
    batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        texts, annotations = zip(*batch)
        examples = [Example.from_dict(nlp.make_doc(text), ann) for text, ann in zip(texts, annotations)]
        nlp.update(examples, drop=0.5, losses=losses)
    print(losses)

{'ner': 9.89999908208847}
{'ner': 9.711391389369965}
{'ner': 9.497089684009552}
{'ner': 9.234238028526306}
{'ner': 8.978684306144714}
{'ner': 8.830875635147095}
{'ner': 8.467540919780731}
{'ner': 8.155297875404358}
{'ner': 7.820043325424194}
{'ner': 7.593694448471069}
{'ner': 7.429515182971954}
{'ner': 6.9613522589206696}
{'ner': 6.380577445030212}
{'ner': 5.934195190668106}
{'ner': 5.914072215557098}
{'ner': 5.5701030641794205}
{'ner': 5.124268859624863}
{'ner': 4.929553955793381}
{'ner': 4.655949234962463}
{'ner': 4.409284025430679}
{'ner': 4.174156978726387}
{'ner': 4.193833708763123}
{'ner': 3.884112097322941}
{'ner': 4.284377112984657}
{'ner': 4.6554360799491405}
{'ner': 3.646094713360071}
{'ner': 3.2352809412404895}
{'ner': 3.4890903467312455}
{'ner': 4.03764959378168}
{'ner': 3.629265365190804}
{'ner': 3.579766741488129}
{'ner': 3.0400449680164456}
{'ner': 3.586963829351589}
{'ner': 2.0271899341605604}
{'ner': 2.7338501289486885}
{'ner': 2.3636536300182343}
{'ner': 2.10837188689

In [2]:
!pip install spacy

Defaulting to user installation because normal site-packages is not writeable
Collecting spacy
  Downloading spacy-3.7.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting spacy-legacy<3.1.0,>=3.0.11
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl (29 kB)
Collecting srsly<3.0.0,>=2.4.3
  Downloading srsly-2.4.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (494 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.3/494.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m31m13.5 MB/s[0m eta [36m0:00:01[0m
Collecting pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4
  Downloading pydantic-2.8.0-py3-none-any.whl (423 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0mm eta 

  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.3.0
    Uninstalling typing_extensions-4.3.0:
      Successfully uninstalled typing_extensions-4.3.0
  Attempting uninstall: pygments
    Found existing installation: Pygments 2.11.2
    Uninstalling Pygments-2.11.2:
      Successfully uninstalled Pygments-2.11.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.9.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 4.22.1 which is incompatible.[0m[31m
[0mSuccessfully installed annotated-types-0.7.0 blis-0.7.11 catalogue-2.0.10 click-8.1.7 cloudpathlib-0.18.1 confection-0.1.5 cymem-2.0.8 langcodes-3.4.0 language-data-1.2.0 marisa-trie-1.2.0 markdown-it-py-3.0.0 mdurl-0.1.2 murmurhash-1.0.10 preshed-3.0.9 pydantic-2.8.0 pydantic-core-2.20.0 pygments-2.18.0 rich-13.7.1 smart-open-7.0.4 spacy-3.7.5 