In [2]:
# source: https://github.com/explosion/spacy/blob/master/examples/training/train_new_entity_type.py
import warnings
warnings.filterwarnings("ignore")

import random
from pathlib import Path
import spacy


# --- Configuration: everything a reader might want to tune -----------------
# Label of the new entity type that the recognizer is taught.
LABEL = 'ANIMAL'
# Number of full passes (epochs) over the training examples.
n_iter = 20
# Name intended for the trained model (not used by the cells visible here).
new_model_name = 'animal'
# Seed Python's RNG so anything driven by `random` (such as shuffling the
# training examples) is repeatable after Restart Kernel -> Run All.
# NOTE(review): spaCy's own randomness (e.g. dropout) may need separate
# seeding for fully reproducible losses - confirm.
SEED = 0
random.seed(SEED)

In [3]:
# Training examples for the new entity type.
# Each record below pairs a sentence with the (start, end) character spans
# that should be tagged 'ANIMAL'; an empty span list is a negative example.
# Caveat: when fine-tuning an existing model, also mix in examples of the
# entity types it already recognizes, otherwise it may learn the new type
# but "forget" what it previously knew.
TRAIN_DATA = [
    (sentence, {'entities': [(start, end, 'ANIMAL') for start, end in spans]})
    for sentence, spans in (
        ("Horses are too tall and they pretend to care about your feelings",
         [(0, 6)]),
        ("Do they bite?",
         []),
        ("horses are too tall and they pretend to care about your feelings",
         [(0, 6)]),
        ("horses pretend to care about your feelings",
         [(0, 6)]),
        ("they pretend to care about your feelings, those horses",
         [(48, 54)]),
        ("horses?",
         [(0, 6)]),
    )
]

In [4]:
# Start from a blank English Language object.
nlp = spacy.blank('en')

# Build the built-in entity recognizer (create_pipe works for components
# registered with spaCy), give it the new label, then add it to the pipeline.
ner = nlp.create_pipe('ner')
ner.add_label(LABEL)
nlp.add_pipe(ner)

# Prepare the pipeline for training; the returned optimizer is what the
# training loop passes to nlp.update via `sgd=`.
optimizer = nlp.begin_training()




In [5]:
# Train for n_iter epochs: each epoch visits every example once, one example
# per update, in a fresh random order, then prints the accumulated losses.
for itn in range(n_iter):
    # random.sample returns a new shuffled list, so TRAIN_DATA itself keeps
    # its original order (random.shuffle would reorder the shared list in
    # place, leaving other cells with mutated data after this cell runs).
    epoch_examples = random.sample(TRAIN_DATA, len(TRAIN_DATA))
    losses = {}  # nlp.update accumulates per-component loss into this dict
    for text, annotations in epoch_examples:
        # update the model on a single example, with dropout of 0.35
        nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
                   losses=losses)
    print(losses)


{'ner': 22.53822418953619}
{'ner': 4.208199911431277}
{'ner': 4.449946057009506}
{'ner': 0.0036542861775723415}
{'ner': 1.8907879911256948}
{'ner': 1.1650205795862946e-05}
{'ner': 0.618101441567477}
{'ner': 0.0004006197051601455}
{'ner': 7.777732594995193e-08}
{'ner': 0.0003964712086599656}
{'ner': 2.124084202219681e-08}
{'ner': 1.5069610619002138e-15}
{'ner': 1.840358264956125}
{'ner': 0.0008784232842263698}
{'ner': 6.015378403685215e-06}
{'ner': 0.008246748739679788}
{'ner': 9.697466031529106e-17}
{'ner': 6.40385654895844e-19}
{'ner': 6.762641629381138e-19}
{'ner': 3.863779321510275e-11}


In [6]:
# Run the trained pipeline on a sentence that is not in TRAIN_DATA and list
# each predicted entity as "<label> <text>", one per line.
test_text = 'Do you like horses?'
doc = nlp(test_text)

print("Entities in '%s'" % test_text)
for label, span_text in ((ent.label_, ent.text) for ent in doc.ents):
    print(label, span_text)


Entities in 'Do you like horses?'
ANIMAL horses
