In [1]:
! pip install spacy==3.2.3



In [3]:
train_data = [
    (
        'Chef added some salt and pepper to the rice.',
        {'entities': [
            (16, 20, 'SPICE'),
            (25, 31, 'SPICE'),
            (39, 43, 'INGREDIENT')
        ]}
    ),
    (
        'The pasta was set to boil with some salt.',
        {'entities': [
            (4, 9, 'INGREDIENT'),
            (36, 40, 'SPICE')
        ]}
    ),
    (
        'Adding egg to the rice dish with some pepper.',
        {'entities': [
            (7, 10, 'INGREDIENT'),
            (18, 22, 'INGREDIENT'),
            (38, 44, 'SPICE')
        ]}
    )
]

In [4]:
import spacy

nlp = spacy.blank("en")
print("Created a blank en model")

nlp.add_pipe('ner', last=True)
ner = nlp.get_pipe("ner")
print("pipe_names", nlp.pipe_names)

for _, annotations in train_data:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# begin training
optimizer = nlp.begin_training()

Created a blank en model
pipe_names ['ner']


In [5]:
import random
from spacy.training.example import Example
n_iter = 100
pipe_exceptions = ["ner", "trf_wordpiece", "trf_tok2vec"]
other_pipes = [
  pipe
  for pipe in nlp.pipe_names
  if pipe not in pipe_exceptions
]
with nlp.disable_pipes(*other_pipes):
    for _ in range(n_iter):
        random.shuffle(train_data)
        losses = {}
        for batch in spacy.util.minibatch(
          train_data, size=2
        ):
            for text, annots in batch:
                doc = nlp.make_doc(text)
                nlp.update(
                  [Example.from_dict(doc, annots)],
                  drop=0.5,
                  sgd=optimizer,
                  losses=losses
                )
        print("Losses", losses)

Losses {'ner': 26.406894624233246}
Losses {'ner': 24.422075152397156}
Losses {'ner': 21.87881565093994}
Losses {'ner': 19.64219379425049}
Losses {'ner': 15.305394291877747}
Losses {'ner': 12.999607905745506}
Losses {'ner': 12.782691605389118}
Losses {'ner': 12.8466052392032}
Losses {'ner': 11.307000368833542}
Losses {'ner': 10.45554256887408}
Losses {'ner': 10.894643188439659}
Losses {'ner': 9.768918068963103}
Losses {'ner': 7.752329136012122}
Losses {'ner': 7.633916775346734}
Losses {'ner': 6.42156014192733}
Losses {'ner': 6.938113203519606}
Losses {'ner': 6.401758093438275}
Losses {'ner': 6.4496848738296535}
Losses {'ner': 4.9973037921934065}
Losses {'ner': 4.269507087167838}
Losses {'ner': 4.057356054887464}
Losses {'ner': 4.828880522265705}
Losses {'ner': 2.970670067203211}
Losses {'ner': 3.4044468573899644}
Losses {'ner': 3.3010748354853776}
Losses {'ner': 1.106695153670516}
Losses {'ner': 1.959482599984381}
Losses {'ner': 1.0102093725370893}
Losses {'ner': 2.183977517460707}
Loss

In [6]:
def get_entities(raw_text):
    doc = nlp(raw_text)
    result = []
    for word in doc.ents:
        result.append((word.text,word.label_))
    return result

print(get_entities("Add water to the spaghetti"))
# >> [('water', 'INGREDIENT'), ('spaghetti', 'INGREDIENT')]

print(get_entities("Add some paprika on top to your pasta."))
# >> [('paprika', 'SPICE'), ('pasta', 'INGREDIENT')]

[('water', 'INGREDIENT'), ('spaghetti', 'INGREDIENT')]
[('paprika', 'SPICE'), ('pasta', 'INGREDIENT')]
