In [7]:
! pip install spacy



In [8]:
import spacy

In [5]:
train_data = [
    (
        'Chef added some salt and pepper to the rice.',
        {'entities': [
            (16, 20, 'SPICE'),
            (25, 31, 'SPICE'),
            (39, 43, 'INGREDIENT')
        ]}
    ),
    (
        'The pasta was set to boil with some salt.',
        {'entities': [
            (4, 9, 'INGREDIENT'),
            (36, 40, 'SPICE')
        ]}
    ),
    (
        'Adding egg to the rice dish with some pepper.',
        {'entities': [
            (7, 10, 'INGREDIENT'),
            (18, 22, 'INGREDIENT'),
            (38, 44, 'SPICE')
        ]}
    )
]

In [6]:
train_data

[('Chef added some salt and pepper to the rice.',
  {'entities': [(16, 20, 'SPICE'),
    (25, 31, 'SPICE'),
    (39, 43, 'INGREDIENT')]}),
 ('The pasta was set to boil with some salt.',
  {'entities': [(4, 9, 'INGREDIENT'), (36, 40, 'SPICE')]}),
 ('Adding egg to the rice dish with some pepper.',
  {'entities': [(7, 10, 'INGREDIENT'),
    (18, 22, 'INGREDIENT'),
    (38, 44, 'SPICE')]})]

In [9]:
nlp = spacy.blank("en")
print("Created a blank en model")

nlp.add_pipe('ner', last=True)
ner = nlp.get_pipe("ner")
print("pipe_names", nlp.pipe_names)

for _, annotations in train_data:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# begin training
optimizer = nlp.begin_training()

Created a blank en model
pipe_names ['ner']


In [16]:
import random
from spacy.training.example import Example
n_iter = 100
pipe_exceptions = ["ner", "trf_wordpiece", "trf_tok2vec"]
other_pipes = [
  pipe
  for pipe in nlp.pipe_names
  if pipe not in pipe_exceptions
]
with nlp.disable_pipes(*other_pipes):
    for _ in range(n_iter):
        random.shuffle(train_data)
        losses = {}
        for batch in spacy.util.minibatch(
          train_data, size=2
        ):
            for text, annots in batch:
                doc = nlp.make_doc(text)
                nlp.update(
                  [Example.from_dict(doc, annots)],
                  drop=0.5,
                  sgd=optimizer,
                  losses=losses
                )
        print("Losses", losses)

Losses {'ner': 1.953276395883209}
Losses {'ner': 1.9465234512343834}
Losses {'ner': 4.507014725755795e-14}
Losses {'ner': 2.5767095486947953e-13}
Losses {'ner': 1.9980565122287672e-12}
Losses {'ner': 6.219637996739926e-13}
Losses {'ner': 3.278174346565315e-12}
Losses {'ner': 1.1696053066860184e-14}
Losses {'ner': 1.637714389124956e-09}
Losses {'ner': 8.388284217406897e-05}
Losses {'ner': 1.3035851789437076e-10}
Losses {'ner': 2.183412788873162e-11}
Losses {'ner': 7.435719464869795e-06}
Losses {'ner': 9.92689771668121e-14}
Losses {'ner': 5.705412416518824e-09}
Losses {'ner': 7.566191455197759e-13}
Losses {'ner': 1.4643583358035528}
Losses {'ner': 9.610152188931288e-12}
Losses {'ner': 1.865051679229359e-11}
Losses {'ner': 8.184966268059813e-06}
Losses {'ner': 6.285525181969035e-05}
Losses {'ner': 2.231617884848394e-08}
Losses {'ner': 6.563791989238797e-11}
Losses {'ner': 4.402623283795385e-11}
Losses {'ner': 3.8339630141170114e-07}
Losses {'ner': 1.2371781393696287e-11}
Losses {'ner': 2.

In [17]:
def get_entities(raw_text):
    doc = nlp(raw_text)
    result = []
    for word in doc.ents:
        result.append((word.text,word.label_))
    return result

print(get_entities("Add water to the spaghetti"))
print(get_entities("Add some paprika on top to your pasta."))

[('water', 'INGREDIENT'), ('spaghetti', 'INGREDIENT')]
[('paprika', 'SPICE'), ('pasta', 'SPICE')]
