In [1]:
%pip install -U spacy

Note: you may need to restart the kernel to use updated packages.


# Load train/test data

In [1]:
import json
from sklearn.model_selection import train_test_split

def load_data(path):
    with open(path, 'r') as f:
        data = json.load(f)
    return data

json = load_data("../data/food-diary-examples-classified.json")
data = [x for x in json['annotations'] if x is not None]

data_train, data_test = train_test_split(data, test_size=0.2)

# Create model and train it

In [2]:
import spacy
from spacy.training.example import Example

nlp = spacy.blank('en')
ner = nlp.add_pipe('ner')

# This would be to open an existing model
#!python -m spacy download en_core_web_md
#nlp = spacy.load('en_core_web_md')

ner.add_label("FOOD")
ner.add_label("QUANTITY")
ner.add_label("UNIT")
ner.add_label("CALORIES")
ner.add_label("LIQUID")

#optimizer = nlp.create_optimizer() # Just use this if you create a blank new model. But here we use a pretrained model.
optimizer = nlp.begin_training()

losses = {}
for iter in range(10):
    print(f"Train Iterration {iter + 1}")
    for batch in spacy.util.minibatch(data_train, size=200):
        for text, annotations in batch:
            #print("Text:", text)
            #print("Annotations:", annotations)
            # create Example
            example = Example.from_dict(nlp.make_doc(text), annotations)

            # Update the model
            nlp.update([example], drop=0.3, sgd=optimizer, losses=losses)

ner, losses

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Train Iterration 1


" with entities "[[0, 1, 'QUANTITY'], [2, 3, 'QUANTITY'], [4, 8, 'F...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
" with entities "[[0, 12, 'FOOD'], [13, 16, 'QUANTITY'], [16, 18, '...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
" with entities "[[0, 3, 'QUANTITY'], [3, 5, 'UNIT'], [6, 10, 'LIQU...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
" with entities "[[0, 1, 'QUANTITY'], [3, 8, 'FOOD']]". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
" with entities "[[0, 1, 'QUANTITY'], [2, 7, 'FOOD'], [13, 20, 'FOO...". Use `spacy.training.offsets_

Train Iterration 2
Train Iterration 3
Train Iterration 4
Train Iterration 5
Train Iterration 6
Train Iterration 7
Train Iterration 8
Train Iterration 9
Train Iterration 10


(<spacy.pipeline.ner.EntityRecognizer at 0x1739743c0>,
 {'ner': 3380.722982466981})

# Test with test data

In [3]:
for text, _ in data_test:
    print(f"Text:", text)
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

Text: Strawberries
Entities [('Strawberries', 'FOOD')]
Text: 2 rolls with cheese and jam
Entities [('2', 'QUANTITY'), ('rolls', 'FOOD'), ('cheese', 'FOOD'), ('jam', 'FOOD')]
Text: two slices of spelled toast, one with jam, one with honey and then two cups of coffee
Entities [('two', 'QUANTITY'), ('slices', 'UNIT'), ('spelled toast', 'FOOD'), ('one', 'QUANTITY'), ('jam', 'FOOD'), ('one', 'QUANTITY'), ('honey', 'FOOD'), ('two', 'QUANTITY'), ('cups', 'UNIT'), ('coffee', 'LIQUID')]
Text: freshly baked bread roll, 1 egg and cream cheese with jam
Entities [('freshly', 'FOOD'), ('bread roll', 'FOOD'), ('1', 'QUANTITY'), ('egg', 'FOOD'), ('cream cheese', 'FOOD'), ('jam', 'FOOD')]
Text: 2x crispbread Finn Crisp, Magerine 10g, Gutfried like meat sausage 40g
Entities [('2x crispbread Finn Crisp', 'FOOD'), ('Magerine', 'FOOD'), ('10', 'QUANTITY'), ('g', 'UNIT'), ('Gutfried like meat sausage', 'FOOD'), ('40', 'QUANTITY'), ('g', 'UNIT')]
Text: 100g chocolate 0.3 ml hot chocolate
Entities [('100', 'Q

In [10]:
from spacy import displacy

doc = nlp('A vegan salad from continente and a black tea')
displacy.serve(doc, style="ent", port=8083)




Using the 'ent' visualizer
Serving on http://0.0.0.0:8083 ...

Shutting down server on port 8083.


In [11]:
from spacy import displacy

doc = nlp('A vegan salad from continente and drinking a Gatorade')
displacy.serve(doc, style="ent", port=8083)


Using the 'ent' visualizer
Serving on http://0.0.0.0:8083 ...

Shutting down server on port 8083.


In [46]:
doc = nlp('Wonder chunks chicken style 185g, 1tbsp oil, All Seasons pan-fried vegetables Asian style approx 250g')
print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

Entities [('Wonder chunks chicken', 'FOOD'), ('185', 'QUANTITY'), ('g', 'UNIT'), ('1tbsp', 'QUANTITY'), ('oil', 'FOOD'), ('All', 'UNIT'), ('pan-fried vegetables Asian style', 'FOOD'), ('250', 'QUANTITY'), ('g', 'UNIT')]
