# Food Name Entity Recognition

In [1]:
#Import needed libraries 
import json
import spacy
from spacy import displacy
import random
from spacy.util import minibatch, compounding
from spacy.training import Example 

### Load Model

In [2]:
# Load the small pretrained English pipeline.
nlp=spacy.load("en_core_web_sm")
# Keep a handle on the 'ner' component: it is the part of the pipeline that is trained further below.
ner=nlp.get_pipe('ner')

In [3]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

### Data Preprocessing

In [4]:
# Load the annotated dataset (JSON format).
# The encoding is passed explicitly: without it open() falls back to the
# platform locale, which can mis-decode non-ASCII characters in the texts on
# systems whose default encoding is not UTF-8.
with open('./dataset/custom_food_ner_data.json', encoding='utf-8') as f:
    TRAIN_DATA = json.load(f)

In [5]:
# Show the training examples; each item is [text, {'entities': [[start, end, label], ...]}].
TRAIN_DATA['annotations']

[['Revani is a dessert that emerged with the cuisine of the Ottoman Empires.\r',
  {'entities': [[0, 6, 'FOOD']]}],
 ['Hummus is probably one of the best-known Middle Eastern foods.\r',
  {'entities': [[0, 6, 'FOOD']]}],
 ['Falafel is a dish originating from the Levant.\r',
  {'entities': [[0, 7, 'FOOD']]}],
 ['Baba ghannouj is a popular appetizer made from eggplant and tahini.\r',
  {'entities': [[0, 13, 'FOOD']]}],
 ['Knafeh is a buttery, crunchy dessert.\r',
  {'entities': [[0, 6, 'FOOD'], [21, 36, 'FOOD']]}],
 ['Red Wine is for gentleman.\r', {'entities': [[0, 8, 'FOOD']]}],
 ['Girls love strawberries.\r', {'entities': [[11, 23, 'FOOD']]}],
 ['Pumpkin pie is a common dish for thanks giving dinner.\r',
  {'entities': [[0, 11, 'FOOD']]}],
 ["Don't wanna celebrate Christmas without turkey.\r",
  {'entities': [[40, 46, 'FOOD']]}],
 ['Ice Ice pops are must after dinner.\r', {'entities': [[0, 12, 'FOOD']]}],
 ['Lacchi is an Asian radiational food.\r', {'entities': [[0, 6, 'FOOD']]}],
 ['

In [6]:
# The custom label is the first entry of the dataset's 'classes' list.
# It is the entity type that gets added to the 'ner' pipe for food-name recognition.
LABEL = TRAIN_DATA['classes'][0]
LABEL

'FOOD'

In [7]:
# Entity labels of the 'ner' pipe before the custom label is added.
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

### Add Custom Label to the Pretrained Model

In [8]:
# Register the custom label with the 'ner' pipe.
ner.add_label(LABEL)
# Obtain the optimizer that is later passed to nlp.update() to continue training the loaded pipeline.
optimizer = nlp.resume_training()
# List the NER transition names; the output shows B-/I-/L-/U- entries for the new label at the end.
move_names = list(ner.move_names)
move_names

['B-ORG',
 'B-DATE',
 'B-PERSON',
 'B-GPE',
 'B-MONEY',
 'B-CARDINAL',
 'B-NORP',
 'B-PERCENT',
 'B-WORK_OF_ART',
 'B-LOC',
 'B-TIME',
 'B-QUANTITY',
 'B-FAC',
 'B-EVENT',
 'B-ORDINAL',
 'B-PRODUCT',
 'B-LAW',
 'B-LANGUAGE',
 'I-ORG',
 'I-DATE',
 'I-PERSON',
 'I-GPE',
 'I-MONEY',
 'I-CARDINAL',
 'I-NORP',
 'I-PERCENT',
 'I-WORK_OF_ART',
 'I-LOC',
 'I-TIME',
 'I-QUANTITY',
 'I-FAC',
 'I-EVENT',
 'I-ORDINAL',
 'I-PRODUCT',
 'I-LAW',
 'I-LANGUAGE',
 'L-ORG',
 'L-DATE',
 'L-PERSON',
 'L-GPE',
 'L-MONEY',
 'L-CARDINAL',
 'L-NORP',
 'L-PERCENT',
 'L-WORK_OF_ART',
 'L-LOC',
 'L-TIME',
 'L-QUANTITY',
 'L-FAC',
 'L-EVENT',
 'L-ORDINAL',
 'L-PRODUCT',
 'L-LAW',
 'L-LANGUAGE',
 'U-ORG',
 'U-DATE',
 'U-PERSON',
 'U-GPE',
 'U-MONEY',
 'U-CARDINAL',
 'U-NORP',
 'U-PERCENT',
 'U-WORK_OF_ART',
 'U-LOC',
 'U-TIME',
 'U-QUANTITY',
 'U-FAC',
 'U-EVENT',
 'U-ORDINAL',
 'U-PRODUCT',
 'U-LAW',
 'U-LANGUAGE',
 'O',
 'B-FOOD',
 'I-FOOD',
 'L-FOOD',
 'U-FOOD']

In [9]:
# Entity labels of the 'ner' pipe after the custom label has been added.
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'FOOD',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [10]:
# Components that stay enabled while training.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]

# Every remaining component of the pipeline; these get disabled so that
# training does not affect them.
other_pipes = [name for name in nlp.pipe_names if name not in pipe_exceptions]
other_pipes

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer']

### Training

In [11]:
# Train with every component listed in other_pipes disabled; they are restored
# when the block exits. select_pipes() is the spaCy v3 replacement for the
# deprecated disable_pipes().
with nlp.select_pipes(disable=other_pipes):

    seed = 0          # fixed seed so the shuffle order and dropout are repeatable
    epochs = 50       # number of passes over the training data
    dropout = 0.35    # dropout rate passed to nlp.update()
    log_every = 5     # print the accumulated loss every `log_every` epochs

    spacy.util.fix_random_seed(seed)

    # Batch-size generator; it is shared across epochs, so the batch size keeps
    # growing from 1.0 towards 4.0 over the whole run.
    sizes = compounding(1.0, 4.0, 1.001)

    # Shuffle a copy so the loaded dataset keeps the order shown when it was displayed.
    train_samples = list(TRAIN_DATA['annotations'])

    for epoch in range(epochs):
        random.shuffle(train_samples)
        losses = {}  # losses accumulated by nlp.update() during this epoch
        for batch in minibatch(train_samples, size=sizes):
            # Each sample is a [text, annotation] pair; build one Example per sample.
            examples = [
                Example.from_dict(nlp.make_doc(text), annotation)
                for text, annotation in batch
            ]
            nlp.update(examples, sgd=optimizer, drop=dropout, losses=losses)

        if epoch % log_every == 0:
            print("Losses", losses)

" with entities "[[21, 29, 'FOOD']]". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training..
" with entities "[[32, 41, 'FOOD']]". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.ve Ice cream.
" with entities "[[28, 38, 'FOOD']]". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.our cream.


Losses {'ner': 249.37368508890447}
Losses {'ner': 40.578790219643366}
Losses {'ner': 16.7881610295452}
Losses {'ner': 9.611851627226097}
Losses {'ner': 0.7715899981363347}
Losses {'ner': 4.736784222505499}
Losses {'ner': 5.091906007577788}
Losses {'ner': 3.609174792004884}
Losses {'ner': 3.2558278376261023}
Losses {'ner': 0.0030797369177470776}


In [12]:
def food_ner(text):
    """Run the pipeline on ``text`` and print every entity it finds.

    Each entity is written on its own line as ``<entity text> | <label>``.
    Nothing is returned.
    """
    for entity in nlp(text).ents:
        print(entity.text, "|", entity.label_)

### Test Model

In [13]:
# Sample sentences used to try out the trained model; the entities found are printed below.
test_text = """I made a chicken curry and noodles for lunch. Butter Chicken curry was spicy as hell."""
food_ner(test_text)

chicken curry | FOOD
noodles | FOOD
Butter Chicken curry | FOOD
