In [1]:
#Intalling necessary libraries

!pip install autocorrect
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.2.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl (13.9 MB)
[K     |████████████████████████████████| 13.9 MB 2.2 MB/s eta 0:00:01


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
#Importing necessary libraries

import pandas as pd
import numpy as np
import spacy
import pickle
import keras
import tensorflow as tf
from tensorflow.keras import layers
from keras.preprocessing.sequence import pad_sequences
from autocorrect import Speller
import matplotlib.pyplot as plt
%matplotlib inline


In [3]:
nlp = spacy.load('en_core_web_sm', disable=['parser', 'tagger', 'ner'])

In [4]:
MAX_LEN = 400

def prepare_sequences(texts, max_len, vocab={"<UNK>": 1, "<PAD>": 0}):
    X = [[vocab.get(w.text, vocab["<UNK>"]) for w in s] for s in texts]
    return pad_sequences(maxlen=max_len, sequences=X, padding="post", value=vocab["<PAD>"])

def getList(dict):
    return dict.keys()
      

In [5]:
#Loading the vocabulary of tokens

with open('saved_dictionary.pkl', 'rb') as f:
    vocab = pickle.load(f)

In [6]:
#Loading the model

new_model = keras.models.load_model('my_model.h5')

2022-04-24 18:27:48.966678: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
new_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 50)          723100    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, None, 50)          0         
_________________________________________________________________
bidirectional (Bidirectional (None, None, 128)         58880     
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, None, 128)         0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 128)         98816     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 1)           129       
Total params: 880,925
Trainable params: 880,925
Non-trainable params: 0
__________________________________________________

In [8]:
#Inputs are different, at the end I added tomato and potato

Recipes = {
  "recipe1": "In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack.",
  "recipe2": "In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack. potato tomato"
}

In [9]:
print(Recipes)

{'recipe1': 'In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack.', 'recipe2': 'In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack. potato tomato'}


In [10]:
keys = getList(Recipes)
keys = list(keys)
recipe_list = []
for key in keys:
    recipe = Recipes[key]
    recipe_list.append(recipe)

In [11]:
recipe_list

['In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack.',
 'In a large bowl, combine flour, baking powder, baking soda, salt, cinnamon, nutmeg, brown sugar, and  oats. Add apple, nuts, raisins, eggs, milk, and oil.  Mix until dry ingredients are moistened.**Bake for 55 to 60 minutes, or until done.  Cool on wire rack. potato tomato']

In [12]:
#Since spell checking increases running time too much, I added spell checked version as a commented lines and
#trained without spell checking

#eval_tokenized = [nlp(spell(t)) for t in recipe_list]
eval_tokenized = [nlp(t) for t in recipe_list]

X_seq_deneme = prepare_sequences(eval_tokenized, max_len=MAX_LEN, vocab=vocab)



In [13]:
#Using saved model for prediction

y_pred_deneme = new_model.predict(X_seq_deneme, verbose=1, batch_size=1024)

2022-04-24 18:27:51.520486: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)




In [14]:
Output = {}

for i in range(len(Recipes)):
    pred_i = y_pred_deneme[i] > 0.05
    #print(eval_tokenized[i])
    #print()
    ingreds = [t.text for t, p in zip(eval_tokenized[i], pred_i) if p]
    ingreds = set(ingreds)
    #print(ingreds)
    output_list = []
    for j in ingreds:
        first = Recipes[keys[i]].find(j)
        last = first + len(j)
        output_list.append([j,first,last])
    Output[keys[i]] = output_list
        
    

In [15]:
Output

{'recipe1': [['raisins', 125, 132],
  ['eggs', 134, 138],
  ['salt', 60, 64],
  ['milk', 140, 144],
  ['apple', 112, 117],
  ['oats', 102, 106],
  ['nutmeg', 76, 82],
  ['flour', 25, 30],
  ['cinnamon', 66, 74],
  ['sugar', 90, 95]],
 'recipe2': [['potato', 260, 266],
  ['raisins', 125, 132],
  ['tomato', 267, 273],
  ['eggs', 134, 138],
  ['salt', 60, 64],
  ['milk', 140, 144],
  ['apple', 112, 117],
  ['oats', 102, 106],
  ['nutmeg', 76, 82],
  ['flour', 25, 30],
  ['cinnamon', 66, 74],
  ['sugar', 90, 95]]}

In [16]:
print(Output)

{'recipe1': [['raisins', 125, 132], ['eggs', 134, 138], ['salt', 60, 64], ['milk', 140, 144], ['apple', 112, 117], ['oats', 102, 106], ['nutmeg', 76, 82], ['flour', 25, 30], ['cinnamon', 66, 74], ['sugar', 90, 95]], 'recipe2': [['potato', 260, 266], ['raisins', 125, 132], ['tomato', 267, 273], ['eggs', 134, 138], ['salt', 60, 64], ['milk', 140, 144], ['apple', 112, 117], ['oats', 102, 106], ['nutmeg', 76, 82], ['flour', 25, 30], ['cinnamon', 66, 74], ['sugar', 90, 95]]}
