In [24]:
from parser_1 import read_recipe_from_url
import re
import sys
import os
import spacy

nlp = spacy.load("en_core_web_lg")

folder_path = os.path.join(os.getcwd(), 'ourtypes')
sys.path.append(folder_path)

from step import Step, Steps
from ingredient import Ingredient

In [25]:
soup = read_recipe_from_url("https://www.allrecipes.com/recipe/255365/edible-cookie-dough/")

In [26]:
ingredients = soup.find("div", {"id":"mntl-structured-ingredients_1-0"})
for child in ingredients.find_all("li"):
    text = child.text
    text = text.replace("\n", "").strip()
    print(text)
    

1 cup all-purpose flour
¾ cup packed brown sugar
½ cup butter
1 teaspoon vanilla extract
½ teaspoon salt
2 tablespoons milk
½ cup milk chocolate chips
½ cup mini chocolate chips


In [41]:
ingredient_objects = {}
for child in ingredients.find_all("li"):
    text = child.text.replace("\n", "").split()[-1]
    ingredient_objects[text] = text

steps = soup.find("div", {"id":"recipe__steps-content_1-0"})
steps_collection = Steps()

for child in steps.find_all("p"):
    step_text = child.text.replace("\n", "").strip()
    
    #find the time mentioned for in the step
    time_match = re.search(r'for (.*? (minutes?|seconds?|hours?)\b)', step_text)
    if time_match:
        time = time_match.group(1)
    else:
         time = None
    
    #find the temperature for that step
    temp_match = re.search(r'(\d+ degrees)', step_text)
    if temp_match:
        temp = temp_match.group(1) 
    else:
        temp = None
    
    doc = nlp(step_text)
    tools = []
    methods = []

    for token in doc:
        #lemmatization (!!) for verbs
        if token.pos_ == 'VERB':
            methods.append(token.lemma_)

    #extract compound nouns for full tool names
    for chunk in doc.noun_chunks:
        root = chunk.root
        if root.dep_ in ['dobj', 'pobj']:  #direct object or object of preposition
            #check if the current chunk is a compound noun
            if any(child.dep_ == 'compound' for child in root.children):
                tools.append(chunk.text)
            else:
                tools.append(root.text)
    
    #identify ingredients present in this step
    step_ingredients = [ingredient_objects[ingr] for ingr in ingredient_objects if ingr in step_text]

    #remove tools are ingredients:
    tools = [tool for tool in tools if all(ingr not in tool for ingr in ingredient_objects)]
    #create Step object
    step_obj = Step(step_ingredients, tools=tools, methods=methods, description=step_text, time=time, temp=temp)

    #add Step object to Steps collection
    steps_collection.add_step(step_obj)

print(steps_collection)

Step 1: Step(Ingredients: ['flour'], Tools: ['dish', 'minute', 'it'], Methods: ['treat', 'use', 'cook', 'stir', 'set'], Time: 1 minute, Temp: None, Description: 'To heat-treat your flour so it is safe to use: Place flour in a microwave-safe dish and cook for 1 minute and 15 seconds, stirring it every 15 seconds. Set aside.')
Step 2: Step(Ingredients: ['flour', 'sugar', 'butter', 'extract', 'salt', 'milk', 'chips'], Tools: ['mixer', 'bowl', 'a crumbly dough forms', 'dough'], Methods: ['Beat', 'Beat', 'add', 'treat', 'mix', 'stir', 'combine', 'fold'], Time: None, Temp: None, Description: 'Beat sugar and butter with an electric mixer in a large bowl until creamy. Beat in vanilla extract and salt. Add heat-treated flour; mix until a crumbly dough forms. Stir in milk until dough is just combined; fold in milk chocolate chips and mini chocolate chips.')
Step 3: Step(Ingredients: ['flour'], Tools: ['illness', 'consumption', 'Step', 'microwave', 'a baking sheet', 'oven', '350 degrees F', 'minu