In [1]:
import fractions
import re
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup

In [2]:
"""

web scraper

"""

def get_numbers(lst):
    """Pull the integer that follows the first colon out of each string.

    Args:
        lst: Iterable of strings such as ``'Calories: 250'``.

    Returns:
        A list with one entry per input string: ``int()`` of the text between
        the first and second colon, or ``-1`` for strings without a colon.

    Raises:
        ValueError: If the text after the colon is not a valid integer.
    """
    numbers = []
    for entry in lst:
        if ':' not in entry:
            # No "label: value" shape -> placeholder value.
            numbers.append(-1)
            continue
        value_text = entry.split(':')[1]
        numbers.append(int(value_text))
    return numbers

class RecipeFetcher:
    """Search allrecipes.com and scrape individual recipe pages.

    Pages are downloaded with ``requests`` and parsed with BeautifulSoup. The
    tag / class / attribute names used for lookups are hard-coded below.
    NOTE(review): they reflect the site's markup when this was written;
    confirm they still match the live pages.
    """

    # URL template for a keyword search; ``%s`` receives the encoded keywords.
    search_base_url = 'https://www.allrecipes.com/search/results/?wt=%s&sort=re'

    # Parser handed to BeautifulSoup. Naming it explicitly keeps parsing the
    # same on every machine and avoids bs4's "no parser was explicitly
    # specified" warning.
    html_parser = 'html.parser'

    # Seconds to wait for the server before a request gives up; without a
    # timeout ``requests`` can block indefinitely.
    request_timeout = 30

    def _search_url(self, keywords):
        """Return the search URL for ``keywords`` with the value URL-encoded."""
        # quote_plus still turns spaces into '+', and additionally escapes
        # characters such as '&' or '#' that would otherwise break the query.
        return self.search_base_url % quote_plus(keywords)

    @staticmethod
    def _nutrition_url(recipe_url):
        """Return the URL of the nutrition page that belongs to ``recipe_url``."""
        # Recipe URLs may already end in '/'; strip it so the result has no '//'.
        return '%s/fullrecipenutrition' % recipe_url.rstrip('/')

    def _fetch_page(self, url):
        """Download ``url`` and return the response body as a BeautifulSoup tree."""
        response = requests.get(url, timeout=self.request_timeout)
        return BeautifulSoup(response.content, self.html_parser)

    @staticmethod
    def _parse_nutrition_row(row_text):
        """Split the text of one nutrition row into ``(name, amount)``.

        ``name`` is the text before the first colon with newlines removed.
        ``amount`` is a list of at most two strings: the first two
        newline-separated pieces after that colon, with spaces removed
        (presumably the quantity and the percent daily value -- verify
        against the page). Returns ``None`` for text without a colon.
        """
        parts = row_text.split(':')
        if len(parts) < 2:
            return None
        name = parts[0].replace('\n', '')
        amount = [piece.replace(' ', '') for piece in parts[1].split('\n')[:2]]
        return name, amount

    def _parse_nutrition_facts(self, page_graph):
        """Collect every ``div.nutrition-row`` of a parsed nutrition page."""
        nutrient = {}
        for nutrient_row in page_graph.find_all('div', {'class': 'nutrition-row'}):
            parsed = self._parse_nutrition_row(nutrient_row.text)
            if parsed is None:
                # Row without "name: amount" text; indexing it used to raise.
                continue
            name, amount = parsed
            nutrient[name] = amount
        return nutrient

    @staticmethod
    def _parse_calories_servings(page_graph):
        """Parse the first ``div.nutrition-top.light-underline`` block, if any."""
        summary = page_graph.find('div', {'class': 'nutrition-top light-underline'})
        if summary is None:
            return None
        lines = [line.lstrip() for line in summary.text.split('\n') if line != '\r']
        # The piece after the final newline is dropped before parsing.
        return get_numbers(lines[:-1])

    def search_recipes(self, keywords):
        """Return the recipe links found on the search result page.

        Args:
            keywords: Free-text search string, e.g. ``'veggie pasta'``.

        Returns:
            List of ``href`` values taken from the first ``<a>`` inside each
            ``div.grid-card-image-container``, in page order. Empty when the
            page contains no such cards.
        """
        page_graph = self._fetch_page(self._search_url(keywords))
        cards = page_graph.find_all('div', {'class': 'grid-card-image-container'})
        # Cards without a usable link are skipped instead of raising.
        return [card.a['href'] for card in cards
                if card.a is not None and card.a.has_attr('href')]

    def scrape_recipe(self, recipe_url):
        """Scrape ingredients, directions and nutrition data for one recipe.

        Prints ``recipe_url`` as a progress message.

        Args:
            recipe_url: URL of the recipe page.

        Returns:
            Dict with the keys ``'ingredients'`` (list of str),
            ``'directions'`` (list of non-empty, stripped str),
            ``'nutrition'`` (see ``scrape_nutrition_facts``) and
            ``'calories_and_servings'`` (see ``scrape_calories_servings``).
        """
        results = {}

        page_graph = self._fetch_page(recipe_url)
        print(recipe_url)

        results['ingredients'] = [ingredient.text for ingredient in
                                  page_graph.find_all('span', {'itemprop': 'recipeIngredient'})]

        results['directions'] = [direction.text.strip() for direction in
                                 page_graph.find_all('span', {'class': 'recipe-directions__list--item'})
                                 if direction.text.strip()]

        # Both nutrition results come from the same page, so download it once.
        nutrition_page = self._fetch_page(self._nutrition_url(recipe_url))
        results['nutrition'] = self._parse_nutrition_facts(nutrition_page)
        results['calories_and_servings'] = self._parse_calories_servings(nutrition_page)

        return results

    def scrape_nutrition_facts(self, recipe_url):
        """Scrape the per-nutrient table of a recipe's nutrition page.

        Args:
            recipe_url: URL of the recipe page (with or without trailing '/').

        Returns:
            Dict mapping each row's name to a list of at most two strings as
            produced by ``_parse_nutrition_row``. Rows without a colon are
            skipped.
        """
        nutrition_page = self._fetch_page(self._nutrition_url(recipe_url))
        return self._parse_nutrition_facts(nutrition_page)

    def scrape_calories_servings(self, recipe_url):
        """
        returns [servings per recipe, amt per serving, calories]

        The values are what ``get_numbers`` extracts from the lines of the
        summary block, so a line without a colon yields -1. Returns None when
        the page has no summary block.
        """
        nutrition_page = self._fetch_page(self._nutrition_url(recipe_url))
        return self._parse_calories_servings(nutrition_page)

In [27]:
# Search for "veggie pasta" and scrape the first search hit.
# NOTE(review): `fluffy_pancakes` holds the URL of that first hit (a pasta
# recipe for this query), so the name looks like a leftover from another search.
# Indexing with [0] raises IndexError when the search returns no links.
rf = RecipeFetcher()
fluffy_pancakes = rf.search_recipes('veggie pasta')[0]
res = rf.scrape_recipe(fluffy_pancakes)

https://www.allrecipes.com/recipe/45644/roasted-veggie-pasta/


In [28]:
# Ingredient strings scraped from the recipe page (the 'ingredients' entry of
# the dict returned by scrape_recipe); the bare name displays them.
ingredients = res['ingredients']
ingredients

['1/4 pound fresh asparagus',
 '2 red bell pepper, sliced',
 '1/4 pound crimini mushrooms, sliced',
 '10 cloves roasted garlic, chopped',
 '1/2 tomato, quartered',
 '1/2 teaspoon chopped fresh rosemary',
 '1/2 teaspoon chopped fresh oregano',
 '2 tablespoons olive oil',
 '8 ounces dry fettuccini noodles',
 '1/4 cup grated Parmesan cheese',
 '2 tablespoons tapenade']

In [29]:
def replace_veggie(meat):
    """Replace the ingredient name in one ingredient string with "chicken".

    The text is tokenised after dropping parentheses and turning '/' into a
    space, so '1/4' and '(14 ounce)' become plain tokens. Everything that
    follows the last numeric token plus one further token (assumed to be the
    unit) is treated as the ingredient name and replaced.

    Args:
        meat: Ingredient string such as '1/4 pound tofu, cubed'. Despite the
            parameter name, the caller in this file passes the vegetarian
            ingredient that should become meat.

    Returns:
        The ingredient with its name replaced by "chicken". The input is
        returned unchanged when it contains no numeric token, when nothing
        follows the quantity and unit, or when the name cannot be found
        verbatim in the original text (e.g. it contained parentheses).
    """
    substitute = "chicken"
    tokens = meat.replace("(", "").replace(")", "").replace("/", " ").split()

    # Remember the POSITION of the last quantity token. Looking the value up
    # again by text would hit an earlier equal token ('2 1/2 cups ...').
    last_quantity_idx = None
    for idx, token in enumerate(tokens):
        try:
            fractions.Fraction(token)
        except ValueError:
            # Not a number ('pound', 'tofu,', '8-ounce', ...).
            continue
        last_quantity_idx = idx

    if last_quantity_idx is None:
        return meat

    # Skip the quantity itself and the unit assumed to follow it.
    name_tokens = tokens[last_quantity_idx + 2:]
    if not name_tokens:
        # Replacing the empty string would insert the substitute everywhere.
        return meat

    return meat.replace(" ".join(name_tokens), substitute)

In [30]:
def replace_meat(ingredients, meat, type_of_meat_lst, first):
    """Swap the meat named in one ingredient string for a vegetarian substitute.

    The text is tokenised after dropping parentheses and turning '/' into a
    space. Everything that follows the last numeric token plus one further
    token (assumed to be the unit) is treated as the ingredient name and
    replaced.

    Args:
        ingredients: Full ingredient list. Accepted so existing calls keep
            working; it is not used here.
        meat: Ingredient string such as '1 pound ground beef'.
        type_of_meat_lst: Flags in the order
            [ground, meat, sandwich, seafood, turkey, chicken]. The first
            truthy one (turkey and chicken share a substitute) decides the
            replacement. Only consulted when ``first`` is truthy.
        first: Truthy for the first meat ingredient of a recipe. Any later
            meat ingredient becomes "veggie sausage" regardless of the flags.

    Returns:
        The ingredient with its name replaced. The input is returned
        unchanged when no flag is set, when it contains no numeric token,
        when nothing follows the quantity and unit, or when the name cannot
        be found verbatim in the original text (e.g. it contained
        parentheses).
    """
    # Flags are tested in priority order and only indexed when reached.
    if not first:
        substitute = "veggie sausage"
    elif type_of_meat_lst[0]:
        substitute = "ground tofu"
    elif type_of_meat_lst[1]:
        substitute = "tofu"
    elif type_of_meat_lst[2]:
        substitute = "impossible burger"
    elif type_of_meat_lst[3]:
        substitute = "tofuna fysh"
    elif type_of_meat_lst[4] or type_of_meat_lst[5]:
        substitute = "tofurkey"
    else:
        # Nothing to substitute; previously this fell through and returned None.
        return meat

    tokens = meat.replace("(", "").replace(")", "").replace("/", " ").split()

    # Remember the POSITION of the last quantity token. Looking the value up
    # again by text would hit an earlier equal token ('2 1/2 pounds ...').
    last_quantity_idx = None
    for idx, token in enumerate(tokens):
        try:
            fractions.Fraction(token)
        except ValueError:
            # Not a number ('pound', 'beef,', '8-ounce', ...).
            continue
        last_quantity_idx = idx

    if last_quantity_idx is None:
        return meat

    # Skip the quantity itself and the unit assumed to follow it.
    name_tokens = tokens[last_quantity_idx + 2:]
    if not name_tokens:
        # Replacing the empty string would insert the substitute everywhere.
        return meat

    return meat.replace(" ".join(name_tokens), substitute)

In [31]:
def look_for_meat(ingredients):
    """Flip a recipe between vegetarian and non-vegetarian, editing the list in place.

    Ingredients are checked in order using case-insensitive substring matches:

    * An ingredient naming a vegetarian protein is handed to
      ``replace_veggie`` and scanning stops immediately.
    * An ingredient naming meat or seafood is handed to ``replace_meat``
      together with the category flags seen so far and whether it is the
      first meat ingredient found.
    * If no ingredient was replaced at all, '1 pound chicken breast' is
      appended.

    Args:
        ingredients: List of ingredient strings. It is modified in place.

    Returns:
        The same list object that was passed in.
    """
    # Flag slots, in the order replace_meat receives them.
    GROUND, MEAT, SANDWICH, SEAFOOD, TURKEY, CHICKEN = range(6)

    vegetarian_terms = ['tofu', 'tofurkey', 'impossible burger', 'veggie burger']

    # Checked top to bottom; the first group with a matching keyword wins.
    keyword_groups = [
        (GROUND, ['ground beef', 'ground chicken', 'ground meat', 'ground turkey',
                  'ground lamb', 'ground pork', 'ground bison']),
        (MEAT, ['chicken', 'steak', 'beef', 'lamb', 'bacon', 'pork', 'duck',
                'bison', 'rabbit', 'cow', 'sausage', 'turkey']),
        (SANDWICH, ['hamburger', 'cheeseburger', 'sloppy joe']),
        (SEAFOOD, ['salmon', 'cod', 'fish', 'halibut', 'shellfish', 'crab',
                   'lobster', 'shrimp', 'prawn', 'scallop']),
    ]

    def classify(text):
        """Return the flag slot for a lower-cased ingredient, or None."""
        # Sliced turkey/chicken is only considered when no bread is mentioned.
        if 'bread' not in text and 'slice' in text:
            if 'turkey' in text:
                return TURKEY
            if 'chicken' in text:
                return CHICKEN
        for slot, keywords in keyword_groups:
            if any(keyword in text for keyword in keywords):
                return slot
        return None

    flags = [False] * 6          # categories seen so far; never reset
    is_first_meat = True
    nothing_replaced = True

    for position, ingredient in enumerate(ingredients):
        lowered = ingredient.lower()

        if any(term in lowered for term in vegetarian_terms):
            ingredients[position] = replace_veggie(ingredient)
            nothing_replaced = False
            break

        slot = classify(lowered)
        if slot is None:
            continue

        flags[slot] = True
        ingredients[position] = replace_meat(ingredients, ingredient, list(flags), is_first_meat)
        is_first_meat = False
        nothing_replaced = False

    if nothing_replaced:
        ingredients.append('1 pound chicken breast')

    return ingredients

In [32]:
# Display the ingredient list as it is before look_for_meat runs in the next cell.
ingredients

['1/4 pound fresh asparagus',
 '2 red bell pepper, sliced',
 '1/4 pound crimini mushrooms, sliced',
 '10 cloves roasted garlic, chopped',
 '1/2 tomato, quartered',
 '1/2 teaspoon chopped fresh rosemary',
 '1/2 teaspoon chopped fresh oregano',
 '2 tablespoons olive oil',
 '8 ounces dry fettuccini noodles',
 '1/4 cup grated Parmesan cheese',
 '2 tablespoons tapenade']

In [33]:
# look_for_meat edits the list it receives and returns that same list, so `p`
# and `ingredients` refer to one object afterwards. Re-running this cell
# therefore transforms the already-transformed list again.
p = look_for_meat(ingredients)
p

['1/4 pound fresh asparagus',
 '2 red bell pepper, sliced',
 '1/4 pound crimini mushrooms, sliced',
 '10 cloves roasted garlic, chopped',
 '1/2 tomato, quartered',
 '1/2 teaspoon chopped fresh rosemary',
 '1/2 teaspoon chopped fresh oregano',
 '2 tablespoons olive oil',
 '8 ounces dry fettuccini noodles',
 '1/4 cup grated Parmesan cheese',
 '2 tablespoons tapenade',
 '1 pound chicken breast']