In [63]:
from bs4 import BeautifulSoup
import requests
import re
import fractions

In [2]:
"""

web scraper

"""

def get_numbers(lst):
    new_list = [int(x.split(':')[1]) if ':' in x else -1 for x in lst]
    return new_list

class RecipeFetcher:

    search_base_url = 'https://www.allrecipes.com/search/results/?wt=%s&sort=re'

    def search_recipes(self, keywords): 
        search_url = self.search_base_url %(keywords.replace(' ','+'))

        page_html = requests.get(search_url)
        page_graph = BeautifulSoup(page_html.content)

        return [recipe.a['href'] for recipe in\
               page_graph.find_all('div', {'class':'grid-card-image-container'})]

    def scrape_recipe(self, recipe_url):
        results = {}

        page_html = requests.get(recipe_url)
        print(recipe_url)
        page_graph = BeautifulSoup(page_html.content)

        results['ingredients'] = [ingredient.text for ingredient in\
                                  page_graph.find_all('span', {'itemprop':'recipeIngredient'})]

        results['directions'] = [direction.text.strip() for direction in\
                                 page_graph.find_all('span', {'class':'recipe-directions__list--item'})
                                 if direction.text.strip()]

        results['nutrition'] = self.scrape_nutrition_facts(recipe_url)
        
        results['calories_and_servings'] = self.scrape_calories_servings(recipe_url)

        return results
    
    def scrape_nutrition_facts(self, recipe_url):
        results = []

        nutrition_facts_url = '%s/fullrecipenutrition' %(recipe_url)

        page_html = requests.get(nutrition_facts_url)
        page_graph = BeautifulSoup(page_html.content)

        r = re.compile("([0-9]*\.?[0-9]*)([a-zA-Z]+)")
        
        nutrient = {}

        for nutrient_row in page_graph.find_all('div', {'class': 'nutrition-row'}):
            
            lst = nutrient_row.text.split(':')
            amount_lst = lst[1]
            name = lst[0].replace('\n', '')
            
            amount = amount_lst.split('\n')
            amount = [x.replace(' ', '') for x in amount[:2]]
            
            nutrient[name] = amount
            
        return nutrient
    
    def scrape_calories_servings(self, recipe_url):
        """
        returns [servings per recipe, amt per serving, calories]
        """

        nutrition_facts_url = '%s/fullrecipenutrition' %(recipe_url)

        page_html = requests.get(nutrition_facts_url)
        page_graph = BeautifulSoup(page_html.content)

        r = re.compile("([0-9]*\.?[0-9]*)([a-zA-Z]+)")
        
        nutrient = {}

        for row in page_graph.find_all('div', {'class': 'nutrition-top light-underline'}):
            lst = row.text.split('\n')
            lst = list(filter(lambda a: a != '\r', lst))
            
            calories = [x.lstrip() for x in lst]
            calories.pop()
            info = get_numbers(calories)
            
            return info

In [136]:
rf = RecipeFetcher()
fluffy_pancakes = rf.search_recipes('meat lasagna')[0]
res = rf.scrape_recipe(fluffy_pancakes)

https://www.allrecipes.com/recipe/218091/classic-and-simple-meat-lasagna/


In [137]:
ingredients = res['ingredients']
ingredients

['12 whole wheat lasagna noodles',
 '1 pound lean ground beef',
 '2 cloves garlic, chopped',
 '1/2 teaspoon garlic powder',
 '1 teaspoon dried oregano, or to taste',
 'salt and ground black pepper to taste',
 '1 (16 ounce) package cottage cheese',
 '2 eggs',
 '1/2 cup shredded Parmesan cheese',
 '1 1/2 (25 ounce) jars tomato-basil pasta sauce',
 '2 cups shredded mozzarella cheese']

In [138]:
"""
- replace ground meat with ground tofu
- replace meat with tofu
- replace hamburger meat with veggie paddy

"""

'\n- replace ground meat with ground tofu\n- replace meat with tofu\n- replace hamburger meat with veggie paddy\n\n'

In [139]:
def look_for_meat(ingredients):
    ground_meat_lst = ['ground beef', 'ground chicken', 'ground meat', 'ground turkey', 'ground lamb', 'ground pork', 'ground bison']
    meat_lst = ['chicken', 'steak', 'beef', 'lamb', 'bacon', 'pork', 'duck', 'bison', 'rabbit', 'cow', 'turkey']
    sandwich_meat_lst = ['hamburger', 'cheeseburger', 'sloppy joe']
    seafood_lst = ['seafood', 'salmon', 'cod', 'fish', 'halibut', 'shellfish', 'crab', 'lobster', 'shrimp', 'prawn', 'scallop']
    
    ground = False
    meat = False
    sandwich = False
    seafood = False
    
    for ingredient in ingredients:
        if any(x in ingredient.lower() for x in ground_meat_lst):
            ground = True
            replace_meat_lst = [ground, meat, sandwich, seafood]
            return ingredient, replace_meat_lst
        if any(x in ingredient.lower() for x in meat_lst):
            meat = True
            replace_meat_lst = [ground, meat, sandwich, seafood]
            return ingredient, replace_meat_lst
        if any(x in ingredient.lower() for x in sandwich_meat_lst):
            sandwich = True
            replace_meat_lst = [ground, meat, sandwich, seafood]
            return ingredient, replace_meat_lst
        if any(x in ingredient.lower() for x in seafood_lst):
            seafood = True
            replace_meat_lst = [ground, meat, sandwich, seafood]
            return ingredient, replace_meat_lst

In [140]:
p, pp = look_for_meat(ingredients)

In [141]:
p

'1 pound lean ground beef'

In [142]:
pp

[True, False, False, False]

In [143]:
def replace_meat(ingredients, meat, type_of_meat_lst):
    meat_lst = meat.split()
    quantity = ""
    
    print(meat_lst)
    print([int(s) for s in meat.split() if s.isdigit()])

    # split ingredient into tokens to separate the 'quantity' from 'ingredient_name'
    for word in meat:            
        if any([str(digit) in word for digit in range(10)]) and not any([char in word for char in ['(', ')']]):
            print(word)
            quantity += word
            #break
            
    replace_idx = meat_lst.index(quantity) + 2

    # ground meat replacement
    if type_of_meat_lst[0]:
        replace_term = meat_lst[replace_idx: ]
        s = " "
        term = s.join(replace_term)
        veggie = meat.replace(term, "ground tofu")
        print(veggie)
       
    # meat replacement
    if type_of_meat_lst[1]:
        replace_term = meat_lst[replace_idx: ]
        s = " "
        term = s.join(replace_term)
        veggie = meat.replace(term, "tofu")
        print(veggie)
        
    # sandwich meat replacement
    if type_of_meat_lst[2]:
        replace_term = meat_lst[replace_idx: ]
        s = " "
        term = s.join(replace_term)
        veggie = meat.replace(term, "impossible burger")
        print(veggie)
        
    # seafood replacement
    if type_of_meat_lst[3]:
        replace_term = meat_lst[replace_idx: ]
        s = " "
        term = s.join(replace_term)
        veggie = meat.replace(term, "tofuna fysh")
        print(veggie)

In [144]:
replace_meat(ingredients, p, pp)

['1', 'pound', 'lean', 'ground', 'beef']
[1]
1
1 pound ground tofu
