In [153]:
import re
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup

In [154]:
def get_numbers(lst):
    """Pull the integer that follows the first colon out of each string.

    Args:
        lst: iterable of strings such as ``'Calories: 526'``.

    Returns:
        A list with one int per input string, in the same order. The value
        is ``int()`` of the text between the first and second colon; ``-1``
        is used as a placeholder when the string has no colon or when that
        text is empty or not an integer (e.g. ``'Calories:'`` or
        ``'Total Fat: 8.2g'``).
    """
    numbers = []
    for entry in lst:
        fields = entry.split(':')
        try:
            # int() tolerates surrounding whitespace, so ' 4\r' parses as 4.
            numbers.append(int(fields[1]))
        except (IndexError, ValueError):
            # IndexError: no colon in the entry.
            # ValueError: nothing numeric after the colon. Use the same
            # sentinel instead of aborting the whole scrape.
            numbers.append(-1)
    return numbers

In [147]:
class RecipeFetcher:
    """Search allrecipes.com and scrape recipe and nutrition pages.

    All parsing relies on the CSS classes / ``itemprop`` attributes hard-coded
    in the methods below. NOTE(review): these presumably matched the site's
    markup when the notebook was written; verify against the live site.
    """

    search_base_url = 'https://www.allrecipes.com/search/results/?wt=%s&sort=re'

    # Seconds to wait for each HTTP response, so a stalled connection cannot
    # hang the notebook indefinitely.
    request_timeout = 10

    def _get_soup(self, url):
        """Download ``url`` and return its body parsed as a BeautifulSoup tree."""
        response = requests.get(url, timeout=self.request_timeout)
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and emits a warning.
        return BeautifulSoup(response.content, 'html.parser')

    def _search_url(self, keywords):
        """Build the search URL for ``keywords`` with the query URL-encoded."""
        # quote_plus turns spaces into '+' (as before) and also escapes
        # characters such as '&' or '#' that would corrupt the query string.
        return self.search_base_url % quote_plus(keywords)

    @staticmethod
    def _nutrition_url(recipe_url):
        """Return the full-nutrition page URL that belongs to ``recipe_url``."""
        # Recipe URLs from the search page end in '/'; strip it so the result
        # does not contain a double slash.
        return '%s/fullrecipenutrition' % recipe_url.rstrip('/')

    @staticmethod
    def _parse_nutrition_row(row_text):
        """Split the text of one nutrition row into ``(name, amounts)``.

        ``name`` is the text before the first colon with newlines removed.
        ``amounts`` holds the first two newline-separated pieces of the text
        between the first and second colon, with spaces removed (e.g.
        ``['8.2g', '13%']``). Returns ``None`` when the row has no colon.
        """
        parts = row_text.split(':')
        if len(parts) < 2:
            return None
        name = parts[0].replace('\n', '')
        amounts = [piece.replace(' ', '') for piece in parts[1].split('\n')[:2]]
        return name, amounts

    def search_recipes(self, keywords):
        """Return the recipe URLs listed on the search results page.

        Args:
            keywords: free-text search query, e.g. ``'fluffy pancakes'``.

        Returns:
            List of ``href`` values, one per result card that contains a
            link, in page order. Empty when nothing matched.
        """
        page_graph = self._get_soup(self._search_url(keywords))

        links = []
        for card in page_graph.find_all('div', {'class': 'grid-card-image-container'}):
            anchor = card.a
            # Skip cards without a usable link instead of crashing on them.
            href = anchor.get('href') if anchor is not None else None
            if href:
                links.append(href)
        return links

    def scrape_recipe(self, recipe_url):
        """Scrape one recipe page plus its nutrition page.

        Prints ``recipe_url`` as a progress trace.

        Args:
            recipe_url: URL of the recipe page.

        Returns:
            Dict with keys ``'ingredients'`` (list of str), ``'directions'``
            (list of non-empty, stripped str), ``'nutrition'`` (see
            ``scrape_nutrition_facts``) and ``'calories_and_servings'`` (see
            ``scrape_calories_servings``).
        """
        results = {}

        page_graph = self._get_soup(recipe_url)
        print(recipe_url)

        results['ingredients'] = [
            ingredient.text
            for ingredient in page_graph.find_all('span', {'itemprop': 'recipeIngredient'})
        ]

        directions = []
        for direction in page_graph.find_all('span', {'class': 'recipe-directions__list--item'}):
            text = direction.text.strip()
            if text:
                directions.append(text)
        results['directions'] = directions

        # Both nutrition scrapers read the same page, so download it once and
        # hand the parsed tree to each of them.
        nutrition_graph = self._get_soup(self._nutrition_url(recipe_url))
        results['nutrition'] = self.scrape_nutrition_facts(
            recipe_url, page_graph=nutrition_graph)
        results['calories_and_servings'] = self.scrape_calories_servings(
            recipe_url, page_graph=nutrition_graph)

        return results

    def scrape_nutrition_facts(self, recipe_url, page_graph=None):
        """Scrape the per-nutrient rows of a recipe's nutrition page.

        Args:
            recipe_url: URL of the recipe page.
            page_graph: optional already-parsed nutrition page; when omitted
                the page is downloaded.

        Returns:
            Dict mapping nutrient name to a list of up to two strings taken
            from the row (e.g. ``{'Total Fat': ['8.2g', '13%']}``). Rows
            without a colon are skipped.
        """
        if page_graph is None:
            page_graph = self._get_soup(self._nutrition_url(recipe_url))

        nutrient = {}
        for nutrient_row in page_graph.find_all('div', {'class': 'nutrition-row'}):
            parsed = self._parse_nutrition_row(nutrient_row.text)
            if parsed is None:
                continue
            name, amount = parsed
            nutrient[name] = amount

        return nutrient

    def scrape_calories_servings(self, recipe_url, page_graph=None):
        """Scrape the servings/calories summary of a recipe's nutrition page.

        Args:
            recipe_url: URL of the recipe page.
            page_graph: optional already-parsed nutrition page; when omitted
                the page is downloaded.

        Returns:
            The list produced by ``get_numbers`` for the lines of the first
            ``nutrition-top light-underline`` block (the last line is
            dropped); the sample run in this notebook yielded
            ``[4, -1, 526]``. Returns ``None`` when the page has no such
            block.
        """
        if page_graph is None:
            page_graph = self._get_soup(self._nutrition_url(recipe_url))

        summary = page_graph.find('div', {'class': 'nutrition-top light-underline'})
        if summary is None:
            return None

        # Drop bare carriage-return lines, then leading whitespace of the rest.
        lines = [line.lstrip() for line in summary.text.split('\n') if line != '\r']
        # Slicing drops the final piece like the former pop(), but cannot
        # raise when the list is empty.
        return get_numbers(lines[:-1])

            
        

In [150]:
# Search for "fluffy pancakes" and scrape the first result.
rf = RecipeFetcher()
# NOTE(review): despite its name, `chow_mein` holds the URL of the first
# "fluffy pancakes" hit; the name looks left over from an earlier query.
# Indexing with [0] raises IndexError if the search returns no results.
chow_mein = rf.search_recipes('fluffy pancakes')[0]
# scrape_recipe prints the URL it fetches and returns the scraped fields as a dict.
res = rf.scrape_recipe(chow_mein)

https://www.allrecipes.com/recipe/162760/fluffy-pancakes/


In [144]:
# Servings/calories summary; -1 is the placeholder get_numbers returns for an
# entry without a "label: number" pair.
res['calories_and_servings']

[4, -1, 526]

In [151]:
# Show the whole scraped dict: ingredients, directions, nutrition, calories_and_servings.
res

{'ingredients': ['3/4 cup milk',
  '2 tablespoons white vinegar',
  '1 cup all-purpose flour',
  '2 tablespoons white sugar',
  '1 teaspoon baking powder',
  '1/2 teaspoon baking soda',
  '1/2 teaspoon salt',
  '1 egg',
  '2 tablespoons butter, melted',
  'cooking spray'],
 'directions': ['Combine milk with vinegar in a medium bowl and set aside for 5 minutes to "sour".\n                                    Watch Now',
  'Combine flour, sugar, baking powder, baking soda, and salt in a large mixing bowl. Whisk egg and butter into "soured" milk. Pour the flour mixture into the wet ingredients and whisk until lumps are gone.\n                                    Watch Now',
  'Heat a large skillet over medium heat, and coat with cooking spray. Pour 1/4 cupfuls of batter onto the skillet, and cook until bubbles appear on the surface. Flip with a spatula, and cook until browned on the other side.\n                                    Watch Now'],
 'nutrition': {'Total Fat': ['8.2g', '13%'],
  

In [2]:
# Scratch check: a quoted digit is a str, not an int.
type('4')

str