In [22]:
import lxml.html
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
import re
import string
import warnings
import os
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so any deprecation or parser-selection
# warnings raised by the scraping code below are hidden too -- confirm that is intended.
warnings.filterwarnings('ignore')

In [116]:
def get_recipe_keys(page_numbers):
    """Scrape recipe URL slugs from Epicurious search-result pages.

    Each requested page of the "highest rated" recipe search is downloaded and
    the text of its ``<a>`` tags is collected in document order. Titles are
    taken from link positions 1, 7, 13, ... up to 103 (every sixth link).
    NOTE(review): those positions encode the page layout at the time this was
    written -- confirm they still line up with the recipe titles.

    Parameters
    ----------
    page_numbers : iterable of int
        Search-result page numbers to fetch.

    Returns
    -------
    list of str
        One slug per selected link, in page order: the link text stripped,
        lower-cased, and with each space replaced by a hyphen. Punctuation in
        the title is left untouched.

    Notes
    -----
    A page with fewer links than expected contributes only the positions that
    exist, instead of raising ``IndexError`` and aborting the whole crawl.
    """
    recipe_keys = []
    for page_number in page_numbers:
        url = 'http://www.epicurious.com/search?content=recipe&page=%d&sort=highestRated' % page_number
        r = requests.get(url)
        # Name the parser explicitly (lxml is already imported by this notebook):
        # titles are picked by link position, so the result must not depend on
        # whichever parser BeautifulSoup would otherwise guess.
        all_tags = bs(r.content, "lxml")
        link_texts = [x.get_text().strip() for x in all_tags.find_all("a")]
        # Plain slicing clamps at the end of the list, so short pages are tolerated.
        recipies = link_texts[1:105:6]
        recipe_keys.extend("-".join(x.lower().split(" ")) for x in recipies)
    return recipe_keys

In [127]:
def _parse_cooking_stats(all_tags):
    """Return ``[servings, active_time, total_time]`` read from the first three ``<dd>`` tags."""
    # NOTE(review): assumes the first three <dd> elements are servings, active time
    # and total time, in that order -- confirm against the live page layout.
    stats = []
    for tag in all_tags.find_all("dd")[:3]:
        leading_digits = re.match(r"\d+", tag.get_text())
        stats.append(int(leading_digits.group()) if leading_digits else None)
    # Pad so callers can always unpack three values; missing entries are None.
    return stats + [None] * (3 - len(stats))


def _fetch_reviews(recipe_name):
    """Return the review texts from the recipe's review page, or ``None`` if the request fails."""
    url = 'http://www.epicurious.com/recipes/food/reviews/%s' % recipe_name
    try:
        r = requests.get(url)
    except Exception:
        # None (rather than []) distinguishes "could not fetch" from "no reviews".
        return None
    all_tags = bs(r.content, "lxml")
    reviews = []
    for container in all_tags.find_all("div", class_="review-text"):
        paragraph = container.find("p")
        if paragraph is not None:  # a container without a <p> has no text to keep
            reviews.append(paragraph.get_text().strip())
    return reviews


def get_recipe_stats(recipe_name, include_details=False):
    """Scrape the nutrition table (and optionally more) for one Epicurious recipe.

    Parameters
    ----------
    recipe_name : str
        Recipe slug; it is substituted into the recipe-page URL and, when
        details are requested, into the review-page URL.
    include_details : bool, optional
        ``False`` (default) returns only the nutrition mapping, as before.
        ``True`` additionally scrapes the page title, the cooking stats and the
        review page of *this* recipe (the review URL used to be hard-coded to a
        single unrelated recipe and its result was discarded).

    Returns
    -------
    dict or float
        ``np.nan`` if the recipe page could not be requested. Otherwise, by
        default, a dict pairing each ``nutri-label`` text with the
        ``nutri-data`` text at the same position (empty if the page has none).
        With ``include_details=True`` the dict has the keys ``name``,
        ``servings``, ``active_time``, ``total_time``, ``nutrition`` and
        ``reviews``; any value that could not be scraped is ``None``.
    """
    url = 'http://www.epicurious.com/recipes/food/views/%s' % recipe_name
    try:
        r = requests.get(url)
    except Exception:
        # Best effort: one unreachable recipe must not abort a whole crawl. Unlike
        # a bare ``except`` this still lets KeyboardInterrupt stop the notebook.
        return np.nan
    # Explicit parser (lxml is already imported by this notebook) keeps the parse
    # independent of whichever parser BeautifulSoup would otherwise guess.
    all_tags = bs(r.content, "lxml")

    nutritional_labels = [x.get_text().strip() for x in all_tags.find_all(class_="nutri-label")]
    nutritional_data = [x.get_text().strip() for x in all_tags.find_all(class_="nutri-data")]
    nutrition = dict(zip(nutritional_labels, nutritional_data))
    if not include_details:
        # The default path no longer downloads the review page it never used.
        return nutrition

    heading = all_tags.find("h1")
    servings, active_time, total_time = _parse_cooking_stats(all_tags)
    return {
        "name": heading.get_text().strip() if heading is not None else None,
        "servings": servings,
        "active_time": active_time,
        "total_time": total_time,
        "nutrition": nutrition,
        "reviews": _fetch_reviews(recipe_name),
    }

In [128]:
# Scrape slugs from search-result page 1 only (range(1,2) yields just the number 1),
# then display the resulting list as the cell output.
recipe_names = get_recipe_keys(range(1,2))
recipe_names

['pasta-with-shrimp-in-tomato-cream',
 'green-goddess-buddha-bowl',
 'crispy-chicken-thighs-with-spring-vegetables',
 'one-skillet-steak-and-spring-veg-with-spicy-mustard',
 'one-skillet-roasted-butternut-squash-with-spiced-chickpeas',
 'pasta-with-ramp-pesto-and-guanciale',
 'warm-chicken-salad-with-creamy-dill-dressing',
 'spiced-moroccan-vegetable-soup-with-chickpeas,-cilantro,-and-lemon',
 'granola-bark',
 'mini-beef-and-mushroom-patties',
 'spring-risotto',
 'classic-cream-cheese-frosting',
 'soy-sauce-and-citrus-marinated-chicken',
 'turkish-lamb-chops-with-sumac,-tahini,-and-dill',
 'gluten-free-rice-buns',
 'mixed-citrus-daiquiri',
 'gingery-chicken-soup-with-zucchini-“noodles”',
 'cinnamon-white-hot-chocolate']

In [129]:
# Visit the slugs in order. Each slug is echoed (space-separated, on a single
# line) before its page is requested, then whatever get_recipe_stats returns
# for it is stored at the matching position.
recipe_reviews = list()
for recipe_name in recipe_names:
    print(recipe_name, end=" ")
    scraped = get_recipe_stats(recipe_name)
    recipe_reviews.append(scraped)

pasta-with-shrimp-in-tomato-cream green-goddess-buddha-bowl crispy-chicken-thighs-with-spring-vegetables one-skillet-steak-and-spring-veg-with-spicy-mustard one-skillet-roasted-butternut-squash-with-spiced-chickpeas pasta-with-ramp-pesto-and-guanciale warm-chicken-salad-with-creamy-dill-dressing spiced-moroccan-vegetable-soup-with-chickpeas,-cilantro,-and-lemon granola-bark mini-beef-and-mushroom-patties spring-risotto classic-cream-cheese-frosting soy-sauce-and-citrus-marinated-chicken turkish-lamb-chops-with-sumac,-tahini,-and-dill gluten-free-rice-buns mixed-citrus-daiquiri gingery-chicken-soup-with-zucchini-“noodles” cinnamon-white-hot-chocolate 

In [130]:
# Display everything collected by the loop above: one entry per recipe slug,
# each being the value get_recipe_stats returned for it.
recipe_reviews

[{'Calories': '828',
  'Carbohydrates': '77 g(26%)',
  'Cholesterol': '259 mg(86%)',
  'Fat': '40 g(61%)',
  'Fiber': '5 g(18%)',
  'Monounsaturated Fat': '14 g',
  'Polyunsaturated Fat': '3 g',
  'Protein': '36 g(71%)',
  'Saturated Fat': '21 g(105%)',
  'Sodium': '1111 mg(46%)'},
 {'Calories': '1210',
  'Carbohydrates': '179 g(60%)',
  'Cholesterol': '194 mg(65%)',
  'Fat': '42 g(65%)',
  'Fiber': '16 g(66%)',
  'Monounsaturated Fat': '23 g',
  'Polyunsaturated Fat': '8 g',
  'Protein': '37 g(74%)',
  'Saturated Fat': '8 g(42%)',
  'Sodium': '1248 mg(52%)'},
 {'Calories': '1006',
  'Carbohydrates': '29 g(10%)',
  'Cholesterol': '344 mg(115%)',
  'Fat': '69 g(107%)',
  'Fiber': '6 g(24%)',
  'Monounsaturated Fat': '31 g',
  'Polyunsaturated Fat': '14 g',
  'Protein': '62 g(125%)',
  'Saturated Fat': '18 g(90%)',
  'Sodium': '1373 mg(57%)'},
 {'Calories': '620',
  'Carbohydrates': '19 g(6%)',
  'Cholesterol': '93 mg(31%)',
  'Fat': '49 g(75%)',
  'Fiber': '6 g(25%)',
  'Monounsaturated