### Setup

In [1]:
import re
import os
import time
import random
import requests
import json
import numpy as np
import pandas as pd
from os import system
from math import floor
from copy import deepcopy
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import Edge # I use Microsoft Edge driver specific to my version of Microsoft Edge.
from recipe_scrapers import scrape_me

In [2]:
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_colwidth', 200)

# DRIVER

In [3]:
caps = {
}

driver = Edge(executable_path='/usr/local/bin/msedgedriver', capabilities=caps )

# Functions

In [5]:
def scrape(recipe_url):
    """
    This function takes a URL link to a recipe and, using the imported recipe-scrapers package, 
    adds the scraped content to a dictionary.
    
    To use this function, please first install the package and run the following import statement:
        pip install recipe-scrapers
        from recipe_scrapers import scrape_me    
    """
    scraper = scrape_me(recipe_url)
    
    title = scraper.title()
    total_time = scraper.total_time()
    yields = scraper.yields()
    ingredients = scraper.ingredients()
    instructions = scraper.instructions()
    image = scraper.image()
    host = scraper.host()
    links = scraper.links()
    nutrients = scraper.nutrients()
    
    recipe_dict = {
        'title': title,
        'cooktime': total_time,
        'yields': yields,
        'ingredients': ingredients,
        'instructions': instructions,
        'nutrients': nutrients,
        'image': image,
        'host': host,
        'links': links
    }
    return recipe_dict

In [6]:
# Note: this only has been tuned for FoodNetwork
def collect_urls(recipe_url):
    """
    Takes in a recipe URL and returns a list of all URLs linked on that page
    
    Note: At the moment this is only functional for FoodNetwork
    """
    recipe_dict = scrape(recipe_url)
    urls_list = []
    for i in recipe_dict['links']:
        if i['href'][:30] == '//www.foodnetwork.com/recipes/' and i['href'] not in urls_list and i['href'][30:36] != ['photos'] and i['href'][30:38] != ['packages']:
            urls_list.append('https:'+i['href'])
        else:
            continue
    return urls_list

# test_url = 'https://www.epicurious.com/search/mediterranean?content=recipe&'
# collect_urls(test_url) # doesn't work for Epicurious yet

In [7]:
def scrape_links(recipe_dict, recipes_list=recipes):
    """
    
    args: recipe_dict, recipes_list (recipes by default)
    
    This funtion takes a variable assigned to a scraped recipe 
    (i.e. — a dictionary object that contains details for a recipe) 
    for its first argument, and appends recipe dictionaries for EACH 
    of the links on that recipe's page to a previously instantiated list of recipes 
    (which are also dictionary objects).
    
    Note: Having trouble with epicurious links
    
    """
#     recipes_list = [recipe_dict]
    urls_list = recipe_dict['links']
    for i in urls_list:
        if i[:6] == 'https:':
            addtl = scrape(addtl_url)
            recipes_list.append(addtl)
        else:
            addtl_url = 'https:'+i
            addtl = scrape(addtl_url)
            recipes_list.append(addtl)
    return recipes_list

In [17]:
def append_https(url_list):
    recipe_urls = []
    for i in url_list:
        recipe_urls.append('https:'+i)
    return recipe_urls

# Scraping Recipes

There are many places that we may source recipes from across the web. I'm going to look among sources that use a format that will be easy to manage. Many sites that have recipes use a microdata format known as ***hrecipe***, among them include the following:
- food network
- epicurious
- williams-sonoma  
  
Let's see what we can find over on food network's site.

In [4]:
recipes = []

## Food Network

### Example Scrape with `recipe-scrapers` Package

First let's give things a shot with a single test recipe—one for *Herbed Chicken Marsala*.

In [8]:
# Set url variable to first recipe URL
url = 'https://www.foodnetwork.com/recipes/food-network-kitchen/herbed-chicken-marsala-recipe-2121049'

# We instantiate the scraper object
scraper = scrape_me(url)

In [10]:
title = scraper.title()
total_time = scraper.total_time()
yields = scraper.yields()
ingredients = scraper.ingredients()
instructions = scraper.instructions()
image = scraper.image()
host = scraper.host()
links = scraper.links()
nutrients = scraper.nutrients()

In [11]:
title

'Herbed Chicken Marsala'

In [12]:
total_time

35

In [13]:
yields

'4 serving(s)'

In [14]:
ingredients # notice that FoodNetwork has a functional object in the ingredients list. We'll parse those out later.

['Deselect All',
 'Four 4-ounce boneless, skinless chicken breast cutlets',
 'Kosher salt and freshly ground black pepper',
 '1/3 cup whole wheat flour',
 '1 1/2 tablespoons extra-virgin olive oil',
 '3/4 cup low-sodium chicken broth',
 '1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced',
 '1/2 teaspoon finely chopped rosemary',
 '10 ounces white button or cremini (baby bella) mushrooms, sliced',
 '1/3 cup sweet marsala wine',
 '2 teaspoons unsalted butter',
 '1 to 2 tablespoons roughly chopped flat-leaf parsley']

On the FoodNetwork website, the instructions are split in to numbered steps, which are seperated below with `\n`.  
It might be sensible to keep the instructions structured this way in a singe column of our dataframe, rather than have many columns for each step. If some recipes only have dozens of steps, our dataframe will be sparse.

In [15]:
instructions

"Place the chicken cutlets between 2 pieces of plastic wrap and pound with a meat mallet (or the flat side of a chef's knife) until about 1/3-inch thick. Sprinkle with 1/4 teaspoon salt and 1/4 teaspoon pepper.\nPut the flour on a medium plate. Heat the oil in a large nonstick skillet over medium-high heat. Dredge the chicken in the flour to fully coat, shaking off any excess. Add the chicken to the skillet and fry until fully cooked and golden brown, about 4 minutes per side. Transfer to a platter and tent with foil to keep warm.\nAdd 1/2 cup of the broth, the sun-dried tomatoes and rosemary to any remaining drippings in the skillet and cook, stirring frequently, for 1 minute to plump the tomatoes. Add the mushrooms, 1/4 teaspoon salt and 1/2 teaspoon pepper and cook until the mushrooms are soft, about 5 minutes. Add the marsala and bring to a boil. Add the remaining 1/4 cup broth and the butter and simmer until the butter is fully melted, about 30 seconds.\nSpoon the mushroom mixture

In [16]:
image # yummy

'https://food.fnr.sndimg.com/content/dam/images/food/fullset/2013/2/14/0/FNK_Herbed-Chicken-Marsala_s4x3.jpg.rend.hgtvcom.406.305.suffix/1371614296995.jpeg'

![image](https://food.fnr.sndimg.com/content/dam/images/food/fullset/2013/2/14/0/FNK_Herbed-Chicken-Marsala_s4x3.jpg.rend.hgtvcom.406.305.suffix/1371614296995.jpeg)

In [17]:
host

'foodnetwork.com'

In [18]:
links # all the other links on the page, not relevant to the recipe but on other sites outside links may be for further resources for the specified recipe

[{'href': 'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'},
 {'href': '//www.foodnetwork.com/shows/tv-schedule'},
 {'href': '//www.foodnetwork.com/videos'},
 {'href': '//www.foodnetwork.com/how-to/packages/shopping'},
 {'href': '//www.foodnetwork.com/features/articles/sweepstakes-and-contests'},
 {'href': '//www.foodnetwork.com/magazine'},
 {'href': '//www.foodnetwork.com/fn-dish'},
 {'href': '//www.foodnetwork.com/shows/a-z'},
 {'href': '//www.foodnetwork.com/profiles/talent'},
 {'href': '//www.foodnetwork.com/restaurants'},
 {'class': ['m-SocialLinks__a-Icon--facebook'],
  'href': 'https://www.facebook.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--twitter'],
  'href': 'https://twitter.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--instagram'],
  'href': 'https://instagram.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--youtube'],
  'href': 

In [19]:
nutrients

{'calories': '294 calorie',
 'fatContent': '11 grams',
 'saturatedFatContent': '3 grams',
 'cholesterolContent': '78 milligrams',
 'sodiumContent': '569 milligrams',
 'carbohydrateContent': '19 grams',
 'fiberContent': '3 grams',
 'proteinContent': '30 grams',
 'sugarContent': '7 grams'}

In [29]:
title = scraper.title()
total_time = scraper.total_time()
yields = scraper.yields()
ingredients = scraper.ingredients()
instructions = scraper.instructions()
image = scraper.image()
host = scraper.host()
links = scraper.links()
nutrients = scraper.nutrients()

In [32]:
urls_list = scraper.links()

### Acquiring more recipes using Links on the First Recipe's Page

In [33]:
# urls_list

In [34]:
urls_list[4]

{'href': '//www.foodnetwork.com/features/articles/sweepstakes-and-contests'}

In [37]:
url

'https://www.foodnetwork.com/recipes/food-network-kitchen/herbed-chicken-marsala-recipe-2121049'

### $#HERE#$

In [38]:
# def scrape_links(recipe_dict, recipes_list=recipes):
#     """
    
#     args: recipe_dict, recipes_list (recipes by default)
    
#     This funtion takes a variable assigned to a scraped recipe 
#     (i.e. — a dictionary object that contains details for a recipe) 
#     for its first argument, and appends recipe dictionaries for EACH 
#     of the links on that recipe's page to a previously instantiated list of recipes 
#     (which are also dictionary objects).
    
    
#     """
#     recipes_list = [recipe_dict]
#     for i in urls_list:
#         if i[:6] == 'https:':
#             addtl = scrape(addtl_url)
#             recipes_list.append(addtl)
#         else:
#             addtl_url = 'https:'+i
#             addtl = scrape(addtl_url)
#             recipes_list.append(addtl)
#     return recipes_list

NameError: name 'recipe_dict' is not defined

In [39]:
len('//www.foodnetwork.com/recipes/')

30

In [154]:
# recipe_dict['links'][4]['href']

[{'href': 'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'},
 {'href': '//www.foodnetwork.com/shows/tv-schedule'},
 {'href': '//www.foodnetwork.com/videos'},
 {'href': '//www.foodnetwork.com/how-to/packages/shopping'},
 {'href': '//www.foodnetwork.com/features/articles/sweepstakes-and-contests'},
 {'href': '//www.foodnetwork.com/magazine'},
 {'href': '//www.foodnetwork.com/fn-dish'},
 {'href': '//www.foodnetwork.com/shows/a-z'},
 {'href': '//www.foodnetwork.com/profiles/talent'},
 {'href': '//www.foodnetwork.com/restaurants'},
 {'class': ['m-SocialLinks__a-Icon--facebook'],
  'href': 'https://www.facebook.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--twitter'],
  'href': 'https://twitter.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--instagram'],
  'href': 'https://instagram.com/FoodNetwork',
  'target': '_blank'},
 {'class': ['m-SocialLinks__a-Icon--youtube'],
  'href': 

In [40]:
links[0]['href']

'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'

In [41]:
ingredients

['Deselect All',
 'Four 4-ounce boneless, skinless chicken breast cutlets',
 'Kosher salt and freshly ground black pepper',
 '1/3 cup whole wheat flour',
 '1 1/2 tablespoons extra-virgin olive oil',
 '3/4 cup low-sodium chicken broth',
 '1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced',
 '1/2 teaspoon finely chopped rosemary',
 '10 ounces white button or cremini (baby bella) mushrooms, sliced',
 '1/3 cup sweet marsala wine',
 '2 teaspoons unsalted butter',
 '1 to 2 tablespoons roughly chopped flat-leaf parsley']

In [236]:
# for ingredient in ingredients:
#     ingredient_dict = {}
#     if i != 'Deselect All':
#         ingredients.pop(i)
# ingredients

TypeError: 'str' object cannot be interpreted as an integer

In [42]:
ingredients[0]

'Deselect All'

In [43]:
pd.DataFrame(recipes)

NameError: name 'recipes' is not defined

## More from Food Network

#### Pasta Lasagna Rolls + Associated Links

In [62]:
# Instantiate empty list if you haven't above
# recipes = []

# Below is useful to check what recipes are currently in your list
# for i in recipes:
#     print(i['title'])

Oklahoma Onion Burgers with Creamy BBQ Coleslaw
French Toast
Caprese Chicken Breasts Pan-Fried from Frozen
Easy Cornpone
Roasted Asparagus
Lasagna Rolls
Overnight Cinnamon Rolls
Shrimp Fettuccine Alfredo
Chicken Fettuccine Alfredo
Pesto Lasagna Rolls


In [47]:
# First let's scrape the recipe webpage for 'Pesto Lasagna Rolls'
plr_recipe = scrape('https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449')
recipes.append(plr_recipe)

# Now let's grab the URLs on that recipe's page
plr = collect_urls('https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449')
plr # has a lot of links that are either repeats, or not recipes. I clean them below with .pop() but will want to systematize this process

['https://www.foodnetwork.com/recipes/photos/30-minute-dinner-recipes',
 'https://www.foodnetwork.com/recipes/packages/baking-guide',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/oklahoma-onion-burgers-with-creamy-bbq-coleslaw-9867150',
 'https://www.foodnetwork.com/recipes/robert-irvine/french-toast-recipe-1951408',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/caprese-chicken-breasts-pan-fried-from-frozen-9884169',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/easy-cornpone-9876126',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/roasted-asparagus-recipe-1916065',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449.recipePrint',
 'https://www.foodnetwork.com/recipes/packages/comfort-foods/easy-comfort-food-recipes',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/lasagna-rolls-recipe-1943979',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/lasagna-rolls-recipe-1943979',
 'https:

In [48]:
# links that gave us trouble:
# - located in /photos directory
# - located in /packages directory
# - have '.recipePrint' extension at end of URL
# - many duplicate recipes (may not cause trouble but smart to clean at the start)
plr.pop(0)
plr.pop(0)
plr.pop()
plr.pop()
plr.pop()
plr.pop()
plr.pop()
plr.pop(-2)
plr.pop(-3)
plr.pop(-4)
plr.pop(-5)
plr.pop(-5)

'https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449.recipePrint'

In [49]:
plr

['https://www.foodnetwork.com/recipes/food-network-kitchen/oklahoma-onion-burgers-with-creamy-bbq-coleslaw-9867150',
 'https://www.foodnetwork.com/recipes/robert-irvine/french-toast-recipe-1951408',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/caprese-chicken-breasts-pan-fried-from-frozen-9884169',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/easy-cornpone-9876126',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/roasted-asparagus-recipe-1916065',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/lasagna-rolls-recipe-1943979',
 'https://www.foodnetwork.com/recipes/alton-brown/overnight-cinnamon-rolls-recipe-2014250',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/shrimp-fettuccine-alfredo-3364199',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/chicken-fettuccine-alfredo-3364118']

In [50]:
# recipes variable instantiated above as an empty list, at the top of the 'Scraping Recipes' section
# This can give us an attribute error. 
# Process for troubleshooting is in the cell below
for i in plr:
    recipe = scrape(i)
    recipes.append(recipe)

In [51]:
# Checking to see which recipe is giving us trouble. 
# Scraping worked until 'pesto lasanga rolls' came up again with a PRINT extension. Modified cleaning above to pop that off.
for i in recipes:
    print(i['title'])

Oklahoma Onion Burgers with Creamy BBQ Coleslaw
French Toast
Caprese Chicken Breasts Pan-Fried from Frozen
Easy Cornpone
Roasted Asparagus
Lasagna Rolls
Overnight Cinnamon Rolls
Shrimp Fettuccine Alfredo
Chicken Fettuccine Alfredo


In [22]:
# No problems in recipes with indexes 0-4
# for i in plr[:5]:
#     recipe = scrape(i)
#     recipes.append(recipe)

In [26]:
# If problem occurs at recipe with index 5, the below would throw an error
# for i in plr[5]:
#     recipe = scrape(i)
#     recipes.append(recipe)

In [52]:
# len(recipes)

9

In [38]:
# for i in recipes:
#     print(i['title'])

Oklahoma Onion Burgers with Creamy BBQ Coleslaw
French Toast
Caprese Chicken Breasts Pan-Fried from Frozen
Easy Cornpone
Roasted Asparagus
Oklahoma Onion Burgers with Creamy BBQ Coleslaw
French Toast
Caprese Chicken Breasts Pan-Fried from Frozen
Easy Cornpone
Roasted Asparagus
Roasted Asparagus


#### Ina Garten Mac & Cheese

In [268]:
mnc = 'https://www.foodnetwork.com/recipes/ina-garten/mac-and-cheese-recipe2-1945401'
scrape(mnc)

ElementNotFoundInHtml: recipe-scrapers exception: Element not found in html (self.soup.find returned None). Check traceback.

In [267]:
mnc_links = collect_urls(mnc)
mnc_links

ElementNotFoundInHtml: recipe-scrapers exception: Element not found in html (self.soup.find returned None). Check traceback.

In [253]:
https_plr = []
for i in plr:
    https_plr.append('https:'+i)
plr = https_plr
plr

['https://www.foodnetwork.com/recipes/food-network-kitchen/fall-off-the-bone-chicken-5195778',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pork-veggie-and-rice-noodle-lettuce-wraps-with-sweet-chili-sauce-9628517',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/sourdough-discard-chocolate-chip-cookies-9876093',
 'https://www.foodnetwork.com/recipes/ina-garten/turkey-lasagna-recipe2-1916662',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/juicy-chicken-breasts-baked-from-frozen-9884167',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449.recipePrint',
 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/lasagna-rolls-recipe-1943979',
 'https://www.foodnetwork.com/recipes/alton-brown/overnight-cinnamon-rolls-recipe-2014250',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/shrimp-fettuccine-alfredo-3364199',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/chicken-fettuccine-alfredo-3364118

In [239]:
plr = collect_urls('https://www.foodnetwork.com/recipes/food-network-kitchen/pesto-lasagna-rolls-3696449')

In [265]:
for i in plr[5:]:
    recipe = scrape(i)
    recipes.append(recipe)

#### Lightened-up Shrimp Scampi

In [269]:
scampi = 'https://www.foodnetwork.com/recipes/food-network-kitchen/lightened-up-shrimp-scampi-2012108'
# scrape(scampi)

In [271]:
scampi_links = collect_urls(scampi)
scampi_links

['https://www.foodnetwork.com/recipes/photos/30-minute-dinner-recipes',
 'https://www.foodnetwork.com/recipes/packages/baking-guide',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/fall-off-the-bone-chicken-5195778',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pork-veggie-and-rice-noodle-lettuce-wraps-with-sweet-chili-sauce-9628517',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/sourdough-discard-chocolate-chip-cookies-9876093',
 'https://www.foodnetwork.com/recipes/ina-garten/turkey-lasagna-recipe2-1916662',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/juicy-chicken-breasts-baked-from-frozen-9884167',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/lightened-up-shrimp-scampi-2012108.recipePrint',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/classic-shrimp-scampi-8849846',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/classic-shrimp-scampi-8849846',
 'https://www.foodnetwork.com/recipes/tyler-fl

In [278]:
scampi_links

['https://www.foodnetwork.com/recipes/food-network-kitchen/fall-off-the-bone-chicken-5195778',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pork-veggie-and-rice-noodle-lettuce-wraps-with-sweet-chili-sauce-9628517',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/sourdough-discard-chocolate-chip-cookies-9876093',
 'https://www.foodnetwork.com/recipes/ina-garten/turkey-lasagna-recipe2-1916662',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/juicy-chicken-breasts-baked-from-frozen-9884167',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/lightened-up-shrimp-scampi-2012108.recipePrint',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/classic-shrimp-scampi-8849846',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/classic-shrimp-scampi-8849846',
 'https://www.foodnetwork.com/recipes/tyler-florence/shrimp-scampi-with-linguini-recipe-1942429',
 'https://www.foodnetwork.com/recipes/tyler-florence/shrimp-scampi-with-linguini-r

In [285]:
scampi_links.pop(5)

'https://www.foodnetwork.com/recipes/food-network-kitchen/lightened-up-shrimp-scampi-2012108.recipePrint'

In [286]:
for i in scampi_links[5:]:
    recipe = scrape(i)
    recipes.append(recipe)

#### Garlic Mashed Cauliflower

In [311]:
gmc = 'https://www.foodnetwork.com/recipes/trisha-yearwood/art-smiths-garlic-mashed-cauliflower-2282175'
# scrape(scampi)

In [312]:
gmc_links = collect_urls(gmc)
gmc_links

['https://www.foodnetwork.com/recipes/photos/30-minute-dinner-recipes',
 'https://www.foodnetwork.com/recipes/packages/baking-guide',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/fall-off-the-bone-chicken-5195778',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/pork-veggie-and-rice-noodle-lettuce-wraps-with-sweet-chili-sauce-9628517',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/sourdough-discard-chocolate-chip-cookies-9876093',
 'https://www.foodnetwork.com/recipes/ina-garten/turkey-lasagna-recipe2-1916662',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/juicy-chicken-breasts-baked-from-frozen-9884167',
 'https://www.foodnetwork.com/recipes/trisha-yearwood',
 'https://www.foodnetwork.com/recipes/trisha-yearwood/art-smiths-garlic-mashed-cauliflower-2282175.recipePrint',
 'https://www.foodnetwork.com/recipes/packages/sensational-sides',
 'https://www.foodnetwork.com/recipes/mock-garlic-mashed-potatoes-recipe-1942447',
 'https://www.foo

In [330]:
gmc_links

['https://www.foodnetwork.com/recipes/trisha-yearwood/art-smiths-garlic-mashed-cauliflower-2282175.recipePrint',
 'https://www.foodnetwork.com/recipes/mock-garlic-mashed-potatoes-recipe-1942447',
 'https://www.foodnetwork.com/recipes/valerie-bertinelli/herb-mashed-cauliflower-8030220',
 'https://www.foodnetwork.com/recipes/food-network-kitchen/turmeric-mashed-cauliflower-4555216',
 'https://www.foodnetwork.com/recipes/patrick-and-gina-neely/mashed-cauliflower-and-cheese-recipe-1921683']

In [331]:
for i in gmc_links[5:]:
    recipe = scrape(i)
    recipes.append(recipe)

## Epicurious

### Sushi

In [375]:
def scrape_sushi(recipe_url):
    """
    This function takes a URL link to a recipe and, using the imported recipe-scrapers package, 
    adds the scraped content to a dictionary.
    
    To use this function, please first install the package and run the following import statement:
        pip install recipe-scrapers
        from recipe_scrapers import scrape_me    
    """
    scraper = scrape_me(recipe_url)
    
    title = scraper.title()
#     total_time = scraper.total_time() # NOTE: while turning this off works, now all recipes say 35 min
    yields = scraper.yields()
    ingredients = scraper.ingredients()
    instructions = scraper.instructions()
    image = scraper.image()
    host = scraper.host()
    links = scraper.links()
    nutrients = scraper.nutrients()
    
    recipe_dict = {
        'title': title,
        'cooktime': total_time,
        'yields': yields,
        'ingredients': ingredients,
        'instructions': instructions,
        'nutrients': nutrients,
        'image': image,
        'host': host,
        'links': links
    }
    return recipe_dict

In [352]:
# epi_sushi = collect_epicurious('https://www.epicurious.com/search/sushi?content=recipe')

In [353]:
# epi_sushi

[]

In [None]:
epicurious_sushi = 'https://www.epicurious.com/search/sushi?content=recipe'

In [354]:
sushi_links = [
    'https://www.epicurious.com/recipes/food/views/miso-glazed-salmon-with-sushi-rice',
    'https://www.epicurious.com/recipes/food/views/veggie-sushi-hand-roll',
    'https://www.epicurious.com/recipes/food/views/gluten-free-sticky-rice-buns',
    'https://www.epicurious.com/recipes/food/views/simple-ponzu-sauce',
    'https://www.epicurious.com/recipes/food/views/snapper-sashimi-with-seaweed-and-fennel-56389896',
    'https://www.epicurious.com/recipes/food/views/sushi-sandwiches-51262890',
    'https://www.epicurious.com/recipes/food/views/tuna-tostadas-contramar-style-51249210',
    'https://www.epicurious.com/recipes/food/views/chopstick-ready-rice-51240420',
    'https://www.epicurious.com/recipes/food/views/sashimi-salad-with-soy-and-orange-51221630',
    'https://www.epicurious.com/recipes/food/views/seafood-chowder-with-squash-51212610',
    'https://www.epicurious.com/recipes/food/views/seafood-chowder-with-squash-51212610',
    'https://www.epicurious.com/recipes/food/views/lemon-paprika-roasted-salmon-51189020',
    'https://www.epicurious.com/recipes/food/views/quinoa-brown-rice-sushi-51156610',
    'https://www.epicurious.com/recipes/food/views/maru-51103810',
    'https://www.epicurious.com/recipes/food/views/veggie-sushi-rolls-367009',
    'https://www.epicurious.com/recipes/food/views/spicy-seattle-tuna-rolls-366728',
    'https://www.epicurious.com/recipes/food/views/frisee-with-crisped-salmon-skin-and-warm-sherry-vinaigrette-358320',
    'https://www.epicurious.com/recipes/food/views/pomegranate-cocktail-232443',
    'https://www.epicurious.com/recipes/food/views/sushi-rice-351070'
]

In [356]:
mgs = scrape('https://www.epicurious.com/recipes/food/views/miso-glazed-salmon-with-sushi-rice')

ElementNotFoundInHtml: recipe-scrapers exception: Element not found in html (self.soup.find returned None). Check traceback.

In [357]:
scrape_me(mgs)

<recipe_scrapers.epicurious.Epicurious at 0x7fa725862a30>

In [376]:
epi_recipes = []
for i in sushi_links:
    recipe = scrape_sushi(i)
    epi_recipes.append(recipe)

In [384]:
epi_recipes[17]

{'title': 'Pomegranate Cocktail',
 'cooktime': 35,
 'yields': '4 item(s)',
 'ingredients': ['6 ounces premium vodka (Blowfish Sushi uses Han Soju, an Asian spirit)',
  '2 ounces seltzer',
  '4 ounces Triple Sec',
  '4 ounces pomegranate juice',
  '2 lemons, juiced',
  '1 pomegranate, seeded'],
 'instructions': 'Combine vodka, seltzer, Triple Sec, pomegranate juice, and lemon juice in a cocktail shaker over ice; shake. Pour into a chilled cocktail glass; garnish with pomegranate seeds. Save leftover fruit for nibbling.',
 'nutrients': {},
 'image': 'https://www.epicurious.com/static/img/misc/epicurious-social-logo.png',
 'host': 'epicurious.com',
 'links': [{'aria-controls': 'main-navigation',
   'class': ['show-main-navigation'],
   'href': '#main-navigation',
   'title': 'Go to Main Navigation',
   'data-reactid': '4'},
  {'data-track-location': 'header',
   'data-track-source': 'navigation',
   'href': '/',
   'itemprop': 'url',
   'title': 'Epicurious',
   'data-reactid': '6'},
  {'

In [385]:
for i in epi_recipes:
    recipes.append(i)

### Mediterranean

In [387]:
epicurious_mediterranean = 'https://www.epicurious.com/search/mediterranean?content=recipe'

In [354]:
mediterranean_links = [
    'https://www.epicurious.com/recipes/food/views/chicken-thighs-with-tomatoes-and-feta',
    'https://www.epicurious.com/recipes/food/views/greek-turkey-burgers',
    'https://www.epicurious.com/recipes/food/views/big-batch-marinated-bell-peppers',
    'https://www.epicurious.com/recipes/food/views/sea-bream-crudo-with-lemon-and-olives',
    'https://www.epicurious.com/recipes/food/views/mediterranean-eggplant-and-barley-salad-235753',
    'https://www.epicurious.com/recipes/food/views/clam-toasts-with-pancetta',
    'https://www.epicurious.com/recipes/food/views/spicy-marinated-vegetables-and-sardines-on-toast',
    'https://www.epicurious.com/recipes/food/views/smoky-eggplant-dip',
    'https://www.epicurious.com/recipes/food/views/spice-rubbed-pork-tenderloin-with-a-mediterranean-grain-salad',
    'https://www.epicurious.com/recipes/food/views/kefi-lamb-gyro',
    'https://www.epicurious.com/recipes/food/views/roasted-beet-tzatziki-salad',
    'https://www.epicurious.com/recipes/food/views/leg-of-lamb-with-garlic-and-rosemary-105020',
    'https://www.epicurious.com/recipes/food/views/turkish-lamb-chops-with-sumac-tahini-and-dill',
    'https://www.epicurious.com/recipes/food/views/charred-chicken-with-sweet-potatoes-and-oranges',
    'https://www.epicurious.com/recipes/food/views/zhoug-spicy-herb-sauce',
    'https://www.epicurious.com/recipes/food/views/shakshuka-baked-eggs-with-spicy-tomato-sauce',
    'https://www.epicurious.com/recipes/food/views/baked-falafel-with-orange-tahini-sauce',
    'https://www.epicurious.com/recipes/food/views/grilled-cheese-tacos'
]

In [356]:
mgs = scrape('https://www.epicurious.com/recipes/food/views/miso-glazed-salmon-with-sushi-rice')

ElementNotFoundInHtml: recipe-scrapers exception: Element not found in html (self.soup.find returned None). Check traceback.

In [357]:
scrape_me(mgs)

<recipe_scrapers.epicurious.Epicurious at 0x7fa725862a30>

In [376]:
epi_recipes = []
for i in sushi_links:
    recipe = scrape_sushi(i)
    epi_recipes.append(recipe)

In [384]:
epi_recipes[17]

{'title': 'Pomegranate Cocktail',
 'cooktime': 35,
 'yields': '4 item(s)',
 'ingredients': ['6 ounces premium vodka (Blowfish Sushi uses Han Soju, an Asian spirit)',
  '2 ounces seltzer',
  '4 ounces Triple Sec',
  '4 ounces pomegranate juice',
  '2 lemons, juiced',
  '1 pomegranate, seeded'],
 'instructions': 'Combine vodka, seltzer, Triple Sec, pomegranate juice, and lemon juice in a cocktail shaker over ice; shake. Pour into a chilled cocktail glass; garnish with pomegranate seeds. Save leftover fruit for nibbling.',
 'nutrients': {},
 'image': 'https://www.epicurious.com/static/img/misc/epicurious-social-logo.png',
 'host': 'epicurious.com',
 'links': [{'aria-controls': 'main-navigation',
   'class': ['show-main-navigation'],
   'href': '#main-navigation',
   'title': 'Go to Main Navigation',
   'data-reactid': '4'},
  {'data-track-location': 'header',
   'data-track-source': 'navigation',
   'href': '/',
   'itemprop': 'url',
   'title': 'Epicurious',
   'data-reactid': '6'},
  {'

In [385]:
for i in epi_recipes:
    recipes.append(i)

# Recipes to DF

In [386]:
len(recipes)

56

In [342]:
df = pd.DataFrame(recipes).tail(3)
df
# df.drop_duplicates()

Unnamed: 0,title,cooktime,yields,ingredients,instructions,nutrients,image,host,links
34,Baked Shrimp Scampi,43,6 serving(s),"[Deselect All, 2 pounds (12 to 15 per pound) shrimp in the shell, 3 tablespoons good olive oil, 2 tablespoons dry white wine, Kosher salt and freshly ground black pepper, 12 tablespoons (1 1/2 sti...","Preheat the oven to 425 degrees F.\nPeel, devein, and butterfly the shrimp, leaving the tails on. Place the shrimp in a mixing bowl and toss gently with the olive oil, wine, 2 teaspoons salt, and ...",{},https://food.fnr.sndimg.com/content/dam/images/food/fullset/2012/11/12/0/FN_Ina-Garten-Baked-Shrimp-Scampi_s4x3.jpg.rend.hgtvcom.406.305.suffix/1384540899886.jpeg,foodnetwork.com,"[{'href': 'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'}, {'href': '//www.foodnetwork.com/shows/tv-schedule'}, {'href': '//www.foodnetwork..."
35,Grilled Shrimp Scampi,55,4 serving(s),"[Deselect All, 12 jumbo shrimp, peeled and deveined, 1/2 cup canola oil, 10 cloves garlic, 1/4 teaspoon crushed red pepper flakes, 1 teaspoon ground fennel seed, Salt and freshly ground black pepp...","Put the shrimp in a large baking dish.\nCombine the canola oil, garlic, red pepper flakes and fennel seed in a small food processor and process until the garlic is somewhat paste-like. Pour this m...",{},https://food.fnr.sndimg.com/content/dam/images/food/fullset/2012/1/27/1/GT0311_Grilled-Shrimp-Scampi_s4x3.jpg.rend.hgtvcom.406.305.suffix/1398434365616.jpeg,foodnetwork.com,"[{'href': 'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'}, {'href': '//www.foodnetwork.com/shows/tv-schedule'}, {'href': '//www.foodnetwork..."
36,Grilled Shrimp Scampi,55,4 serving(s),"[Deselect All, 12 jumbo shrimp, peeled and deveined, 1/2 cup canola oil, 10 cloves garlic, 1/4 teaspoon crushed red pepper flakes, 1 teaspoon ground fennel seed, Salt and freshly ground black pepp...","Put the shrimp in a large baking dish.\nCombine the canola oil, garlic, red pepper flakes and fennel seed in a small food processor and process until the garlic is somewhat paste-like. Pour this m...",{},https://food.fnr.sndimg.com/content/dam/images/food/fullset/2012/1/27/1/GT0311_Grilled-Shrimp-Scampi_s4x3.jpg.rend.hgtvcom.406.305.suffix/1398434365616.jpeg,foodnetwork.com,"[{'href': 'https://watch.foodnetwork.com/?utm_source=marketingsite&utm_medium=trendingline_watchfullseasons_text'}, {'href': '//www.foodnetwork.com/shows/tv-schedule'}, {'href': '//www.foodnetwork..."


### Selenium Test

In [44]:
# Visit the website of your choice:
url = 'https://www.foodnetwork.com/recipes/food-network-kitchen/herbed-chicken-marsala-recipe-2121049'
driver.get(url)

In [47]:
# Copy Selector
#site > div.area > div.container-site.is-Fluid.container-site--HasAvatar > div > div:nth-child(2) > div.col-md-18 > div > section

# Copied xpath
test_recipe = driver.find_elements_by_xpath('/html/body/section/div[3]/div[3]/div/div[2]/div[1]/div/section/div[2]')
test_recipe

[<selenium.webdriver.remote.webelement.WebElement (session="db93896d6992692589880f7b162cf883", element="766e2bee-ee2a-4cb2-a707-a479067b8f6a")>]

In [52]:
test_recipe[0].text

"Ingredients\nDeselect All\nFour 4-ounce boneless, skinless chicken breast cutlets\nKosher salt and freshly ground black pepper\n1/3 cup whole wheat flour\n1 1/2 tablespoons extra-virgin olive oil\n3/4 cup low-sodium chicken broth\n1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced\n1/2 teaspoon finely chopped rosemary\n10 ounces white button or cremini (baby bella) mushrooms, sliced\n1/3 cup sweet marsala wine\n2 teaspoons unsalted butter\n1 to 2 tablespoons roughly chopped flat-leaf parsley\nAdd to Shopping List\nView Shopping List\nDirections\nTry this cooking class now\nWatch Class\nPlace the chicken cutlets between 2 pieces of plastic wrap and pound with a meat mallet (or the flat side of a chef's knife) until about 1/3-inch thick. Sprinkle with 1/4 teaspoon salt and 1/4 teaspoon pepper.\nPut the flour on a medium plate. Heat the oil in a large nonstick skillet over medium-high heat. Dredge the chicken in the flour to fully coat, s

In [56]:
test_recipe_class = driver.find_elements_by_class_name('o-Recipe')
test_recipe_class[0].text

"WATCH\nRECIPE COURTESY OF FOOD NETWORK KITCHEN\nHerbed Chicken Marsala\n41 Reviews\nSmothered in low-calorie sauteed mushrooms and sun-dried tomatoes, this dish is both healthy and satisfying. A little bit of butter ... More\nSave Recipe\nLevel: Easy\nTotal: 35 min\nActive: 35 min\nYield: 4 servings\nNutrition Info\nShare This Recipe\nIngredients\nDeselect All\nFour 4-ounce boneless, skinless chicken breast cutlets\nKosher salt and freshly ground black pepper\n1/3 cup whole wheat flour\n1 1/2 tablespoons extra-virgin olive oil\n3/4 cup low-sodium chicken broth\n1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced\n1/2 teaspoon finely chopped rosemary\n10 ounces white button or cremini (baby bella) mushrooms, sliced\n1/3 cup sweet marsala wine\n2 teaspoons unsalted butter\n1 to 2 tablespoons roughly chopped flat-leaf parsley\nAdd to Shopping List\nView Shopping List\nDirections\nTry this cooking class now\nWatch Class\nPlace the chicken c

In [61]:
print("WATCH\nRECIPE COURTESY OF FOOD NETWORK KITCHEN\nHerbed Chicken Marsala\n41 Reviews\nSmothered in low-calorie sauteed mushrooms and sun-dried tomatoes, this dish is both healthy and satisfying. A little bit of butter ... More\nSave Recipe\nLevel: Easy\nTotal: 35 min\nActive: 35 min\nYield: 4 servings\nNutrition Info\nShare This Recipe\nIngredients\nDeselect All\nFour 4-ounce boneless, skinless chicken breast cutlets\nKosher salt and freshly ground black pepper\n1/3 cup whole wheat flour\n1 1/2 tablespoons extra-virgin olive oil\n3/4 cup low-sodium chicken broth\n1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced\n1/2 teaspoon finely chopped rosemary\n10 ounces white button or cremini (baby bella) mushrooms, sliced\n1/3 cup sweet marsala wine\n2 teaspoons unsalted butter\n1 to 2 tablespoons roughly chopped flat-leaf parsley\nAdd to Shopping List\nView Shopping List\nDirections\nTry this cooking class now\nWatch Class\nPlace the chicken cutlets between 2 pieces of plastic wrap and pound with a meat mallet (or the flat side of a chef's knife) until about 1/3-inch thick. Sprinkle with 1/4 teaspoon salt and 1/4 teaspoon pepper.\nPut the flour on a medium plate. Heat the oil in a large nonstick skillet over medium-high heat. Dredge the chicken in the flour to fully coat, shaking off any excess. Add the chicken to the skillet and fry until fully cooked and golden brown, about 4 minutes per side. Transfer to a platter and tent with foil to keep warm.\nAdd 1/2 cup of the broth, the sun-dried tomatoes and rosemary to any remaining drippings in the skillet and cook, stirring frequently, for 1 minute to plump the tomatoes. Add the mushrooms, 1/4 teaspoon salt and 1/2 teaspoon pepper and cook until the mushrooms are soft, about 5 minutes. Add the marsala and bring to a boil. Add the remaining 1/4 cup broth and the butter and simmer until the butter is fully melted, about 30 seconds.\nSpoon the mushroom mixture and sauce over the chicken, sprinkle with the parsley and serve.\nCook’s Note\nThis recipe uses regular whole wheat flour, but for a slightly more refined coating, whole wheat pastry flour can be substituted.\nRecipe courtesy of Food Network Kitchen\nMy Private Notes\nAdd a Note\nWATCH\nWatch how to make this recipe.\nCategories:\nHealthy\nChicken Recipes\nPoultry\nMain Dish\nLow-Fat\nLow Calorie\nDiabetes-Friendly\nMore From: Healthy Food, Fast\nLooking for Something Else?\nQuick & Easy More Marsala Recipes 5 Ingredients or Less Highly Rated\nChicken Marsala\n901\nChicken Marsala\n10\nChicken Marsala\n23\nThe Best Chicken Marsala\n41\nClasses You May Also Like\nGuy Fieri\nFieri'd Chicken Saltimbocca\n11m Easy 99%\nCLASS\nElena Besser\nRoast Chicken with Chicken Fat Potatoes\n25m Easy 98%\nCLASS\nDon Angie\nChicken Parmesan\n19m Intermediate 99%\nCLASS\nIna Garten\nSkillet Roasted Lemon Chicken\n10m Easy 99%\nCLASS\n41 Reviews\nYour Rating:\nPost Review\nSort by\nLauren YanakeffMarch 18, 2021\n0\nComment\nAnonymousJanuary 31, 2021\nVery simple and tastes like you would get in a fine restaurant!\n0\nComment\nLou K.January 26, 2021\n0\nComment\nAlissa S.January 13, 2021\nAbsolutely delicious!!\n0\nComment\nAshton P.November 21, 2020\nSo good!\n0\nComment\nlukeaforsytheNovember 2, 2020\nmy family loves this\n0\nComment\nAnonymousOctober 28, 2020\n\n+1\nComment\ntodd l.October 14, 2020\nThis turned out to be an absolute home run. Very easy to make, very healthy and super delicious. My family gave me a 10!\n+1\nComment\nAnonymousAugust 15, 2020\nSuper easy and delicious. Not a huge fan of sun-dried tomatoes but in this recipe they taste amazing.\n+1\nComment\nfawaztahirJuly 30, 2020\nYum\n+1\nComment\nShow more reviews")

WATCH
RECIPE COURTESY OF FOOD NETWORK KITCHEN
Herbed Chicken Marsala
41 Reviews
Smothered in low-calorie sauteed mushrooms and sun-dried tomatoes, this dish is both healthy and satisfying. A little bit of butter ... More
Save Recipe
Level: Easy
Total: 35 min
Active: 35 min
Yield: 4 servings
Nutrition Info
Share This Recipe
Ingredients
Deselect All
Four 4-ounce boneless, skinless chicken breast cutlets
Kosher salt and freshly ground black pepper
1/3 cup whole wheat flour
1 1/2 tablespoons extra-virgin olive oil
3/4 cup low-sodium chicken broth
1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced
1/2 teaspoon finely chopped rosemary
10 ounces white button or cremini (baby bella) mushrooms, sliced
1/3 cup sweet marsala wine
2 teaspoons unsalted butter
1 to 2 tablespoons roughly chopped flat-leaf parsley
Add to Shopping List
View Shopping List
Directions
Try this cooking class now
Watch Class
Place the chicken cutlets between 2 pieces of plas

In [57]:
test_recipe_ingredients_class = driver.find_elements_by_class_name('bodyLeft')
test_recipe_ingredients_class

[<selenium.webdriver.remote.webelement.WebElement (session="db93896d6992692589880f7b162cf883", element="dfa79233-c253-406f-a013-5b4a9fad7538")>]

In [60]:
test_recipe_ingredients_class[0].text

'Ingredients\nDeselect All\nFour 4-ounce boneless, skinless chicken breast cutlets\nKosher salt and freshly ground black pepper\n1/3 cup whole wheat flour\n1 1/2 tablespoons extra-virgin olive oil\n3/4 cup low-sodium chicken broth\n1/3 cup sun-dried tomatoes (not packed in oil; not rehydrated), finely chopped or very thinly sliced\n1/2 teaspoon finely chopped rosemary\n10 ounces white button or cremini (baby bella) mushrooms, sliced\n1/3 cup sweet marsala wine\n2 teaspoons unsalted butter\n1 to 2 tablespoons roughly chopped flat-leaf parsley\nAdd to Shopping List\nView Shopping List'

In [11]:
test_recipe_id = driver.find_elements_by_id('site')
test_recipe_id

[<selenium.webdriver.remote.webelement.WebElement (session="db93896d6992692589880f7b162cf883", element="31352e00-98bb-4405-8a64-7b0d959bd533")>]

In [15]:
type(test_recipe_id[0])

selenium.webdriver.remote.webelement.WebElement

In [18]:
test_recipe_id[0].text

'.st0{fill:#fff}.st1{fill:#e6003d}\nRecipes\nShows\nChefs\nFood Network Kitchen\nPremium .cls-1{fill:#fff;}.cls-1,.cls-3{fill-rule:evenodd;}.cls-2{opacity:0.8;}.cls-3{fill:#89caf7;}.cls-4{mask:url(#mask);}.cls-5{opacity:0.9;}.cls-6{mask:url(#mask-2-2);}\ndiscovery+\nHome Healthy Healthy Cooking Every Day Main Dishes\nMain\nAppetizers\nMain Dishes\nSides\nDesserts\nGuides\nMenus\nVideos\nBobby Flay Fit\nBlog\nOur 50 Most-Popular Healthy Recipes\nLooking for a few good-for-you recipes to add to your weekly rotation? Count down through the 50 healthy recipes our Food Network fans love most.\nSAVE COLLECTION\nRelated To:\nHealthy\nOPEN GALLERY\n50 Photos\nPhoto By: Tara Donne ©FOOD NETWORK : 2012, Television Food Network, G.P.\n1 / 50\nNo. 50: Herbed Chicken Marsala\nSmothered in low-calorie sauteed mushrooms and sundried tomatoes, this dish is both healthy and satisfying. A little bit of butter goes a long way in the sauce — just a touch adds creamy richness.\nGet the Recipe: Herbed Chick

In [27]:
test_recipe_xpath = driver.find_element_by_xpath("//*[@id='site']")
test_recipe_xpath

<selenium.webdriver.remote.webelement.WebElement (session="db93896d6992692589880f7b162cf883", element="31352e00-98bb-4405-8a64-7b0d959bd533")>

In [28]:
test_recipe_xpath.text

'.st0{fill:#fff}.st1{fill:#e6003d}\nRecipes\nShows\nChefs\nFood Network Kitchen\nPremium .cls-1{fill:#fff;}.cls-1,.cls-3{fill-rule:evenodd;}.cls-2{opacity:0.8;}.cls-3{fill:#89caf7;}.cls-4{mask:url(#mask);}.cls-5{opacity:0.9;}.cls-6{mask:url(#mask-2-2);}\ndiscovery+\nHome Healthy Healthy Cooking Every Day Main Dishes\nMain\nAppetizers\nMain Dishes\nSides\nDesserts\nGuides\nMenus\nVideos\nBobby Flay Fit\nBlog\nOur 50 Most-Popular Healthy Recipes\nLooking for a few good-for-you recipes to add to your weekly rotation? Count down through the 50 healthy recipes our Food Network fans love most.\nSAVE COLLECTION\nRelated To:\nHealthy\nOPEN GALLERY\n50 Photos\nPhoto By: Tara Donne ©FOOD NETWORK : 2012, Television Food Network, G.P.\n1 / 50\nNo. 50: Herbed Chicken Marsala\nSmothered in low-calorie sauteed mushrooms and sundried tomatoes, this dish is both healthy and satisfying. A little bit of butter goes a long way in the sauce — just a touch adds creamy richness.\nGet the Recipe: Herbed Chick

In [None]:
# Example: Scroll down (with a test for a modal)

def scroll_down():
    for i in range(1, 10):
        try:
            modal_button = driver.find_element_by_class_name("button2")
            webdriver.ActionChains(driver).move_to_element(modal_button).click(modal_button).perform()
      ##### modal_button.click() also works 
            
        except:
            time.sleep(.5)
            pass 
        
        #scroll to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

        
# Example: Load more content
# Code snippet for context purposes only. We will not run this function:

def get_more(): 
    for i in range(1, 5):
        try:
            next_b = driver.find_element_by_xpath("//*[contains(text(), 'Load next Politics story')]")
            webdriver.ActionChains(driver).move_to_element(next_b).click(next_b).perform()
            time.sleep(.5)
        except: 
            print("Page #" + str(i) + " has failed to load") 

In [62]:
driver.quit()

# Reference

## Data Structures

In [103]:
t1 = [
    'cookTime',
    'cookingMethod',
    'nutrition',
    'recipeCategory',
    'recipeCuisine',
    'recipeIngredient',
    'recipeInstructions',
    'recipeYield',
    'suitableForDiet'
]

t1

t2 = [
    'Duration',
    'Text',
    'NutritionInformation',
    'Text',
    'Text',
    'Text',
    'CreativeWork or ItemList  or Text',
    'QuantitativeValue or Text',
    'RestrictedDiet'
]

t3 = [
    'The time it takes to actually cook the dish, in ISO 8601 duration format.',
    'The method of cooking, such as Frying, Steaming, ...',
    'Nutrition information about the recipe or menu item.',
    'The category of the recipe—for example, appetizer, entree, etc.',
    'The cuisine of the recipe (for example, French or Ethiopian).',
    'A single ingredient used in the recipe, e.g. sugar, flour or garlic. Supersedes ingredients.',
    'A step in making the recipe, in the form of a single item (document, video, etc.) or an ordered list with HowToStep and/or HowToSection items.',
    'The quantity produced by the recipe (for example, number of people served, number of servings, etc).','Indicates a dietary restriction or guideline for which this recipe or menu item is suitable, e.g. diabetic, halal etc.',
]

In [120]:
schema_description = {'Property': t1, 'Expected Type': t2, 'Description': t3}
Recipe_schema_properties = pd.DataFrame(schema_description)

In [121]:
Recipe_schema_properties

Unnamed: 0,Property,Expected Type,Description
0,cookTime,Duration,"The time it takes to actually cook the dish, in ISO 8601 duration format."
1,cookingMethod,Text,"The method of cooking, such as Frying, Steaming, ..."
2,nutrition,NutritionInformation,Nutrition information about the recipe or menu item.
3,recipeCategory,Text,"The category of the recipe—for example, appetizer, entree, etc."
4,recipeCuisine,Text,"The cuisine of the recipe (for example, French or Ethiopian)."
5,recipeIngredient,Text,"A single ingredient used in the recipe, e.g. sugar, flour or garlic. Supersedes ingredients."
6,recipeInstructions,CreativeWork or ItemList or Text,"A step in making the recipe, in the form of a single item (document, video, etc.) or an ordered list with HowToStep and/or HowToSection items."
7,recipeYield,QuantitativeValue or Text,"The quantity produced by the recipe (for example, number of people served, number of servings, etc)."
8,suitableForDiet,RestrictedDiet,"Indicates a dietary restriction or guideline for which this recipe or menu item is suitable, e.g. diabetic, halal etc."


## Schemas

Schemas that may be useful references for how I structure my data:  
  
@type: [Recipe](https://schema.org/Recipe)  
@type: [NutritionInformation](https://schema.org/NutritionInformation)  
@type: [MenuItem](https://schema.org/MenuItem)  
  
Example of `Recipe` schema format:

>@type: Recipe  
name: Mom's World Famous Banana Bread  
suitableForDiet: LowFatDiet  
recipeYield: 1 loaf  
recipeInstructions: Preheat the oven to 350 degrees. Mix in the ingredients in a bowl. Add the flour last. Pour the mixture into a loaf pan and bake for one hour.  
recipeIngredient: 3/4 cup of sugar  
recipeIngredient: 3 or 4 ripe bananas, smashed  
recipeIngredient: 1 egg  
prepTime: PT15M  
nutrition:  
>- @type: NutritionInformation  
>- fatContent: 9 grams fat  
>- calories: 240 calories  
>interactionStatistic:  
>- @type: InteractionCounter
>- userInteractionCount: 140
>- interactionType: Comment
>image: http://example.org/bananabread.jpg  
description: This classic banana bread recipe comes from my mom -- the walnuts add a nice texture and flavor to the banana bread.  
datePublished: 2009-05-08  
cookTime: PT1H  
author: John Smith  

## Borrowed Functions that may be useful but will need tuning:

In [6]:
# Example: Scroll down (with a test for a modal)

def scroll_down():
    for i in range(1, 10):
        try:
            modal_button = driver.find_element_by_class_name("button2")
            webdriver.ActionChains(driver).move_to_element(modal_button).click(modal_button).perform()
      ##### modal_button.click() also works 
            
        except:
            time.sleep(.5)
            pass 
        
        #scroll to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

In [7]:
# def collect_epicurious(url):
#     scroll_down()
#     links
#     links = driver.find_elements_by_id('href')
    
#     return links

In [8]:
driver.get('https://www.epicurious.com/search/sushi?content=recipe')