# This script imports data from individual recipes by iterating through a CSV file of all NYT recipe links

In [3]:
import pandas as pd
from bs4 import BeautifulSoup
import re
import requests


# Accumulators for the scraped fields; each list receives one entry per recipe
recipe_name, recipe_author = [], []
recipe_rating, recipe_review_count = [], []
recipe_instructions, recipe_ingredients = [], []
recipe_tags, recipe_links = [], []

# Replace the empty placeholder with the URLs stored in the 'Links' column
# of the CSV generated by recipe_links_importer.py
recipe_links = (
    pd.read_csv('recipe_links.csv')['Links']
    .tolist()
)

The code below gathers the information listed here for a recipe (as written, it fetches one hard-coded recipe URL rather than iterating through `recipe_links`):
    Title
    Author
    Rating
    Number of Reviews
    Tags
    Ingredients
    Instructions

In [4]:
count = 0  # Initializes counter to keep track of progress

# NOTE(review): this cell downloads one hard-coded recipe page; the
# `recipe_links` list read from the CSV is not iterated here.
recipe_url = 'https://cooking.nytimes.com/recipes/3783-original-plum-torte?smid=ig-nytcooking&utm_source=curalate_like2buy&utm_medium=curalate_like2buy_gnauQ5q1__89e37797-e72c-4bbb-be25-0c665a7cbcc4&crl8_id=89e37797-e72c-4bbb-be25-0c665a7cbcc4'

# Without a timeout, requests waits indefinitely on an unresponsive server;
# with one, a stalled connection raises requests.exceptions.Timeout instead.
response_recipe = requests.get(recipe_url, timeout=30)
html_recipe = response_recipe.text

# Parse the downloaded HTML so the later sections can query it
soup_recipe = BeautifulSoup(html_recipe, 'html.parser')

# Adds title of recipe; if recipe title isn't available, recipe title is set as 'None'
try:
    recipe_name.append(soup_recipe.title.string)
except AttributeError:
    # soup_recipe.title is None when the page has no <title> element.
    # The placeholder is 'None' (capitalised) to match the author fallback below.
    recipe_name.append('None')
    print('Name Error' + str(count))

# Adds author of recipe; if recipe author isn't available, recipe author is set as 'None'
try:
    recipe_author.append(
        soup_recipe.find('span', {'class': 'byline-name', 'itemprop': 'author'}).text
    )
except AttributeError:
    # find() returns None when no matching byline span exists
    recipe_author.append('None')
    print('Author Error' + str(count))

# Adds average rating and number of reviews of recipe; if they aren't available, they are set as '0'
# Raw strings keep the regex backslashes from being read as (invalid) string escapes.
pattern = r'\=\s(\d+)'  # Captures the digits that follow "= "
# Locate the text node mentioning bootstrap.recipe.avg_rating (dots escaped so
# they match literally) and pull every "= <digits>" value out of it.
# If no such node exists, find() returns None and findall() yields an empty list.
value = re.findall(
    pattern,
    str(soup_recipe.find(string=re.compile(r"bootstrap\.recipe\.avg_rating"))),
)

# The first captured number is stored as the rating
try:
    recipe_rating.append(value[0])
except IndexError:
    recipe_rating.append('0')
    print('Rating Error' + str(count))

# The second captured number is stored as the review count
try:
    recipe_review_count.append(value[1])
except IndexError:
    recipe_review_count.append('0')
    print('Review Count Error' + str(count))

# Adds recipe tags; if recipe tags aren't available, empty list added
tags = []
try:
    ol_tags = soup_recipe.find('div', {'class': 'tags-nutrition-container'})
    # Every link inside the container is treated as one tag
    ol_tags_children = ol_tags.find_all('a')
    tags = [child.get_text() for child in ol_tags_children]
except AttributeError:
    # find() returned None: the page has no tags container, so keep the empty list
    print('Tag Error' + str(count))

recipe_tags.append(tags)

# Adds ingredients and quantity; if either are unavailable, empty dictionary added
def _stripped_span_texts(spans):
    """Return the whitespace-stripped text of every tag in *spans*.

    ``tag.string`` can be None (for example when a tag has no text or has
    several children), which makes ``.strip()`` raise AttributeError; in
    that case every tag is re-read with ``get_text()`` instead.
    """
    try:
        return [span.string.strip() for span in spans]
    except AttributeError:
        return [span.get_text().strip() for span in spans]


recipe_ingredients_list = []
recipe_quantity_list = []
recipe_ingredients_dictionary = {}

try:
    recipe_ingredients_list = _stripped_span_texts(
        soup_recipe.find_all('span', {'class': 'ingredient-name'})
    )
except AttributeError:
    print('Ingredients Error' + str(count))
else:
    # Quantities are only read once the ingredient names were extracted
    try:
        recipe_quantity_list = _stripped_span_texts(
            soup_recipe.find_all('span', {'class': 'quantity'})
        )
        # NOTE(review): zip() stops at the shorter list, and dict() keeps only
        # the last quantity for an ingredient name that appears more than once.
        recipe_ingredients_dictionary = dict(zip(recipe_ingredients_list, recipe_quantity_list))
    except AttributeError:
        print('Quantity Error' + str(count))

recipe_ingredients.append(recipe_ingredients_dictionary)

# Adds recipe instructions; if instructions are unavailable, empty list added
recipe_instructions_list = []
try:
    ol_instructions = soup_recipe.find('ol', {'class': 'recipe-steps', 'itemprop': 'recipeInstructions'})
    # Each <li> inside the ordered list is stored as one instruction step
    ol_instructions_children = ol_instructions.find_all('li')
    recipe_instructions_list = [child.get_text() for child in ol_instructions_children]
except AttributeError:
    # find() returned None: the page has no recipe-steps list, so keep the empty list
    print('Instructions Error' + str(count))

recipe_instructions.append(recipe_instructions_list)

# Advance the progress counter and report how many recipes have been handled
count = count + 1
print(count)

# Assemble the accumulator lists into one table, one column per scraped field
recipe_information = pd.DataFrame(
    data={
        'Recipe Name': recipe_name,
        'Recipe Author': recipe_author,
        'Recipe Rating': recipe_rating,
        'Recipe Review Count': recipe_review_count,
        'Recipe Tags': recipe_tags,
        'Recipe Ingredients': recipe_ingredients,
        'Recipe instructions': recipe_instructions,
    },
)

# Save the table to disk as a CSV file
recipe_information.to_csv(path_or_buf='recipe_information_copy.csv')


1
