In [1]:
import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Fetch and parse the first page of search results.
# The timeout stops a stalled connection from blocking the kernel forever, and
# naming the parser keeps the parse tree independent of which optional parser
# libraries are installed (it also silences bs4's guessed-parser warning).
# Note: the scraping loop further down reassigns both `response` and `doc`.
response = requests.get('https://www.epicurious.com/search/cucumbers', timeout = 30)
doc = BeautifulSoup(response.text, 'html.parser')

In [12]:
# Walk result pages 1-10 of the "cucumbers" search and collect one dict per
# result card into `rows`.
rows = []

# Optional fields: output column -> keyword arguments passed to `find`.
# When a card lacks the element, the key is simply left out of the row.
optional_fields = {
    'Summary': {'class_': 'dek'},
    'Rating': {'itemprop': 'ratingValue'},
    'Make_Again_Pct': {'class_': 'make-again-percentage'},
}

for num in range(1, 11):
    url = f'https://www.epicurious.com/search/cucumbers?page={num}'
    # Announce the page before the request so a slow or failing fetch is attributable.
    print('Scraping', url)
    # Timeout: never hang the kernel on a stalled connection.
    response = requests.get(url, timeout = 30)
    # Explicit parser: same parse tree regardless of which parsers are installed.
    doc = BeautifulSoup(response.text, 'html.parser')
    contents = doc.find_all(class_ = re.compile('.*-content-card'))
    for content in contents:
        row = {}
        # Tag, Title and Url are required: a card without them raises here.
        row['Tag'] = content.find(class_ = 'summary').strong.text
        row['Title'] = content.h4.text
        # Test for a missing element explicitly instead of using a bare
        # `except: pass`, which would also hide unrelated errors and
        # swallow KeyboardInterrupt.
        for column, selector in optional_fields.items():
            element = content.find(**selector)
            if element is not None:
                row[column] = element.text
        row['Url'] = content.find(class_ = 'hed').a['href']
        rows.append(row)

Scraping https://www.epicurious.com/search/cucumbers?page=1
Scraping https://www.epicurious.com/search/cucumbers?page=2
Scraping https://www.epicurious.com/search/cucumbers?page=3
Scraping https://www.epicurious.com/search/cucumbers?page=4
Scraping https://www.epicurious.com/search/cucumbers?page=5
Scraping https://www.epicurious.com/search/cucumbers?page=6
Scraping https://www.epicurious.com/search/cucumbers?page=7
Scraping https://www.epicurious.com/search/cucumbers?page=8
Scraping https://www.epicurious.com/search/cucumbers?page=9
Scraping https://www.epicurious.com/search/cucumbers?page=10


In [13]:
# Build the results table with a fixed column order.
# `reindex` is used instead of `df[[...]]` so that an optional field missing
# from every scraped card (or an empty `rows`) yields a NaN-filled column
# rather than a KeyError.
df = pd.DataFrame(rows).reindex(
    columns = ['Title', 'Tag', 'Summary', 'Url', 'Rating', 'Make_Again_Pct']
)
df.head()

Unnamed: 0,Title,Tag,Summary,Url,Rating,Make_Again_Pct
0,Spicy Lightly Pickled Cucumbers,recipe,These quick pickles have just the right amount...,/recipes/food/views/spicy-lightly-pickled-cucu...,4,100%
1,Cucumbers with Ajo Blanco Sauce,recipe,Think of this as an all-purpose garlic sauce. ...,/recipes/food/views/cucumbers-with-ajo-blanco-...,4,100%
2,Cold Beef Tenderloin with Tomatoes and Cucumbers,recipe,Beef tenderloin is precious enough to baby on ...,/recipes/food/views/cold-beef-tenderloin-with-...,2,0%
3,Fried Fish Sandwiches with Cucumbers and Tarta...,recipe,Take your cast-iron pan to the grill for this ...,/recipes/food/views/fried-fish-sandwiches-with...,3,100%
4,Grain Salad with Tomatoes and Cucumbers,recipe,We call for semi-pearled grains because they c...,/recipes/food/views/farro-spelt-grain-salad-wi...,3,100%


In [14]:
# Write the search-result table to disk, leaving the row index out of the file.
df.to_csv(path_or_buf = 'Epicurious_cucumbers.csv', index = False)

Part 2

In [15]:
# Load the saved search-result table back from disk.
df = pd.read_csv(filepath_or_buffer = 'Epicurious_cucumbers.csv')

In [16]:
# Keep only the rows whose Tag is exactly 'recipe'; the original index labels
# of the surviving rows are retained.
df = df.loc[df['Tag'].eq('recipe')]

In [17]:
def scrap_page(food):
    """Scrape one recipe page and return its details as a Series.

    Parameters
    ----------
    food : mapping (for example a DataFrame row)
        Must provide ``food['Url']``, the recipe link to fetch. It may be
        site-relative (with or without a leading slash) or absolute.

    Returns
    -------
    pd.Series
        ``Title`` (stripped text of the page heading, or None when the heading
        is absent) and ``Ingredients``, ``Direction`` and ``Tags`` (lists of
        str). A section that is missing from the page yields an empty list
        instead of raising, so one odd page does not abort a whole
        ``DataFrame.apply`` run.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or exceeds the timeout.
    """
    # urljoin resolves the href against the site root, so a leading slash no
    # longer produces 'https://www.epicurious.com//recipes/...'.
    url = urljoin('https://www.epicurious.com/', food['Url'])
    print('Scraping', url)
    # Timeout: never hang the kernel on a stalled connection.
    response = requests.get(url, timeout = 30)
    # Explicit parser: same parse tree regardless of which parsers are installed.
    contents = BeautifulSoup(response.text, 'html.parser')

    # Locate each section once; any of these may be None on an unusual page.
    title_block = contents.find(class_ = 'title-source')
    heading = title_block.h1 if title_block is not None else None
    ingredient_list = contents.find('ul', class_ = 'ingredients')
    step_group = contents.find(class_ = 'preparation-group')
    tag_section = contents.find(class_ = 'menus-tags content')
    tag_list = tag_section.find(class_ = 'tags') if tag_section is not None else None

    row = {}
    row['Title'] = heading.text.strip() if heading is not None else None
    row['Ingredients'] = (
        [] if ingredient_list is None
        else [item.text for item in ingredient_list.find_all(class_ = 'ingredient')]
    )
    row['Direction'] = (
        [] if step_group is None
        else [step.text.strip() for step in step_group.find_all(class_ = 'preparation-step')]
    )
    row['Tags'] = (
        [] if tag_list is None
        else [tag.text for tag in tag_list.find_all('dt')]
    )
    print(row)
    return pd.Series(row)

In [18]:
# Call scrap_page once per row (each row is passed as a Series); the returned
# Series are assembled into a frame that shares df's index.
scraped_df = df.apply(scrap_page, axis = 'columns')

Scraping https://www.epicurious.com//recipes/food/views/spicy-lightly-pickled-cucumbers
{'Title': 'Spicy Lightly Pickled Cucumbers', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://www.epicurious.com//recipes/food/views/spicy-lightly-pickled-cucumbers
{'Title': 'Spicy Lightly Pickled Cucumbers', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://www.epicurious.com//recipes/food/views/cucumbers-with-ajo-blanco-sauce
{'Title': 'Cucumbers with Ajo Blanco Sauce', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://www.epicurious.com//recipes/food/views/cold-beef-tenderloin-with-tomatoes-and-cucumbers
{'Title': 'Cold Beef Tenderloin with Tomatoes and Cucumbers', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://www.epicurious.com//recipes/food/views/fried-fish-sandwiches-with-cucumbers-and-tartar-sauce
{'Title': 'Fried Fish Sandwiches with Cucumbers and Tartar Sauce', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://

{'Title': 'Sweet and Sour Pickles', 'Ingredients': [], 'Direction': [], 'Tags': []}
Scraping https://www.epicurious.com//recipes/food/views/charred-romaine-greek-salad-with-quinoa-crusted-feta-56389585
{'Title': 'Charred Romaine Greek Salad With Quinoa-Crusted Feta', 'Ingredients': [], 'Direction': [], 'Tags': []}


In [19]:
# Preview the first five scraped rows.
scraped_df.head(n = 5)

Unnamed: 0,Title,Ingredients,Direction,Tags
0,Spicy Lightly Pickled Cucumbers,[],[],[]
1,Cucumbers with Ajo Blanco Sauce,[],[],[]
2,Cold Beef Tenderloin with Tomatoes and Cucumbers,[],[],[]
3,Fried Fish Sandwiches with Cucumbers and Tarta...,[],[],[]
4,Grain Salad with Tomatoes and Cucumbers,[],[],[]


In [20]:
# Attach the scraped columns to the search results by index. The scraped copy
# of an overlapping column gets the '_scraped' suffix, and the duplicate title
# is then discarded.
merged_df = (
    df
    .join(scraped_df, rsuffix = '_scraped')
    .drop(columns = ['Title_scraped'])
)
merged_df.head()

Unnamed: 0,Title,Tag,Summary,Url,Rating,Make_Again_Pct,Ingredients,Direction,Tags
0,Spicy Lightly Pickled Cucumbers,recipe,These quick pickles have just the right amount...,/recipes/food/views/spicy-lightly-pickled-cucu...,4.0,100%,[],[],[]
1,Cucumbers with Ajo Blanco Sauce,recipe,Think of this as an all-purpose garlic sauce. ...,/recipes/food/views/cucumbers-with-ajo-blanco-...,4.0,100%,[],[],[]
2,Cold Beef Tenderloin with Tomatoes and Cucumbers,recipe,Beef tenderloin is precious enough to baby on ...,/recipes/food/views/cold-beef-tenderloin-with-...,2.0,0%,[],[],[]
3,Fried Fish Sandwiches with Cucumbers and Tarta...,recipe,Take your cast-iron pan to the grill for this ...,/recipes/food/views/fried-fish-sandwiches-with...,3.0,100%,[],[],[]
4,Grain Salad with Tomatoes and Cucumbers,recipe,We call for semi-pearled grains because they c...,/recipes/food/views/farro-spelt-grain-salad-wi...,3.0,100%,[],[],[]


In [21]:
# Write the merged table to disk, leaving the row index out of the file.
# NOTE(review): this is the same path the earlier `pd.read_csv` cell loads, so
# after this cell runs the file holds the merged table (list columns are
# written in their string form) - confirm that overwriting it is intended.
merged_df.to_csv(path_or_buf = 'Epicurious_cucumbers.csv', index = False)