In [None]:
import pandas as pd
import numpy as np
import re

import itertools
import requests
from wordcloud import WordCloud

from rich import inspect
from rich.pretty import pprint
from tqdm.notebook import tqdm

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

import matplotlib.pyplot as plt

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [None]:
recipes_path = "../data/full_dataset.csv"

recipes_data = pd.read_csv(
    recipes_path,
    index_col=0,
    usecols=["title", "link", "NER"],
    dtype={
        "title": "string[pyarrow]",
        "link": "string[pyarrow]",
        "NER": "string[pyarrow]",
    },
)

print(f" data shape: {recipes_data.shape}")
print(recipes_data.memory_usage(deep=True))

In [None]:
recipes_data.sample()

In [None]:
recipes_data["website"] = recipes_data.link.str.split("/").str.get(0)

recipes_data.sample(5)

In [None]:
recipes_data.website.value_counts()

After inspection of some websites, the following list of websites will be re-scraped for further features (including duration, nutrition facts and servings):

- www.food.com
- www.allrecipes.com
- www.tasteofhome.com

In [None]:
preliminary_websites = [
    "www.food.com",
    "www.allrecipes.com",
    "www.tasteofhome.com",
]

filtered_data = recipes_data.query(
    f" website == {preliminary_websites} "
).copy()

filtered_data.NER = filtered_data.NER.apply(eval)

filtered_data.shape

In [None]:
all_ingredients = filtered_data.NER.explode()

ingredients_frequency = all_ingredients.value_counts().to_dict()

In [None]:
wordcloud = WordCloud(width=1000, height=500).generate_from_frequencies(
    ingredients_frequency
)
plt.figure(figsize=(15, 8))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()

In [None]:
filtered_data.isnull().sum()

In [None]:
filtered_data.NER.iloc[0]

In [None]:
filtered_data["duration"] = np.nan
filtered_data["nutrition_facts"] = np.nan
filtered_data["servings"] = np.nan

In [None]:
filtered_data.to_csv("ommak.csv", index=False)

# Start of scraping:

## www.food.com

In [13]:
import pandas as pd
import numpy as np
import re
import time
import itertools
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud

from rich import inspect
from tqdm.notebook import tqdm
from pprint import pprint

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains

from selenium_stealth import stealth


import matplotlib.pyplot as plt

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [5]:
filtered_data = pd.read_csv(
    "/Users/man-top/my_github/FoodFlex/notebooks/ommak.csv"
)

example_to_scrape = (
    filtered_data.query(" website == 'www.allrecipes.com' ")
    .link.sample()
    .iloc[0]
)

example_to_scrape

'www.allrecipes.com/recipe/17096/blue-cheese-dip-i/'

In [36]:
import time

chrome_driver_path = (
    "/Users/man-top/Downloads/chromedriver_mac_arm64/chromedriver"
)


service = Service(chrome_driver_path)
options = webdriver.ChromeOptions()
options.headless = False

driver = webdriver.Chrome(service=service, options=options, keep_alive=True)

driver.get(f"https://{example_to_scrape}")

time.sleep(4)

# scrolling
body = driver.find_element(by=By.TAG_NAME("body"))
body.send_keys(Keys.PAGE_DOWN)

time.sleep(2)


button_url = WebDriverWait(driver, 30).until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "button.link.facts__nutrition.svelte-ovaflp")
    )
)
# button_url = WebDriverWait(driver, 20).until(lambda x: x.find_element(By.XPATH, "//*[@id='recipe']/div[9]/button"))

# inspect(button_url)

try:
    button_url.click()
    print("yel3an rabbak")
except WebDriverException:
    print("Element is not clickable")


time.sleep(4)

try:
    nutrition_facts_url = WebDriverWait(driver, 30).until(
        EC.visibility_of_element_located(
            (By.XPATH, "//*[@id='top']/div[3]/div[4]/div/div/div[2]")
        )
    )
except:
    print("not found")
print()

driver.quit()

  options.headless = False


TypeError: 'str' object is not callable

In [None]:
inspect(button_url)

In [7]:
def setup_web_driver(headless: bool):
    chrome_driver_path = (
        "/Users/man-top/Downloads/chromedriver_mac_arm64/chromedriver"
    )

    service = Service(chrome_driver_path)
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless=new")

    driver = webdriver.Chrome(service=service, options=options, keep_alive=True)
    return driver

In [29]:
def scrape_nutrition_facts(driver, url):
    driver.get(f"https://{url}")

    time.sleep(np.random.randint(2, 4))

    button_url = driver.find_element(
        By.XPATH, "//button[contains(text(), 'Nutrition information')]"
    )

    time.sleep(np.random.randint(2, 5))

    driver.execute_script("arguments[0].scrollIntoView();", button_url)

    expanded = button_url.get_attribute("aria-expanded")

    print("start: ", expanded)

    count_failed_click = 0

    while expanded == "false":
        print(expanded)
        if count_failed_click % 4 == 0:
            print("failed 4 times in a row... Refreshing")
            time.sleep(np.random.randint(1, 2))
            driver.refresh()

        time.sleep(np.random.randint(1, 2))

        button_url = driver.find_element(
            By.XPATH, "//button[contains(text(), 'Nutrition information')]"
        )

        time.sleep(np.random.randint(2, 5))

        button_url.click()
        expanded = button_url.get_attribute("aria-expanded")

        count_failed_click += 1

    time.sleep(1)

    nutrition_facts_url = driver.find_element(
        By.CSS_SELECTOR, "div.recipe-nutrition.svelte-epeb0m"
    )
    driver.close()
    driver.quit()
    return nutrition_facts_url.text

In [6]:
import time

chrome_driver_path = (
    "/Users/man-top/Downloads/chromedriver_mac_arm64/chromedriver"
)

service = Service(chrome_driver_path)
options = webdriver.ChromeOptions()

options.add_argument("start-maximized")


# options.add_argument('--headless=new')

driver = webdriver.Chrome(service=service, options=options, keep_alive=True)


driver.get(f"https://{example_to_scrape}")

time.sleep(np.random.randint(2, 5))

driver.fullscreen_window()

time.sleep(np.random.randint(2, 5))

button_url = driver.find_element(
    By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/button/span[1]"
)

time.sleep(np.random.randint(2, 5))

driver.execute_script("arguments[0].scrollIntoView();", button_url)

# expanded = button_url.get_attribute('aria-expanded')

# print(expanded)

count_failed_click = 0

while expanded == "false":
    print(expanded)
    if count_failed_click % 4 == 0:
        print("failed 4 times in a row... Refreshing")
        time.sleep(np.random.randint(1, 2))
        driver.refresh()

    time.sleep(np.random.randint(1, 2))

    button_url = driver.find_element(
        By.XPATH, "//button[contains(text(), 'Nutrition information')]"
    )

    time.sleep(np.random.randint(2, 5))

    button_url.click()
    expanded = button_url.get_attribute("aria-expanded")

    count_failed_click += 1


time.sleep(1)

nutrition_facts_url = driver.find_element(
    By.CSS_SELECTOR, "div.recipe-nutrition.svelte-epeb0m"
)

print(nutrition_facts_url.text)


driver.close()
driver.quit()

false
false
failed 4 times in a row... Refreshing


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=114.0.5735.198)
Stacktrace:
0   chromedriver                        0x0000000102e83f48 chromedriver + 4226888
1   chromedriver                        0x0000000102e7c4f4 chromedriver + 4195572
2   chromedriver                        0x0000000102ac0d68 chromedriver + 281960
3   chromedriver                        0x0000000102a9aef8 chromedriver + 126712
4   chromedriver                        0x0000000102b1fe88 chromedriver + 671368
5   chromedriver                        0x0000000102b3245c chromedriver + 746588
6   chromedriver                        0x0000000102aeff1c chromedriver + 474908
7   chromedriver                        0x0000000102af0ef4 chromedriver + 478964
8   chromedriver                        0x0000000102e4559c chromedriver + 3970460
9   chromedriver                        0x0000000102e496f0 chromedriver + 3987184
10  chromedriver                        0x0000000102e4f5b4 chromedriver + 4011444
11  chromedriver                        0x0000000102e4a2fc chromedriver + 3990268
12  chromedriver                        0x0000000102e221c0 chromedriver + 3826112
13  chromedriver                        0x0000000102e66088 chromedriver + 4104328
14  chromedriver                        0x0000000102e661e0 chromedriver + 4104672
15  chromedriver                        0x0000000102e75f28 chromedriver + 4169512
16  libsystem_pthread.dylib             0x000000019d5fffa8 _pthread_start + 148
17  libsystem_pthread.dylib             0x000000019d5fada0 thread_start + 8


In [8]:
driver = setup_web_driver(headless=False)

scrape_nutrition_facts(driver=driver, url=example_to_scrape)

NameError: name 'scrape_nutrition_facts' is not defined

In [4]:
def scrape_all_recipes_nutrition_facts(url):
    print(f"\n {url}")
    driver = setup_web_driver(headless=True)

    driver.get(f"https://{url}")

    time.sleep(np.random.randint(1, 3))

    try:
        driver.find_element(By.ID, "onetrust-reject-all-handler").click()
    except:
        print("nop reject all detected!")

    time.sleep(np.random.randint(2, 5))

    try:
        button_url = driver.find_element(
            By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/button/span[1]"
        )

        time.sleep(np.random.randint(2, 5))

        driver.execute_script("arguments[0].scrollIntoView();", button_url)

        driver.execute_script("window.scrollBy(0,-100)", "")

        time.sleep(np.random.randint(1, 3))

        button_url.click()

        time.sleep(np.random.randint(2, 4))

        nutrition_facts_url = driver.find_element(
            By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/div"
        )

        return nutrition_facts_url.text
    except:
        print("this recipe is doomed!")

    driver.close()
    driver.quit()
    return np.nan

In [45]:
import time

chrome_driver_path = (
    "/Users/man-top/Downloads/chromedriver_mac_arm64/chromedriver"
)

service = Service(chrome_driver_path)
options = webdriver.ChromeOptions()

# options.add_argument('--headless=new')

driver = webdriver.Chrome(service=service, options=options, keep_alive=True)


driver.get(f"https://{example_to_scrape}")

time.sleep(np.random.randint(1, 3))


reject_all = driver.find_element(By.ID, "onetrust-reject-all-handler")
reject_all.click()

time.sleep(np.random.randint(2, 5))

button_url = driver.find_element(
    By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/button/span[1]"
)

time.sleep(np.random.randint(2, 5))

driver.execute_script("arguments[0].scrollIntoView();", button_url)

driver.execute_script("window.scrollBy(0,-100)", "")

time.sleep(np.random.randint(1, 3))

button_url.click()

time.sleep(np.random.randint(2, 4))

nutrition_facts_url = driver.find_element(
    By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/div"
)

print(nutrition_facts_url.text)

driver.close()
driver.quit()

Nutrition Facts
Servings Per Recipe 6
Calories 492
% Daily Value *
Total Fat 39g 50%
Saturated Fat 15g 74%
Cholesterol 224mg 75%
Sodium 123mg 5%
Total Carbohydrate 1g 0%
Dietary Fiber 0g 1%
Total Sugars 0g
Protein 32g
Vitamin C 3mg 16%
Calcium 14mg 1%
Iron 4mg 24%
Potassium 323mg 7%
* Percent Daily Values are based on a 2,000 calorie diet. Your daily values may be higher or lower depending on your calorie needs.
** Nutrient information is not available for all ingredients. Amount is based on available nutrient data.
(-) Information is not currently available for this nutrient. If you are following a medically restrictive diet, please consult your doctor or registered dietitian before preparing this recipe for personal consumption.
Powered by the ESHA Research Database © 2018, ESHA Research, Inc. All Rights Reserved


In [71]:
scrape_all_recipes_nutrition_facts(
    url="www.allrecipes.com/recipe/20770/quite-a-peachy-pour/"
)


 www.allrecipes.com/recipe/20770/quite-a-peachy-pour/
this recipe is doomed!


nan

In [9]:
all_recipes_source_df = filtered_data.query(" website == 'www.allrecipes.com' ")

nutrition_facts = []
for link in all_recipes_source_df.link.values:
    nutrition_facts.append(scrape_all_recipes_nutrition_facts(url=link))


 www.allrecipes.com/recipe/241895/deconstructed-screwdriver-the-raw-egg/


NameError: name 'time' is not defined

In [66]:
all_recipes_source_df.shape

(61398, 6)

In [76]:
len(nutrition_facts)

3779

In [78]:
nutrition_facts_series = pd.Series(
    data=nutrition_facts,
    index=all_recipes_source_df.index[: len(nutrition_facts)],
)
nutrition_facts_series.name = "Nutrition_facts_unstructured"
all_recipes_source_df = all_recipes_source_df.join(nutrition_facts_series)
all_recipes_source_df.to_csv("all_recipes_data_w_nutrition_facts.csv")

In [79]:
all_recipes_source_df.head()

Unnamed: 0,link,NER,website,duration,nutrition_facts,servings,Nutrition_facts_unstructured
0,www.allrecipes.com/recipe/241895/deconstructed...,"['orange juice', 'jiggers vodka']",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 1\nCalori...
1,www.allrecipes.com/recipe/20808/kettle-corn/,"['vegetable oil', 'white sugar', 'popcorn kern...",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 5\nCalori...
2,www.allrecipes.com/recipe/238733/pops-fabulous...,"['eggplants', 'green bell peppers', 'red bell ...",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 10\nCalor...
3,www.allrecipes.com/recipe/241254/chipotle-mang...,"['avocados', 'tomatoes', 'mango', 'cilantro', ...",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 20\nCalor...
4,www.allrecipes.com/recipe/261696/basic-homemad...,"['milk', 'heavy whipping cream', 'white vinega...",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 4\nCalori...


In [81]:
all_recipes_source_df.Nutrition_facts_unstructured.isnull().sum() / len(
    all_recipes_source_df
)

0.9400794814163328

In [82]:
len(nutrition_facts_series) / len(all_recipes_source_df)

0.06154923613147008

In [84]:
len(all_recipes_source_df), len(nutrition_facts_series)

(61398, 3779)

# continuation of scraping:

In [12]:
import pandas as pd
import numpy as np
import re
import time
import itertools
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud

from rich import inspect
from tqdm import tqdm
from pprint import pprint

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains

from selenium_stealth import stealth


import matplotlib.pyplot as plt

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [13]:
def scrape_all_recipes_nutrition_facts(url):
    print(f"\n {url}")
    driver = setup_web_driver(headless=True)

    driver.get(f"https://{url}")

    time.sleep(np.random.randint(1, 3))

    try:
        driver.find_element(By.ID, "onetrust-reject-all-handler").click()
    except:
        print("nop reject all detected!")

    time.sleep(np.random.randint(2, 3))

    try:
        button_url = driver.find_element(
            By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/button/span[1]"
        )

        time.sleep(np.random.randint(2, 3))

        driver.execute_script("arguments[0].scrollIntoView();", button_url)

        driver.execute_script("window.scrollBy(0,-100)", "")

        time.sleep(np.random.randint(1, 2))

        button_url.click()

        time.sleep(np.random.randint(1, 2))

        nutrition_facts_url = driver.find_element(
            By.XPATH, "//*[@id='mntl-nutrition-facts-label_1-0']/div"
        )

        return nutrition_facts_url.text
    except:
        print("this recipe is doomed!")

    driver.close()
    driver.quit()
    return np.nan

def setup_web_driver(headless: bool):
    chrome_driver_path = (
        "/Users/man-top/Downloads/chromedriver_mac_arm64/chromedriver"
    )

    service = Service(chrome_driver_path)
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless=new")

    driver = webdriver.Chrome(service=service, options=options, keep_alive=True)
    return driver

In [17]:

all_recipes_source_df = pd.read_csv("all_recipes_data_w_nutrition_facts.csv")
try:
    for link in tqdm(all_recipes_source_df.iloc[4543:].query(
        " Nutrition_facts_unstructured.isnull() "
    ).link.values):
        all_recipes_source_df.loc[
            all_recipes_source_df.query(f" link == '{link}' ").index,
            "Nutrition_facts_unstructured",
        ] = scrape_all_recipes_nutrition_facts(url=link)
except (KeyboardInterrupt, WebDriverException):
    all_recipes_source_df.to_csv("all_recipes_data_w_nutrition_facts.csv", index=False)


  0%|          | 0/56854 [00:00<?, ?it/s]


 www.allrecipes.com/recipe/230987/crisp-apples-with-citrus-dressing/


  0%|          | 1/56854 [00:10<161:45:58, 10.24s/it]


 www.allrecipes.com/recipe/16016/lentil-soup/


  0%|          | 2/56854 [00:15<111:49:07,  7.08s/it]

this recipe is doomed!

 www.allrecipes.com/recipe/18782/lamb-casserole/


  0%|          | 3/56854 [00:24<130:13:09,  8.25s/it]


 www.allrecipes.com/recipe/260651/mexican-tres-leches-cake-pastel-de-3-leches/


  0%|          | 4/56854 [00:33<136:07:11,  8.62s/it]


 www.allrecipes.com/recipe/69270/apricot-dessert/


  0%|          | 5/56854 [00:43<143:53:11,  9.11s/it]


 www.allrecipes.com/recipe/150942/moo-goo-gai-pan-ii/


  0%|          | 6/56854 [00:52<143:34:13,  9.09s/it]


 www.allrecipes.com/recipe/263428/circus-animal-cookies-with-icing/


  0%|          | 7/56854 [01:01<141:47:09,  8.98s/it]


 www.allrecipes.com/recipe/11932/fettuccini-with-basil-and-brie/


  0%|          | 8/56854 [01:11<146:48:00,  9.30s/it]


 www.allrecipes.com/recipe/169409/triple-fruit-drink/


  0%|          | 9/56854 [01:20<146:40:34,  9.29s/it]


 www.allrecipes.com/recipe/60723/grand-margarita/


  0%|          | 10/56854 [01:25<125:20:11,  7.94s/it]

this recipe is doomed!

 www.allrecipes.com/recipe/234863/ranchero-sauce/


  0%|          | 11/56854 [01:35<131:25:13,  8.32s/it]


 www.allrecipes.com/recipe/215354/melon-lime-cooler/


  0%|          | 12/56854 [01:44<135:24:56,  8.58s/it]


 www.allrecipes.com/recipe/264842/earl-greyhound/


  0%|          | 13/56854 [01:53<136:53:35,  8.67s/it]


 www.allrecipes.com/recipe/10844/apple-oatmeal-bars/


  0%|          | 14/56854 [02:02<138:41:19,  8.78s/it]


 www.allrecipes.com/recipe/245974/grilled-steak-summer-vegetable-rice/


  0%|          | 15/56854 [02:12<144:22:57,  9.14s/it]


 www.allrecipes.com/recipe/76175/cinnamon-bread-delight/


  0%|          | 16/56854 [02:21<147:27:35,  9.34s/it]


 www.allrecipes.com/recipe/241592/rainbow-veggie-pancakes-with-cottage-cheese/


  0%|          | 17/56854 [02:30<146:03:20,  9.25s/it]


 www.allrecipes.com/recipe/258043/chocolate-walnut-oatmeal/


  0%|          | 18/56854 [02:40<145:09:52,  9.19s/it]


 www.allrecipes.com/recipe/268476/double-chocolate-and-peppermint-ice-cream-sandwich-cookies/


  0%|          | 19/56854 [02:50<148:52:13,  9.43s/it]


 www.allrecipes.com/recipe/246398/ethans-hungry-mans-cast-iron-meatloaf/


  0%|          | 20/56854 [02:58<146:26:02,  9.28s/it]


 www.allrecipes.com/recipe/162056/sweet-potato-and-hazelnut-mashed-potatoes/


  0%|          | 21/56854 [03:08<149:34:38,  9.47s/it]


 www.allrecipes.com/recipe/244612/flat-iron-steak-marinade/


  0%|          | 22/56854 [03:17<147:21:41,  9.33s/it]


 www.allrecipes.com/recipe/76174/oatmeal-cream-cheese-patties/


  0%|          | 23/56854 [03:26<144:48:27,  9.17s/it]


 www.allrecipes.com/recipe/258341/grain-free-chicken-parm/


  0%|          | 24/56854 [03:36<149:47:13,  9.49s/it]


 www.allrecipes.com/recipe/74097/georgia-peach-pie/


  0%|          | 25/56854 [03:45<147:20:10,  9.33s/it]


 www.allrecipes.com/recipe/269381/chewy-sunflower-butter-cookies/


  0%|          | 26/56854 [03:55<147:36:44,  9.35s/it]


 www.allrecipes.com/recipe/258008/barilla-gluten-free-elbows-pasta-salad-with-yellow-cherry-tomatoes-fresh-oregano-baby-mozzarella/


  0%|          | 27/56854 [04:05<153:09:55,  9.70s/it]


 www.allrecipes.com/recipe/14694/cuban-pork-roast-i/


  0%|          | 28/56854 [04:14<150:04:01,  9.51s/it]


 www.allrecipes.com/recipe/223356/french-veggie-loaf/


  0%|          | 29/56854 [04:25<154:06:01,  9.76s/it]


 www.allrecipes.com/recipe/9816/anzac-biscuits-i/


  0%|          | 30/56854 [04:34<151:14:18,  9.58s/it]


 www.allrecipes.com/recipe/238197/amish-peach-dumplings/


  0%|          | 31/56854 [04:43<149:13:10,  9.45s/it]


 www.allrecipes.com/recipe/239928/greek-style-potatoes/


  0%|          | 32/56854 [04:53<152:09:01,  9.64s/it]


 www.allrecipes.com/recipe/17799/trinidad-sweetbread/


  0%|          | 33/56854 [05:03<152:00:28,  9.63s/it]


 www.allrecipes.com/recipe/213185/dees-sexy-spicy-shrimp-sausage-and-peppers/


  0%|          | 34/56854 [05:13<152:48:52,  9.68s/it]


 www.allrecipes.com/recipe/259042/grilled-flat-iron-steak/


  0%|          | 35/56854 [05:21<148:48:21,  9.43s/it]


 www.allrecipes.com/recipe/242065/white-russian-pudding-shot/


  0%|          | 36/56854 [05:31<151:15:02,  9.58s/it]


 www.allrecipes.com/recipe/241373/melt-in-your-mouth-beef-cacciatore/


  0%|          | 37/56854 [05:41<152:11:42,  9.64s/it]


 www.allrecipes.com/recipe/254487/holiday-turkey-brine/


  0%|          | 38/56854 [05:50<148:27:09,  9.41s/it]


 www.allrecipes.com/recipe/91914/ts-sweet-potato-fries/


  0%|          | 39/56854 [06:00<152:07:43,  9.64s/it]


 www.allrecipes.com/recipe/216238/quick-and-easy-sicilian-meatloaf/


  0%|          | 40/56854 [06:10<153:32:14,  9.73s/it]


 www.allrecipes.com/recipe/85988/pea-and-avocado-salad/


  0%|          | 41/56854 [06:20<154:12:46,  9.77s/it]


 www.allrecipes.com/recipe/11418/pumpkin-pie-bars/


  0%|          | 42/56854 [06:29<149:40:25,  9.48s/it]


 www.allrecipes.com/recipe/239076/nutella-brownies/


  0%|          | 43/56854 [06:39<152:18:31,  9.65s/it]


 www.allrecipes.com/recipe/231783/tao-hummus/


  0%|          | 44/56854 [06:48<150:26:38,  9.53s/it]


 www.allrecipes.com/recipe/220283/halibut-olympia/


  0%|          | 45/56854 [06:59<155:36:48,  9.86s/it]


 www.allrecipes.com/recipe/7899/banana-oatmeal-crumb-cake/


  0%|          | 46/56854 [07:07<150:10:16,  9.52s/it]


 www.allrecipes.com/recipe/239768/sausage-stuffed-crescent-cornucopias/


  0%|          | 47/56854 [07:16<147:02:06,  9.32s/it]


 www.allrecipes.com/recipe/9101/cranberry-cherry-pie/


  0%|          | 48/56854 [07:27<152:02:57,  9.64s/it]


 www.allrecipes.com/recipe/165888/coconut-almond-mocha-macaroons/


  0%|          | 49/56854 [07:35<148:25:47,  9.41s/it]


 www.allrecipes.com/recipe/245179/pumpkin-pie-cupcakes/


  0%|          | 50/56854 [07:40<127:32:53,  8.08s/it]

this recipe is doomed!

 www.allrecipes.com/recipe/25404/toads-in-the-hole-ii/


  0%|          | 51/56854 [07:50<135:54:59,  8.61s/it]


 www.allrecipes.com/recipe/24406/swinks-chili/


  0%|          | 52/56854 [08:00<142:37:38,  9.04s/it]


 www.allrecipes.com/recipe/231429/authentic-mexican-restaurant-style-salsa/


  0%|          | 53/56854 [08:10<146:43:05,  9.30s/it]


 www.allrecipes.com/recipe/257715/blood-orange-and-pineapple-muffins/


  0%|          | 54/56854 [08:19<143:41:40,  9.11s/it]


 www.allrecipes.com/recipe/46214/heart-chop-suey/


  0%|          | 55/56854 [08:29<146:37:38,  9.29s/it]


 www.allrecipes.com/recipe/13321/shank-beef-soup/


  0%|          | 56/56854 [08:38<145:03:31,  9.19s/it]


 www.allrecipes.com/recipe/92359/bat-cupcakes/


  0%|          | 57/56854 [08:48<148:27:32,  9.41s/it]


 www.allrecipes.com/recipe/181298/japanese-fusion-guacamole/


  0%|          | 58/56854 [08:57<150:44:28,  9.55s/it]


 www.allrecipes.com/recipe/217978/edens-nectar-bourbon-chicken/


  0%|          | 59/56854 [09:08<153:21:13,  9.72s/it]


 www.allrecipes.com/recipe/260128/slow-cooker-buffalo-chicken/


  0%|          | 60/56854 [09:17<153:59:43,  9.76s/it]


 www.allrecipes.com/recipe/222969/butterscotch-monkey-bread/


  0%|          | 61/56854 [09:27<154:28:24,  9.79s/it]


 www.allrecipes.com/recipe/19210/oatmeal-raisin-cookies-vii/


  0%|          | 62/56854 [09:37<155:07:24,  9.83s/it]


 www.allrecipes.com/recipe/143135/yoyos-bbq-beans/


  0%|          | 63/56854 [09:47<156:32:49,  9.92s/it]


 www.allrecipes.com/recipe/265804/chickpea-curry-with-carrots/


  0%|          | 64/56854 [09:57<156:33:02,  9.92s/it]


 www.allrecipes.com/recipe/145846/maple-walnut-quick-bread/


  0%|          | 65/56854 [10:07<156:47:10,  9.94s/it]


 www.allrecipes.com/recipe/256708/pecan-pie-bites/


  0%|          | 66/56854 [10:16<152:19:54,  9.66s/it]


 www.allrecipes.com/recipe/221112/baked-cauliflower-casserole/


  0%|          | 67/56854 [10:26<153:45:42,  9.75s/it]


 www.allrecipes.com/recipe/246503/best-bulgoki-korean-barbeque-beef/


  0%|          | 68/56854 [10:36<153:34:18,  9.74s/it]


 www.allrecipes.com/recipe/20116/peach-clouds/


  0%|          | 69/56854 [10:45<148:42:09,  9.43s/it]


 www.allrecipes.com/recipe/230119/quinoa-stuffing/


  0%|          | 70/56854 [10:53<145:22:59,  9.22s/it]


 www.allrecipes.com/recipe/229311/ultimate-baked-french-fries/


  0%|          | 71/56854 [11:02<144:16:18,  9.15s/it]


 www.allrecipes.com/recipe/215069/tasteful-tahini-salad-dressing/


  0%|          | 72/56854 [11:11<142:57:06,  9.06s/it]


 www.allrecipes.com/recipe/232097/turkey-tetrazzini-a-la-stouffers/


  0%|          | 73/56854 [11:21<147:01:08,  9.32s/it]


 www.allrecipes.com/recipe/231074/philly-shrimp-cocktail-dip/


  0%|          | 74/56854 [11:31<149:13:18,  9.46s/it]


 www.allrecipes.com/recipe/212662/turkey-breast-roulade-with-apple-and-raisin-stuffing/


  0%|          | 75/56854 [11:40<146:08:10,  9.27s/it]


 www.allrecipes.com/recipe/256480/caramel-pancake-syrup/


  0%|          | 76/56854 [11:49<144:25:03,  9.16s/it]


 www.allrecipes.com/recipe/235268/potato-latkes-with-caramelized-pears-goat-cheese-and-sherry-vinegar-drizzle/


  0%|          | 77/56854 [11:59<148:16:51,  9.40s/it]


 www.allrecipes.com/recipe/244484/pork-caesar-salad-from-smithfield/


  0%|          | 78/56854 [12:09<152:23:34,  9.66s/it]


 www.allrecipes.com/recipe/16153/carrot-pudding/


  0%|          | 79/56854 [12:18<148:30:07,  9.42s/it]


 www.allrecipes.com/recipe/9793/anise-biscotti/


  0%|          | 80/56854 [12:27<146:02:21,  9.26s/it]


 www.allrecipes.com/recipe/239838/coconut-cashew-cookies/


  0%|          | 81/56854 [12:36<148:12:13,  9.40s/it]


 www.allrecipes.com/recipe/240841/sweet-potato-banana-smoothie/


  0%|          | 82/56854 [12:46<150:41:40,  9.56s/it]


 www.allrecipes.com/recipe/264724/mixed-berries-smoothie/


  0%|          | 83/56854 [12:52<131:58:03,  8.37s/it]

this recipe is doomed!

 www.allrecipes.com/recipe/257415/edible-chocolate-chip-cookie-dough/


  0%|          | 84/56854 [13:01<133:57:25,  8.49s/it]


 www.allrecipes.com/recipe/25429/star-fruit-steak/


  0%|          | 85/56854 [13:09<135:36:52,  8.60s/it]


 www.allrecipes.com/recipe/255694/meatless-buffalo-dip/


  0%|          | 86/56854 [13:19<142:27:44,  9.03s/it]


 www.allrecipes.com/recipe/219922/wendys-zucchini-bread/


  0%|          | 87/56854 [13:29<143:20:09,  9.09s/it]


 www.allrecipes.com/recipe/237622/banana-crush/


In [None]:
all_recipes_source_df.query("link=='www.allrecipes.com/recipe/255995/gourmet-chocolate-covered-cherry-jell-o-shots/'")

Unnamed: 0.1,Unnamed: 0,link,NER,website,duration,nutrition_facts,servings,Nutrition_facts_unstructured
4027,4027,www.allrecipes.com/recipe/255995/gourmet-choco...,"['gelatin', 'boiling water', 'vodka', 'chocola...",www.allrecipes.com,,,,Nutrition Facts\nServings Per Recipe 12\nCalor...


In [16]:
all_recipes_source_df.query("Nutrition_facts_unstructured.notnull()").index[-1]

4543