Scraping food recipes

All recipes from 'https://www.allrecipes.com/'

# Imports

In [1]:
import os
import time

import pandas as pd
import requests as req
from bs4 import BeautifulSoup

In [2]:
#from flask import Flask
#from flask_sqlalchemy import SQLAlchemy

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String

from fastapi import Depends, FastAPI, HTTPException

# Scraping

In [3]:
# Landing page of the site; recipe links are later collected from this page.
url = 'https://www.allrecipes.com/'

In [4]:
# Download the landing page. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops the notebook early instead
# of silently parsing an HTTP error page in the following cells.
main_page = req.get(url, timeout=30)
main_page.raise_for_status()

In [12]:
# Parse the downloaded HTML with the lxml parser so it can be searched by tag/class.
main_page_sp = BeautifulSoup(main_page.text, 'lxml')

In [19]:
def get_recipes(recipe_urls=None, timeout=30):
    """Download every recipe page and extract its contents into a dictionary.

    Parameters
    ----------
    recipe_urls : iterable of str, optional
        URLs of the recipe pages to fetch. When omitted, the module-level
        ``recipes`` list is used, which is how the function behaved originally.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that page.

    Returns
    -------
    dict
        Maps each recipe title to a dict with the keys
        'general information', 'ingredients', 'directions / steps' and
        'nutritional information'.

    Notes
    -----
    The scrape is best-effort: a page that cannot be fetched or parsed is
    reported with ``print`` and skipped, so one broken page never aborts the
    whole run. No delay is inserted between requests.
    """
    urls = recipes if recipe_urls is None else recipe_urls
    recipe_dict = {}
    for recipe in urls:
        try:
            recipe_req = req.get(recipe, timeout=timeout)
            if recipe_req.status_code == 200:
                soup_recipe = BeautifulSoup(recipe_req.text, 'lxml')

                # Recipe title
                title = soup_recipe.find('div', attrs={'class': 'headline-wrapper'}).text

                # General information: each item reads "<label>:<value>"
                general_info = soup_recipe.find_all('div', attrs={'class': 'recipe-meta-item'})
                items_info = {item.text.split(':')[0]: item.text.split(':')[1]
                              for item in general_info}

                # Ingredients
                ingredients_list = soup_recipe.find_all('li', attrs={'class': 'ingredients-item'})
                ingredients = [ingredient.text for ingredient in ingredients_list]

                # Directions
                directions_list = soup_recipe.find_all('li', attrs={'class': 'instructions-section-item'})
                directions = [direction.text for direction in directions_list]

                # Nutrition facts
                nutrition_fact = soup_recipe.find('div', attrs={'class': 'recipeNutritionSectionBlock'}).text

                recipe_dict[title] = {'general information': items_info,
                                      'ingredients': ingredients,
                                      'directions / steps': directions,
                                      'nutritional information': nutrition_fact}
            else:
                # Report the status of the response; `recipe` is just the URL string.
                print('error with code', recipe_req.status_code)
        except Exception as error:
            # Deliberately broad: missing page elements or network errors skip the page.
            print('error', error)

    return recipe_dict
    

In [20]:
# Every <div class="category-page-list"> on the landing page; each one is
# searched for recipe cards in the next cell.
group_recipes = main_page_sp.find_all('div', attrs = {"class":"category-page-list"})

In [21]:
# Collect the link of every recipe card found in the landing-page groups.
recipes = []
for group in group_recipes:
    for recipe_card in group.find_all('div', attrs={'class': 'card'}):
        link = recipe_card.a
        # A card with no <a> tag, or an <a> without href, would raise when
        # subscripted; skip it instead of aborting the whole collection.
        if link is not None and link.get('href'):
            recipes.append(link['href'])


In [22]:
# Scrape every URL collected in `recipes`; issues one HTTP request per recipe.
recipe_dict = get_recipes()


In [23]:
# Display the scraped result (title -> recipe details) for inspection.
recipe_dict

{'Bananas Foster Belgian Waffles  ': {'general information': {' prep': ' 25 mins  ',
   ' cook': ' 15 mins  ',
   ' total': ' 40 mins  ',
   'Servings': ' 4  ',
   'Yield': ' 4 waffles  '},
  'ingredients': ['    1\u2009⅓ cups all-purpose flour    ',
   '    ¾ teaspoon baking soda    ',
   '    2 teaspoons white sugar    ',
   '    ¼ teaspoon salt    ',
   '    3 eggs    ',
   '    1\u2009½ teaspoons vanilla extract    ',
   '    1\u2009⅓ cups milk    ',
   '    ⅓ cup melted butter    ',
   '    2 teaspoons baking powder    ',
   '    ¼ cup butter    ',
   '    ⅔ cup brown sugar    ',
   '    2 teaspoons rum flavored extract    ',
   '    2 teaspoons vanilla extract    ',
   '    ½ teaspoon ground cinnamon    ',
   '    ¼ cup whole pecans    ',
   "    ½ cup pancake syrup (i.e. Mrs. Butterworth's®)    ",
   '    3 bananas, cut into 1/2 inch slices    ',
   '    1 cup heavy cream    ',
   '    ¼ teaspoon vanilla extract    ',
   "    1 tablespoon confectioners' sugar    "],
  'direction

# Database creation

In [24]:
# One column per recipe title; the rows are the four detail keys
# ('general information', 'ingredients', 'directions / steps',
# 'nutritional information').
data = pd.DataFrame.from_dict(recipe_dict)

In [25]:
# Display the recipe table for inspection.
data

Unnamed: 0,Bananas Foster Belgian Waffles,Baked Chicken and Sausage Gumbo,Air Fryer Beignets,Easy Mini King Cakes,Go-To Crawfish Etouffee,Air Fryer Hush Puppies,Cinnamon Swirl French Toast Casserole,Honey-Lime Chicken,Scottish Cock-a-Leekie Soup,Cheesy and Creamy Chicken Tetrazzini,...,Miso Soup,The Best Meatloaf I've Ever Made,Quick Tartar Sauce,Creamy Meatball Sub Casserole,Janet's Rich Banana Bread,Dumplings,Tres Leches (Milk Cake),Banana Pancakes I,Roasted Pork Loin,Three Ingredient Peanut Butter Cookies
general information,"{' prep': ' 25 mins ', ' cook': ' 15 mins ',...","{' prep': ' 35 mins ', ' cook': ' 1 hr 50 min...","{' prep': ' 10 mins ', ' cook': ' 15 mins ',...","{' prep': ' 15 mins ', ' cook': ' 10 mins ',...","{' prep': ' 20 mins ', ' cook': ' 20 mins ',...","{' prep': ' 10 mins ', ' cook': ' 10 mins ',...","{' prep': ' 15 mins ', ' cook': ' 50 mins ',...","{' prep': ' 10 mins ', ' cook': ' 10 mins ',...","{' prep': ' 30 mins ', ' cook': ' 3 hrs 40 mi...","{' prep': ' 35 mins ', ' cook': ' 40 mins ',...",...,"{' prep': ' 5 mins ', ' cook': ' 15 mins ', ...","{' prep': ' 15 mins ', ' cook': ' 1 hr 10 min...","{' prep': ' 5 mins ', ' total': ' 5 mins ', ...","{' prep': ' 15 mins ', ' cook': ' 45 mins ',...","{' prep': ' 10 mins ', ' cook': ' 1 hr ', ' ...","{' prep': ' 5 mins ', ' cook': ' 15 mins ', ...","{' prep': ' 15 mins ', ' cook': ' 30 mins ',...","{' prep': ' 5 mins ', ' cook': ' 10 mins ', ...","{' prep': ' 20 mins ', ' cook': ' 1 hr ', ' ...","{'Servings': ' 6 ', 'Yield': ' 1 dozen '}"
ingredients,"[ 1 ⅓ cups all-purpose flour , ¾ tea...","[ 1 cup all-purpose flour , ¼ cup ve...","[ cooking spray , ½ cup all-purpose...","[ cooking spray , ½ cup brown sugar...","[ ½ cup butter , 1 onion, diced ,...","[ nonfat cooking spray , 1 cup yell...","[ 1 stick butter, or as needed , 1 (...","[ ⅓ cup all-purpose flour , ¼ teaspo...","[ 2 ½ pounds leeks , 1 (4 pound) who...","[ cooking spray , ½ pound fettuccin...",...,"[ 2 teaspoons dashi granules , 4 cup...","[ 1 tablespoon butter , ¼ cup minced...","[ 1 cup mayonnaise , 2 teaspoons swe...","[ 1 pound ground beef , ⅓ cup choppe...","[ ½ cup butter, melted , 1 cup white...","[ 1 cup all-purpose flour , 2 teaspo...","[ 1 ½ cups all-purpose flour , 1 tea...","[ 1 cup all-purpose flour , 1 tables...","[ 3 cloves garlic, minced , 1 tables...","[ 1 cup peanut butter , 1 cup white ..."
directions / steps,[ Step 1 Preheat a Belgium waffle iron. W...,[ Step 1 Preheat oven to 400 degrees F (2...,[ Step 1 Preheat air fryer to 370 degrees...,[ Step 1 Preheat the oven to 360 degrees ...,"[ Step 1 Melt the butter in a large, heav...",[ Step 1 Preheat an air fryer to 390 degr...,[ Step 1 Butter 6 slices of cinnamon swir...,[ Step 1 Combine flour and cayenne pepper...,[ Step 1 Cut off the leek tops (the dark ...,[ Step 1 Preheat oven to 350 degrees F (1...,...,[ Step 1 In a medium saucepan over medium...,[ Step 1 Preheat oven to 350 degrees F (1...,"[ Step 1 Stir the mayonnaise, relish, mus...",[ Step 1 Preheat oven to 400 degrees F (2...,[ Step 1 Preheat oven to 350 degrees F (1...,"[ Step 1 Stir together flour, baking powd...",[ Step 1 Preheat oven to 350 degrees F (1...,"[ Step 1 Combine flour, white sugar, baki...",[ Step 1 Preheat oven to 350 degrees F (1...,[ Step 1 Preheat oven to 350 degrees F (1...
nutritional information,Per Serving: 1096 calories; protein 14.9g; c...,Per Serving: 758 calories; protein 40.8g; ca...,Per Serving: 88 calories; protein 1.8g; carb...,Per Serving: 618 calories; fat 23.7g; choles...,Per Serving: 236 calories; protein 18.8g; ca...,Per Serving: 85 calories; protein 2.8g; carb...,Per Serving: 368 calories; protein 10.9g; ca...,Per Serving: 367 calories; protein 29.9g; ca...,Per Serving: 810 calories; fat 46.2g; choles...,Per Serving: 478 calories; protein 23.6g; ca...,...,Per Serving: 63 calories; protein 5.5g; carb...,Per Serving: 213 calories; protein 17.9g; ca...,Per Serving: 200 calories; protein 0.3g; car...,Per Serving: 452 calories; protein 19.4g; ca...,Per Serving: 218 calories; protein 3.2g; car...,Per Serving: 105 calories; protein 2.8g; car...,Per Serving: 280 calories; protein 5.5g; car...,Per Serving: 193 calories; protein 5g; carbo...,Per Serving: 238 calories; protein 18.4g; ca...,Per Serving: 394 calories; protein 11.8g; ca...


## Definitions

In [54]:
# Connection string for the recipes database. Set the SQLALCHEMY_DATABASE_URL
# environment variable to supply real credentials without writing them into
# the notebook; the literal below is only the original local fallback.
SQLALCHEMY_DATABASE_URL = os.environ.get(
    'SQLALCHEMY_DATABASE_URL',
    'postgresql+psycopg2://postgres:1234@0.0.0.0:5432/food_web_scrapping',
)
engine = create_engine(SQLALCHEMY_DATABASE_URL)
# Session factory bound to the engine; sessions neither autocommit nor autoflush.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base class that the ORM models below inherit from.
Base = declarative_base()

In [55]:
class Recipe(Base):
    """ORM model for the "recipe" table; the other tables reference its ``id``."""
    __tablename__ = "recipe"    
    
    # Integer primary key (indexed).
    id = Column(Integer, primary_key=True, index=True)
    # Recipe title; the unique constraint rejects a second row with the same name.
    name = Column(String, unique=True)   

In [56]:
class Ingredients(Base):
    """ORM model storing the ingredients of one recipe.

    NOTE(review): both the table name and the text column are spelled
    "ingridients". They are part of the database schema and are used by
    ``ingest_data_from_df``, so fixing the spelling requires changing both
    together with the existing tables.
    """
    __tablename__ = "ingridients"
    # Integer primary key (indexed).
    ingredients_id = Column(Integer, primary_key=True, index=True)
    # Ingredients stored in a single string column.
    ingridients = Column(String)    
    # Owning recipe (foreign key to recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [57]:
class Directions(Base):
    """ORM model storing the preparation steps of one recipe."""
    __tablename__ = "directions"
    # Integer primary key (indexed).
    directions_id = Column(Integer, primary_key=True, index=True)
    # Steps stored in a single string column.
    directions = Column(String)
    # Owning recipe (foreign key to recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [58]:
class NutritionalInfo(Base):
    """ORM model storing the nutrition text of one recipe."""
    __tablename__ = "nutritional_info"
    # Integer primary key (indexed).
    nutritional_info_id = Column(Integer, primary_key=True, index=True)
    # Nutrition facts stored as a single string.
    information = Column(String)
    # Owning recipe (foreign key to recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [59]:
# FastAPI application instance. NOTE(review): no routes are registered on it in
# the cells shown here.
app = FastAPI()

In [60]:
def get_db():
    """Yield a fresh database session and always close it afterwards.

    Generator: a session is created from ``SessionLocal``, handed to the
    consumer, and closed as soon as the generator is resumed, closed, or has an
    exception thrown into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal exhaustion as well as on errors in the consumer.
        session.close()

## Functions

In [61]:
def get_id_by_recipe_name(name):
    """Look up a recipe by its exact name using the module-level ``db`` session.

    Despite the function name, the return value is the first matching
    ``Recipe`` row (or ``None`` when no recipe has that name), not the bare id;
    callers read ``.id`` from the result themselves.
    """
    recipe_query = db.query(Recipe)
    matching = recipe_query.filter(Recipe.name == name)
    return matching.first()

In [62]:
def ingest_data_from_df(dataframe):
    """Insert every recipe of ``dataframe`` that is not yet in the database.

    Parameters
    ----------
    dataframe : pandas.DataFrame or mapping
        Iterating it must yield recipe names, and ``dataframe[name]`` must
        provide the entries 'ingredients', 'directions / steps' and
        'nutritional information'.

    Notes
    -----
    Uses the module-level ``db`` session. Each new recipe and its three child
    rows are written in one transaction: if anything fails, that recipe is
    rolled back completely and the exception is re-raised, so a recipe is never
    stored without its details. Recipes whose name already exists are skipped.
    The session is closed when the function exits, whether or not it succeeded.
    """
    try:
        for recipe_name in dataframe:
            # Truthy result means a recipe with this name is already stored.
            if get_id_by_recipe_name(recipe_name):
                continue

            recipe_data = dataframe[recipe_name]
            try:
                db_recipe = Recipe(name=recipe_name)
                db.add(db_recipe)
                # flush() sends the INSERT and fills in db_recipe.id without
                # committing, so no extra lookup query is needed for the key.
                db.flush()
                new_recipe_id = db_recipe.id

                db.add_all([
                    Ingredients(ingridients=recipe_data['ingredients'],
                                recipe_id=new_recipe_id),
                    Directions(directions=recipe_data['directions / steps'],
                               recipe_id=new_recipe_id),
                    NutritionalInfo(information=recipe_data['nutritional information'],
                                    recipe_id=new_recipe_id),
                ])
                db.commit()
            except Exception:
                # Undo the partially written recipe before propagating the error.
                db.rollback()
                raise
    finally:
        db.close()

## Ingesting and creating

In [63]:
# Rebuild the schema from scratch. WARNING: drop_all removes every table
# declared on Base, so previously ingested rows are lost each time this cell runs.
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)

In [64]:
# Module-level session; get_id_by_recipe_name and ingest_data_from_df read this
# global, so it must exist before either of them is called.
db = SessionLocal()

In [65]:
# Store every scraped recipe from the DataFrame in the database.
ingest_data_from_df(data)

In [66]:
# Manual smoke test: insert one placeholder recipe (the Spanish name means
# "Testing the query"), commit it, reload it from the database, then close
# the session.
db_recipe = Recipe(name = 'Probando el query') 
db.add(db_recipe)
db.commit()
db.refresh(db_recipe)
db.close()