Scraping food recipes

All recipes from 'https://www.allrecipes.com/'

# Imports

In [3]:
import os
import time

import pandas as pd
import requests as req
from bs4 import BeautifulSoup

In [4]:
#from flask import Flask
#from flask_sqlalchemy import SQLAlchemy

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String

from fastapi import Depends, FastAPI, HTTPException

# Scraping

In [3]:
# Landing page of the site; it is downloaded below and its recipe cards provide the recipe links.
url = 'https://www.allrecipes.com/'

In [4]:
# Download the landing page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops the notebook here on an HTTP
# error instead of letting later cells parse an error page.
main_page = req.get(url, timeout=10)
main_page.raise_for_status()

In [5]:
# Parse the landing page's HTML text with the 'lxml' parser.
main_page_sp = BeautifulSoup(main_page.text, 'lxml')

In [6]:
def get_recipes(recipe_urls=None, timeout=10):
    """Download and parse recipe pages into a nested dictionary.

    Parameters
    ----------
    recipe_urls : iterable of str, optional
        URLs of the recipe pages to fetch. When omitted, the notebook-level
        ``recipes`` list is used, so the original zero-argument call keeps
        working.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that URL.

    Returns
    -------
    dict
        Maps the headline text of every successfully parsed recipe to a dict
        with the keys ``'general information'``, ``'ingredients'``,
        ``'directions / steps'`` and ``'nutritional information'``. Text is
        stored exactly as extracted; surrounding whitespace is not stripped.

    Notes
    -----
    Scraping is best-effort: a URL that answers with a non-200 status, or a
    page that raises while being fetched or parsed, is reported with
    ``print`` and skipped; the remaining URLs are still processed.
    """
    if recipe_urls is None:
        # Original behaviour: read the notebook-level list of recipe URLs.
        recipe_urls = recipes

    recipe_dict = {}
    for recipe in recipe_urls:
        try:
            recipe_req = req.get(recipe, timeout=timeout)
            if recipe_req.status_code != 200:
                # The status lives on the response; `recipe` is only the URL string.
                print('error with code', recipe_req.status_code)
                continue

            soup_recipe = BeautifulSoup(recipe_req.text, 'lxml')

            # Recipe title
            title = soup_recipe.find('div', attrs={'class': 'headline-wrapper'}).text

            # General information: "label: value" items
            items_info = {}
            for meta_item in soup_recipe.find_all('div', attrs={'class': 'recipe-meta-item'}):
                label_and_value = meta_item.text.split(':')
                if len(label_and_value) < 2:
                    # No ':' separator: skip this item rather than losing the whole recipe.
                    continue
                items_info[label_and_value[0]] = label_and_value[1]

            # Ingredients
            ingredients_list = soup_recipe.find_all('li', attrs={'class': 'ingredients-item'})
            ingredients = [ingredient.text for ingredient in ingredients_list]

            # Directions
            directions_list = soup_recipe.find_all('li', attrs={'class': 'instructions-section-item'})
            directions = [direction.text for direction in directions_list]

            # Nutrition facts
            nutrition_fact = soup_recipe.find('div', attrs={'class': 'recipeNutritionSectionBlock'}).text

            recipe_dict[title] = {'general information': items_info,
                                  'ingredients': ingredients,
                                  'directions / steps': directions,
                                  'nutritional information': nutrition_fact}
        except Exception as error:
            # Deliberately broad: one broken page must not abort the whole crawl.
            print('error', error)

    return recipe_dict
    

In [7]:
# Every <div class="category-page-list"> container on the landing page.
group_recipes = main_page_sp.find_all('div', class_='category-page-list')

In [8]:
# Collect the link of every recipe card found in the category containers.
recipes = []
for group in group_recipes:
    for recipe_card in group.find_all('div', attrs={'class': 'card'}):
        # A card without an <a href=...> would make `recipe_card.a['href']` raise;
        # skip such cards instead of aborting the whole collection.
        link = recipe_card.find('a', href=True)
        if link is not None:
            recipes.append(link['href'])


In [9]:
# Scrape every collected recipe URL (one HTTP request per recipe).
recipe_dict = get_recipes()


In [10]:
# Display the scraped result; note that the whole dictionary is rendered.
recipe_dict

{'Shrimp and Pineapple Stir-Fry  ': {'general information': {' prep': ' 20 mins  ',
   ' cook': ' 10 mins  ',
   ' total': ' 30 mins  ',
   'Servings': ' 4  ',
   'Yield': ' 4 servings  '},
  'ingredients': ['    ¼ cup pineapple juice    ',
   '    ¼ cup hoisin sauce    ',
   '    ¼ cup low-sodium soy sauce    ',
   '    1 tablespoon sherry    ',
   '    1 tablespoon cornstarch    ',
   '    2 tablespoons vegetable oil    ',
   '    2 teaspoons sesame oil    ',
   '    1 pound large shrimp, peeled and deveined    ',
   '    2 cups pineapple chunks, fresh or canned    ',
   '    1 medium red bell pepper, cored and sliced vertically    ',
   '    1 medium onion, vertically sliced    ',
   '    ½ cup snow peas    ',
   '    2 teaspoons minced garlic    ',
   '    ¼ teaspoon crushed red pepper flakes, or more to taste    ',
   '    2 medium scallions, sliced diagonally    ',
   '    1 teaspoon sesame seeds    '],
  'directions / steps': ['    Step 1   Whisk pineapple juice, hoisin sauce, s

# Database creation

In [11]:
# One column per recipe title; the inner keys become the row index.
data = pd.DataFrame.from_dict(recipe_dict, orient='columns')

In [12]:
# Display the recipe DataFrame built from recipe_dict.
data

Unnamed: 0,Shrimp and Pineapple Stir-Fry,Cinnamon Swirl French Toast Casserole,Scottish Cock-a-Leekie Soup,Easy Mini King Cakes,Air Fryer Hamburger Patties,Honey-Lime Chicken,Cheesy and Creamy Chicken Tetrazzini,Go-To Crawfish Etouffee,Air Fryer Hush Puppies,Mississippi Roast - Slow Cooker Pepperoncini Pot Roast,...,Miso Soup,Slow Cooker Funeral Potatoes (Hash Brown Casserole),Fabulous Fried Cabbage,Favorite Garlic Noodles,Funeral Potatoes,Simple Roasted Butternut Squash,Quick Tartar Sauce,Buffalo Chicken Dip,Dumplings,Tres Leches (Milk Cake)
general information,"{' prep': ' 20 mins ', ' cook': ' 10 mins ',...","{' prep': ' 15 mins ', ' cook': ' 50 mins ',...","{' prep': ' 30 mins ', ' cook': ' 3 hrs 40 mi...","{' prep': ' 15 mins ', ' cook': ' 10 mins ',...","{' prep': ' 10 mins ', ' cook': ' 10 mins ',...","{' prep': ' 10 mins ', ' cook': ' 10 mins ',...","{' prep': ' 35 mins ', ' cook': ' 40 mins ',...","{' prep': ' 20 mins ', ' cook': ' 20 mins ',...","{' prep': ' 10 mins ', ' cook': ' 10 mins ',...","{' prep': ' 10 mins ', ' cook': ' 8 hrs ', '...",...,"{' prep': ' 5 mins ', ' cook': ' 15 mins ', ...","{' prep': ' 10 mins ', ' cook': ' 3 hrs ', '...","{' prep': ' 5 mins ', ' cook': ' 45 mins ', ...","{' prep': ' 5 mins ', ' cook': ' 20 mins ', ...","{' prep': ' 5 mins ', ' cook': ' 10 mins ', ...","{' prep': ' 15 mins ', ' cook': ' 25 mins ',...","{' prep': ' 5 mins ', ' total': ' 5 mins ', ...","{' prep': ' 5 mins ', ' cook': ' 40 mins ', ...","{' prep': ' 5 mins ', ' cook': ' 15 mins ', ...","{' prep': ' 15 mins ', ' cook': ' 30 mins ',..."
ingredients,"[ ¼ cup pineapple juice , ¼ cup hois...","[ 1 stick butter, or as needed , 1 (...","[ 2 ½ pounds leeks , 1 (4 pound) who...","[ cooking spray , ½ cup brown sugar...","[ 1 pound 80% lean ground beef , 2 t...","[ ⅓ cup all-purpose flour , ¼ teaspo...","[ cooking spray , ½ pound fettuccin...","[ ½ cup butter , 1 onion, diced ,...","[ nonfat cooking spray , 1 cup yell...","[ 1 (4 pound) beef chuck roast , ¼ c...",...,"[ 2 teaspoons dashi granules , 4 cup...","[ 1 bag (32 oz.) frozen diced potatoes ,...","[ 2 teaspoons butter , 1 (15 ounce) ...","[ 1 (8 ounce) package egg noodles , ...",[ 1 (16 ounce) jar process cheese sauce ...,"[ 1 butternut squash - peeled, seeded, and ...","[ 1 cup mayonnaise , 2 teaspoons swe...","[ 2 (10 ounce) cans chunk chicken, drained ...","[ 1 cup all-purpose flour , 2 teaspo...","[ 1 ½ cups all-purpose flour , 1 tea..."
directions / steps,"[ Step 1 Whisk pineapple juice, hoisin sa...",[ Step 1 Butter 6 slices of cinnamon swir...,[ Step 1 Cut off the leek tops (the dark ...,[ Step 1 Preheat the oven to 360 degrees ...,[ Step 1 Preheat an air fryer to 400 degr...,[ Step 1 Combine flour and cayenne pepper...,[ Step 1 Preheat oven to 350 degrees F (1...,"[ Step 1 Melt the butter in a large, heav...",[ Step 1 Preheat an air fryer to 390 degr...,[ Step 1 Place roast in a slow cooker. Fo...,...,[ Step 1 In a medium saucepan over medium...,"[ Step 1 Combine frozen potatoes, soup, o...",[ Step 1 Bring the butter and chicken bro...,[ Step 1 Fill a large pot with lightly sa...,[ Step 1 Preheat oven to 325 degrees F (1...,[ Step 1 Preheat oven to 400 degrees F (2...,"[ Step 1 Stir the mayonnaise, relish, mus...",[ Step 1 Heat chicken and hot sauce in a ...,"[ Step 1 Stir together flour, baking powd...",[ Step 1 Preheat oven to 350 degrees F (1...
nutritional information,Per Serving: 306 calories; protein 21.6g; ca...,Per Serving: 368 calories; protein 10.9g; ca...,Per Serving: 810 calories; fat 46.2g; choles...,Per Serving: 618 calories; fat 23.7g; choles...,Per Serving: 279 calories; protein 23.1g; fa...,Per Serving: 367 calories; protein 29.9g; ca...,Per Serving: 478 calories; protein 23.6g; ca...,Per Serving: 236 calories; protein 18.8g; ca...,Per Serving: 85 calories; protein 2.8g; carb...,Per Serving: 537 calories; protein 36.5g; ca...,...,Per Serving: 63 calories; protein 5.5g; carb...,Per Serving: 337 calories; protein 10.2g; ca...,Per Serving: 66 calories; protein 2.9g; carb...,Per Serving: 386 calories; protein 9.6g; car...,Per Serving: 418 calories; protein 10.7g; ca...,Per Serving: 177 calories; protein 2.6g; car...,Per Serving: 200 calories; protein 0.3g; car...,Per Serving: 284 calories; protein 11.1g; ca...,Per Serving: 105 calories; protein 2.8g; car...,Per Serving: 280 calories; protein 5.5g; car...


## Definitions

In [13]:
# Connection URL. It is read from the SQLALCHEMY_DATABASE_URL environment variable
# when that is set, so real credentials do not have to live in the notebook; the
# fallback is the original local development URL.
SQLALCHEMY_DATABASE_URL = os.environ.get(
    'SQLALCHEMY_DATABASE_URL',
    'postgresql+psycopg2://postgres:1234@0.0.0.0:5432/food_web_scrapping',
)
engine = create_engine(SQLALCHEMY_DATABASE_URL)
# Session factory bound to the engine, configured with autocommit and autoflush off.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base class that the ORM models below inherit from.
Base = declarative_base()

  """)


In [14]:
class Recipe(Base):
    """ORM model for the ``recipe`` table, identified by a unique name."""

    __tablename__ = "recipe"

    # Integer primary key (indexed).
    id = Column(Integer, index=True, primary_key=True)
    # Recipe name; the unique constraint rejects a second row with the same name.
    name = Column(String, unique=True)

In [15]:
class Ingredients(Base):
    """ORM model for the ingredient text linked to a recipe.

    The table name and the text column keep the spelling ``ingridients``:
    renaming either would change the database schema and the constructor
    keyword used when rows are created.
    """

    __tablename__ = "ingridients"

    # Integer primary key (indexed).
    ingredients_id = Column(Integer, index=True, primary_key=True)
    # Ingredient text.
    ingridients = Column(String)
    # Owning recipe (references recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [16]:
class Directions(Base):
    """ORM model for the ``directions`` table: preparation text linked to a recipe."""

    __tablename__ = "directions"

    # Integer primary key (indexed).
    directions_id = Column(Integer, index=True, primary_key=True)
    # Directions text.
    directions = Column(String)
    # Owning recipe (references recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [17]:
class NutritionalInfo(Base):
    """ORM model for the ``nutritional_info`` table: nutrition text linked to a recipe."""

    __tablename__ = "nutritional_info"

    # Integer primary key (indexed).
    nutritional_info_id = Column(Integer, index=True, primary_key=True)
    # Nutrition text.
    information = Column(String)
    # Owning recipe (references recipe.id).
    recipe_id = Column(Integer, ForeignKey("recipe.id"))

In [21]:
# FastAPI application object; no routes are registered on it in the cells shown here.
app = FastAPI()

In [None]:
def get_db():
    """Yield a new ``SessionLocal`` session and close it afterwards.

    This is a generator: the session is created on the first ``next()`` and
    closed when the generator is resumed, closed, or has an exception thrown
    into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal completion, on generator close, and on error alike.
        session.close()

## Functions

In [51]:
def get_id_by_recipe_name(name):
    """Return the first ``Recipe`` row whose name equals ``name``.

    Despite the function's name, the value returned is whatever ``.first()``
    yields for the query (the row itself, not its id). The notebook-level
    ``db`` session is used for the query.
    """
    matching_recipes = db.query(Recipe).filter(Recipe.name == name)
    return matching_recipes.first()

In [52]:
def ingest_data_from_df(dataframe):
    """Store every recipe column of ``dataframe`` that is not in the database yet.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One column per recipe name; each column is indexed with the labels
        ``'ingredients'``, ``'directions / steps'`` and
        ``'nutritional information'``.

    Notes
    -----
    * Uses the notebook-level ``db`` session and closes it before returning,
      even when an insert fails.
    * A recipe and its three child rows are written in a single transaction:
      if anything fails the transaction is rolled back and the exception is
      re-raised, so a recipe is never left stored without its child rows.
    * Recipes whose name is already stored are skipped.
    * NOTE(review): the 'ingredients' and 'directions / steps' values produced
      by the scraper are Python lists while the mapped columns are ``String``;
      confirm how the database driver serialises them.
    """
    try:
        for recipe_name in dataframe:
            # The helper returns the matching row (or None), not just an id.
            if get_id_by_recipe_name(recipe_name):
                continue

            recipe_column = dataframe[recipe_name]
            try:
                db_recipe = Recipe(name=recipe_name)
                db.add(db_recipe)
                # flush() sends the INSERT so the primary key is populated,
                # without committing the transaction yet.
                db.flush()
                recipe_id_ = db_recipe.id

                db.add_all([
                    Ingredients(ingridients=recipe_column['ingredients'],
                                recipe_id=recipe_id_),
                    Directions(directions=recipe_column['directions / steps'],
                               recipe_id=recipe_id_),
                    NutritionalInfo(information=recipe_column['nutritional information'],
                                    recipe_id=recipe_id_),
                ])
                db.commit()
            except Exception:
                # Undo the partially written recipe and leave the session usable.
                db.rollback()
                raise
    finally:
        db.close()

## Ingesting and creating

In [None]:
# WARNING: destructive. The tables registered on Base are dropped first and then
# recreated, so running this cell starts from empty tables. The order matters.
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)

In [None]:
# Notebook-level session; get_id_by_recipe_name and ingest_data_from_df read this global.
db = SessionLocal()

In [53]:
# Store the scraped recipes; recipes whose name is already in the database are skipped.
ingest_data_from_df(data)

In [None]:
#db_recipe = Recipe(name = 'Probando el query') 
#db.add(db_recipe)
#db.commit()
#db.refresh(db_recipe)
#db.close()