<a href="https://colab.research.google.com/github/sugarforever/awesome-langchain-applications/blob/main/01_AI_Recipe_Parser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AI Recipe Parser

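Extract a structured list of ingredients (name, quantity, and unit) from a recipe web page, such as https://www.jamieoliver.com/recipes/pasta-recipes/smoked-salmon-pasta/, using LangChain and an OpenAI chat model.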

In [8]:
!pip install -q -U langchain openai beautifulsoup4

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/143.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m133.1/143.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.0/143.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [23]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

# A single ingredient of a recipe, split into its name, numeric quantity and unit.
# The Field descriptions are handed to the output parser built from these models,
# so each description must describe the field it is attached to. Previously the
# `quantity` and `unit` descriptions were swapped.
class Ingredient(BaseModel):
    # What the ingredient is, e.g. "spinach".
    name: str = Field(description="The name of the ingredient")
    # How much of the ingredient is needed (numeric amount only).
    quantity: float = Field(description="The amount of the ingredient required for the recipe. This can be represented using various units such as grams, cups, teaspoons, etc.")
    # The unit the quantity is measured in, e.g. "g".
    unit: str = Field(description="The specific unit of measurement corresponding to the quantity, such as grams, ounces, liters, etc.")

# Top-level extraction target: a recipe title plus its ingredient entries.
class Recipe(BaseModel):
    # Title of the recipe.
    name: str = Field(
        description="The name of the recipe",
    )
    # Every ingredient of the recipe, one Ingredient entry each.
    ingredients: List[Ingredient] = Field(
        description="The list of ingredients for the recipe",
    )

In [19]:
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

import os
from getpass import getpass

# Sampling temperature passed to the chat model.
temperature = 0.0

# Do not hardcode the API key in the notebook: read it from the environment
# and fall back to an interactive prompt that does not echo the input.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Chat model used to extract the recipe from the scraped markup.
model = ChatOpenAI(
    model="gpt-3.5-turbo-16k",
    temperature=temperature,
    openai_api_key=openai_api_key,
)

In [16]:
import requests
from bs4 import BeautifulSoup

# Download the recipe page and narrow the markup down to the recipe section.
url = "https://www.jamieoliver.com/recipes/pasta-recipes/smoked-salmon-pasta/"

# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Stays empty when the page could not be retrieved.
html_markup = ''
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the element with id 'recipe-single'
    recipe_element = soup.find(id='recipe-single')

    # Use just that element when present; otherwise keep the whole page.
    html_markup = str(recipe_element) if recipe_element else response.text
else:
    print("Failed to retrieve the website content.")


In [17]:
# Preview only the beginning of the scraped markup; displaying the whole
# string would flood the cell output.
html_markup[:500]

'<section class="family-food-inspiration-page-container one" id="recipe-single">\n<div class="container recipe-container">\n<div class="row">\n<div class="single-recipe-details-mobile col-xs-12 visible-xs-block">\n<div class="single-recipe-details-mobile-top">\n<h3 class="h1 single-recipe-title">Smoked salmon pasta</h3>\n<p class="subheading">Spinach, spring onion, lemon, curds &amp; Parmesan</p>\n</div>\n<div class="float-wrapper">\n</div>\n</div>\n</div>\n<svg class="print-logo" viewbox="0 0 373.89 65.28" xmlns="http://www.w3.org/2000/svg"><path class="cls-1" d="M23,12.63H45.44v2.62H42.75a3.18,3.18,0,0,0-2.17.61,2.33,2.33,0,0,0-.68,1.84V43.07a10.76,10.76,0,0,1-4.18,8.69c-2.8,2.26-6.59,3.41-11.36,3.41q-6.59,0-10.71-3.26c-2.76-2.17-4.14-4.68-4.14-7.51A7.18,7.18,0,0,1,11.61,39,6.93,6.93,0,0,1,16.71,37,6.33,6.33,0,0,1,21,38.52a4.92,4.92,0,0,1,1.76,3.82,4.68,4.68,0,0,1-1.27,3.44,4.14,4.14,0,0,1-3.06,1.3A7.38,7.38,0,0,1,17.35,47a8.07,8.07,0,0,0-1.36-.2,1.91,1.91,0,0,0-1.18.29,1.27,1.27,0,0

In [22]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
# Build the output parser first: its format instructions are injected into the prompt.
parser = PydanticOutputParser(pydantic_object=Recipe)

extraction_template = "Extract the recipe ingredients from the following HTML markup:\n{html}.\n{format_instructions}\n"
prompt = PromptTemplate(
    input_variables=["html"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=extraction_template,
)

# Fill in the scraped markup and send it to the chat model as one human message.
_input = prompt.format_prompt(html=html_markup)
human_message = HumanMessage(content=_input.to_string())
output = model([human_message])

# Parse the model's reply with the Recipe parser; the result is the cell's output.
parser.parse(output.content)

Recipe(name='Smoked salmon pasta', ingredients=[Ingredient(name='fresh lasagne sheets', quantity=125.0, unit='g'), Ingredient(name='spring onions', quantity=2.0, unit=''), Ingredient(name='spinach', quantity=80.0, unit='g'), Ingredient(name='smoked salmon', quantity=60.0, unit='g'), Ingredient(name='a lemon', quantity=0.5, unit=''), Ingredient(name='Parmesan cheese', quantity=5.0, unit='g'), Ingredient(name='olive oil', quantity=0.0, unit=''), Ingredient(name='cottage cheese', quantity=1.0, unit='tablespoon'), Ingredient(name='optional: extra virgin olive oil', quantity=0.0, unit='')])