In [1]:
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import ollama
import json

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
LLM_QUERY = "Hi! i want to eat japanese style cuisine but currently i have a high blood pressure."
# LLM_MODEL = "llama3.2-vision:90b"
# LLM_MODEL = "gemma2:27b"
LLM_MODEL = "llama3.1:70b"
# LLM_MODEL = "llama3.2:latest"
# LLM_MODEL = "llama3.2-vision:90b"

LLM_INGREDIENTS = """
"bananas",
"white mushrooms",
"Spring Onion",
"tomatoes",
"cucumber",
"corn on the cob",
"figs"
"""

In [12]:
# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

## Cookpad Retrieval

In [13]:
class RecipeScraper:
    def __init__(self, query, ingredients, llm_model):
        self.query = query
        self.llm_model = llm_model
        self.ingredients = ingredients  # User-provided ingredients
        self.recipes = []  # To store basic recipe info (title and link)
        self.all_recipes_details = []  # To store detailed recipe info
        self.model = SentenceTransformer('all-MiniLM-L6-v2')  # Embedding model

    def generate_llm_response(self, prompt):
        """Generate a response from the LLM."""
        url = "http://localhost:11434/api/generate"
        headers = {
            "Content-Type": "application/json"
        }
        data = {
            "model": self.llm_model,
            "prompt": prompt,
            "stream": False,
            "options": {"num_ctx": 16000}
        }
        try:
            print("Communicating with LLM...")
            response = requests.post(url, headers=headers, data=json.dumps(data))
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
            results = str(json.loads(response.text)["response"]).strip()
            return results.lower()
        except requests.exceptions.RequestException as e:
            print(f"Error communicating with LLM: {e}")
            return "0"  # Default to 0 if there's an error
        
    def evaluate_recipes(self):
        """Evaluate recipes based on ingredient overlap and semantic relevance using LLM."""
        filtered_recipes = []
        for recipe in self.all_recipes_details:
            # Calculate ingredient overlap
            overlap = len(set(recipe["ingredients"]).intersection(set(self.ingredients)))
            
            # Prepare a prompt for the LLM to evaluate relevance
            prompt = (
                f"Evaluate the following recipe based on its relevance to these ingredients: "
                f"{', '.join(self.ingredients)}.\n\n"
                f"Recipe Title: {recipe['title']}\n"
                f"Ingredients: {', '.join(recipe['ingredients'])}\n"
                f"Steps: {'. '.join(recipe['instructions'])}\n\n"
                f"Score the relevance on a scale from 0 to 10, where 10 is highly relevant."
                f"Answer only the score, no need additional text or sentence, the valur can be float"
            )
            
            # Get relevance score from the LLM
            llm_response = self.generate_llm_response(prompt)
            try:
                relevance_score = float(llm_response)  # Convert response to an integer
            except ValueError:
                relevance_score = 0  # Default to 0 if the response is not a valid number
            
            # Store overlap and relevance score
            recipe["overlap"] = overlap
            recipe["relevance_score"] = relevance_score
            print("relevance_score:", relevance_score)
            filtered_recipes.append(recipe)
        
        # Sort recipes by combined score (weighted overlap and relevance)
        return sorted(
            filtered_recipes,
            key=lambda x: (x["relevance_score"], x["overlap"]),
            reverse=True
        )
    
    def search_recipes(self):
        """Search for recipes on Cookpad using the provided query."""
        url = f'https://cookpad.com/us/search/{self.query.replace(" ", "%20")}'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
        }
        
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print("Failed to retrieve data. Status code:", response.status_code)
            return
        
        soup = BeautifulSoup(response.text, 'html.parser')
        self.recipes = []  # Clear previous results
        for recipe in soup.select('a.block-link__main'):
            if recipe:
                title = recipe.text.strip()
                link = 'https://cookpad.com' + recipe['href']
                self.recipes.append({'title': title, 'link': link})
        
        print(f"Found {len(self.recipes)} recipes for query '{self.query}'.")

    def get_recipe_details(self, url):
        """Fetch detailed recipe information from a given recipe link."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to retrieve recipe details. Status code: {response.status_code}")
            return {}

        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extract introduction
        intro_elem = soup.select_one('.text-cookpad-14')  # Adjust based on actual page structure
        introduction = intro_elem.text.strip() if intro_elem else "No introduction available"
        
        # Extract ingredients
        ingredients = [ingredient.text.strip() for ingredient in soup.select('.ingredient-list')]
        
        # Extract instructions (steps to cook)
        instructions = [step.text.strip() for step in soup.select('.step')]
        
        return {
            'introduction': introduction,
            'ingredients': ingredients,
            'instructions': instructions
        }

    def fetch_all_recipe_details(self):
        """Loop through all recipes found and fetch detailed information."""
        # self.all_recipes_details = []  # Clear previous results
        for idx, recipe in enumerate(self.recipes):
            print(f"Fetching details for {idx+1}: {recipe['title']}")
            details = self.get_recipe_details(recipe['link'])
            
            if details:
                recipe_data = {
                    "title": recipe['title'],
                    "introduction": details['introduction'],
                    "ingredients": details['ingredients'],
                    "instructions": details['instructions']
                }
                self.all_recipes_details.append(recipe_data)

    # def evaluate_recipes(self):
    #     """Evaluate recipes based on ingredient overlap and relevance."""
    #     filtered_recipes = []
    #     for recipe in self.all_recipes_details:
    #         overlap = len(set(recipe["ingredients"]).intersection(set(self.ingredients)))
    #         recipe["overlap"] = overlap
    #         filtered_recipes.append(recipe)
    #     return sorted(filtered_recipes, key=lambda x: x["overlap"], reverse=True)

    def refine_query(self, top_recipes):
        """Refine the query using the LLM."""
        # prompt = f"""
        # User Query: {self.query}
        # Retrieved Recipes: {[recipe['title'] for recipe in top_recipes]}
        # Available Ingredients: {', '.join(self.ingredients)}
        # Suggest a refined query to retrieve more relevant recipes.
        # Answer only the refined query, do not add anything or other explaination!
        # Query maximum is 5 words, if you want to reduce the number of words is okay!
        # If you did not find any, try reduce the words!
        # Try use only the ingrediets as keyword!
        # """
        
        prompt = f"""
        User Query: {self.query}
        Retrieved Recipe Titles: {[recipe['title'] for recipe in top_recipes]}
        Available Ingredients: {', '.join(self.ingredients)}
        Based on the user query and retrieved recipe titles, refine the query to retrieve recipes that are more relevant to the available ingredients. 

        Guidelines for refinement:
        1. Use only the ingredients listed as keywords.
        2. The refined query should be concise, with a maximum of 5 words.
        3. If no relevant results are found, simplify or reduce the query to essential ingredient keywords.
        4. Do not include any explanations or additional text—only provide the refined query.

        Provide only the refined query as the output.
        """
        
        url = "http://localhost:11434/api/generate"
        headers = {
            "Content-Type": "application/json"
        }
        data = {
            "model": LLM_MODEL,
            "prompt": prompt,
            "stream": False,
            "options": {"num_ctx": 16000}
        }
        print("starting")
        response = requests.post(url, headers=headers, data=json.dumps(data))
        print(response.status_code)
        
        # response = ollama.generate(model="llama3.1:70b", prompt=prompt)
        results = str(json.loads(response.text)["response"]).lower()
        return results.strip()

    def iterative_search(self, max_iterations=5, relevance_threshold=3):
        """Iteratively refine the query and search for relevant recipes."""
        current_query = self.query
        iteration = 0
        
        # self.recipe_evaluator = RecipeEvaluator(self.ingredients, LLM_MODEL)

        while iteration < max_iterations:
            print(f"Iteration {iteration + 1}: Searching with query '{current_query}'")
            self.query = current_query
            self.search_recipes()
            self.fetch_all_recipe_details()
            
            if not self.all_recipes_details:
                print("No recipes found. Refining query...")
                current_query = self.refine_query([])
            else:
                evaluated_recipes = self.evaluate_recipes()
                top_recipes = evaluated_recipes[:3]  # Select top 3 recipes

                # Check if relevant recipes meet the threshold
                if any(recipe["overlap"] >= relevance_threshold for recipe in top_recipes):
                    print("Relevant recipes found!")
                    return top_recipes
                
                print("Refining query based on top recipes...")
                current_query = self.refine_query(top_recipes)

            iteration += 1

        print("Max iterations reached. Returning best available recipes.")
        return evaluated_recipes[:3] if 'evaluated_recipes' in locals() else []


In [14]:
scraper = RecipeScraper(LLM_QUERY, LLM_INGREDIENTS, LLM_MODEL)

In [15]:
# Perform iterative search
relevant_recipes = scraper.iterative_search(max_iterations=5)

Iteration 1: Searching with query 'Hi! i want to eat japanese style cuisine but currently i have a high blood pressure.'
Found 0 recipes for query 'Hi! i want to eat japanese style cuisine but currently i have a high blood pressure.'.
No recipes found. Refining query...
starting
200
Iteration 2: Searching with query 'low sodium japanese dishes using mushrooms and onions'
Found 3 recipes for query 'low sodium japanese dishes using mushrooms and onions'.
Fetching details for 1: Mike's Ramen Soup Starters
Fetching details for 2: Mike's Spicy Thai Shrimp Ramen Noodles
Fetching details for 3: Caramelized Crispy Pork Ramen
Communicating with LLM...
relevance_score: 2.4
Communicating with LLM...
relevance_score: 5.4
Communicating with LLM...
relevance_score: 1.3
Refining query based on top recipes...
starting
200
Iteration 3: Searching with query 'low sodium mushroom onion dishes'
Found 30 recipes for query 'low sodium mushroom onion dishes'.
Fetching details for 1: Round Steak with Mushrooms

In [16]:
relevant_recipes

[{'title': 'Round Steak with Mushrooms and onions',
  'introduction': 'To start creating your recipe library, please register or login.',
  'ingredients': ['2 + lbs London Broil (top round)\n\n\n Salt and pepper to taste\n\n\n Large yellow onion diced\n\n\n3 tablespoons oil (divided)\n\n\n2 tablespoons butter\n\n\n8 + ounces mushrooms sliced\n\n\n3 cups low sodium beef broth\n\n\n1 teaspoon thyme\n\n\n1 tablespoons Worcestershire sauce\n\n\n1-2 tablespoons cornstarch dissolved in water'],
  'instructions': ['1\n    \n\n\n\nPreheat oven to 300 degrees F',
   '2\n    \n\n\n\nCut London broil into 1 to 2 inch squares. Pound them flat with the rough side of the tenderizer mallet.',
   '3\n    \n\n\n\nSalt and pepper pounded London broil on both sides.',
   '4\n    \n\n\n\nIn a Dutch oven heat 2 tablespoons of oil over medium high heat.',
   '5\n    \n\n\n\nSear the beef in batches so not to crowd the meat. Remove to dish and set aside. Add additional oil if needed.',
   '6\n    \n\n\n\nAdd

# Recipes

In [17]:
def new_prompt_formatter(
                     combined_recipes_list: list[dict],
                     add:str
                     ) -> str:
    """
    Augments query with text-based context from context_items.
    """
    # Ensure that context_items is a list of multiple summaries
    # context_summaries = [item["Course Summary"] for item in context_items]

    # Join context summaries into one paragraph with each item on a new line
    # context_text = "- " + "\n- ".join(context_summaries)
    
    # Build the prompt
    new_prompt = f'''
As a seasoned chef with extensive culinary expertise, create a recipe tailored to the user's available ingredients and preferences, drawing inspiration from the provided recipes. Follow these guidelines:

1. **User Query**: Address the user's specific query or request:
   "{LLM_QUERY}"

2. **Ingredients Focus**: Ensure the recipe centers around the user's available ingredients:
   {LLM_INGREDIENTS}
   If essential ingredients are missing, suggest thoughtful substitutions that retain the dish’s original flavor, texture, and purpose.
   
# 3. **Inspirational Basis**: Base the recipe primarily on the provided recipe inspirations. Adapt techniques, flavor pairings, and structure from these inspirations:
#    {combined_recipes_list}

4. **Clear and Concise Instructions**:
   - Present the recipe in plain text.
   - Include a clear list of ingredients and step-by-step preparation instructions.
   - Avoid symbols, patterns, or bullet points; write in complete sentences.

5. **Substitution Suggestions**:
   - Provide substitutions for unavailable or challenging ingredients.
   - Ensure that suggested alternatives maintain the dish’s culinary intention.

6. **Consistency with Inspirations**:
   - Stay true to the style and techniques outlined in the inspirations.
   - Adapt only where necessary to align with the user's specific ingredients and preferences.

Deliver a fully structured recipe that is easy to understand and directly applicable. Ensure it incorporates substitutions where needed and offers any relevant tips or enhancements to elevate the dish.

<important>Create a recipe based on the user's query and ingredients, closely aligning with the techniques and ideas from the provided recipes.</important>
<todo>Provide a structured recipe, including ingredients, instructions, and thoughtful substitution suggestions in plain text format.</todo>
'''
#     new_prompt = f'''
# As a chef with 30 years of culinary expertise across diverse cuisines, create a recipe that perfectly aligns with the user's preference, focusing on the available ingredients and inspired by the provided recipe list:

# {add}

# These are the ingredients the user has:

# {query}

# Use the provided recipes primarily as references for techniques, flavor pairings, and overall dish structure:

# {combined_recipes_list}

# If essential ingredients are missing, suggest thoughtful substitutions that retain the dish’s original flavor, texture, and intention. The response must provide a clear, well-structured recipe including ingredients, step-by-step instructions, and substitution suggestions, presented in full sentences.

# Avoid using symbols, patterns, or bullet points. Write in plain text format. Ensure the recipe reflects the user's available ingredients and closely follows the inspirations provided, adapting only when necessary to suit the user's pantry.

# <important>Create a recipe based on the available ingredients, using the given recipes as inspiration.</important>
# <todo>Provide a recipe with ingredients, instructions, and thoughtful substitution suggestions.</todo>
# '''
    return new_prompt

# Format prompt with context items
new_prompt = new_prompt_formatter(add=LLM_QUERY,
                              combined_recipes_list=relevant_recipes,)
new_prompt

'\nAs a seasoned chef with extensive culinary expertise, create a recipe tailored to the user\'s available ingredients and preferences, drawing inspiration from the provided recipes. Follow these guidelines:\n\n1. **User Query**: Address the user\'s specific query or request:\n   "Hi! i want to eat japanese style cuisine but currently i have a high blood pressure."\n\n2. **Ingredients Focus**: Ensure the recipe centers around the user\'s available ingredients:\n   \n"bananas",\n"white mushrooms",\n"Spring Onion",\n"tomatoes",\n"cucumber",\n"corn on the cob",\n"figs"\n\n   If essential ingredients are missing, suggest thoughtful substitutions that retain the dish’s original flavor, texture, and purpose.\n   \n# 3. **Inspirational Basis**: Base the recipe primarily on the provided recipe inspirations. Adapt techniques, flavor pairings, and structure from these inspirations:\n#    [{\'title\': \'Round Steak with Mushrooms and onions\', \'introduction\': \'To start creating your recipe lib

In [None]:
import requests
import json



url = "http://localhost:11434/api/generate"

headers = {
    "Content-Type": "application/json"
}

data = {
    "model": LLM_MODEL,
    "prompt": new_prompt,
    "stream": False,
    "options": {"num_ctx": 25000}
}
# data = {
#     "model": "llama3.1:70b",
#     "prompt": new_prompt,
#     "stream": False,
#     "options": {"num_ctx": 16000}
# }
print("starting")
response = requests.post(url, headers=headers, data=json.dumps(data))
print(response.status_code)


print(json.loads(response.text)["response"])

add = str(json.loads(response.text)["response"])

starting
