In [158]:
import pandas as pd
import numpy as np
import spacy
from sklearn.metrics.pairwise import cosine_similarity
import os
import concurrent.futures
from predictionguard import PredictionGuard

# Read in Variables

In [159]:
# --- Input files -------------------------------------------------------------
data = "2. Allergy_Filtered.csv"     # recipe table (CSV)
user_data = "3. UserOutput.csv"      # user table (CSV)

# --- Column names in the recipe table ----------------------------------------
recipe = "name"                      # recipe title - str
ingredients = "ingredients"          # str list of ingredients per recipe
cook_time = "cook_time"              # cook time - int
reviews = "reviews"
rating = "rating"
category = "category"
url = "url"

# --- Column name in the user table -------------------------------------------
pantry = "ingredients"               # str list of ingredients in the user's pantry


# Load the recipe table, then the user table, from the CSV paths configured above.
df = pd.read_csv(filepath_or_buffer=data)
df_user = pd.read_csv(filepath_or_buffer=user_data)


In [160]:
# Preview the user table: positional rows 1 through 19.
# NOTE(review): the slice starts at 1, so row 0 is not shown - confirm that is intended.
print(df_user.iloc[1:20])

       attributes                 ingredients
1    easy cleanup                      garlic
2      light meal              grape tomatoes
3   low prep time                yellow onion
4   portable meal                sweet potato
5             NaN                      apples
6             NaN                        lime
7             NaN                      orange
8             NaN                      grapes
9             NaN                   pineapple
10            NaN                    cilantro
11            NaN  almond and coconut creamer
12            NaN                     avocado
13            NaN                     carrots
14            NaN                    cucumber
15            NaN                      garlic
16            NaN                     cabbage
17            NaN                        eggs
18            NaN                 goat cheese
19            NaN            sandwich pickles


In [161]:
# Convert the pantry column of the user table into a plain list of strings.
# Missing cells are dropped first: a NaN is a float, and a float in this list
# makes the later ', '.join(pantry_ingredients) raise TypeError.
pantry_ingredients = df_user[pantry].dropna().astype(str).tolist()

# Display the resulting list
print(pantry_ingredients)

['red onion', 'garlic', 'grape tomatoes', 'yellow onion', 'sweet potato', 'apples', 'lime', 'orange', 'grapes', 'pineapple', 'cilantro', 'almond and coconut creamer', 'avocado', 'carrots', 'cucumber', 'garlic', 'cabbage', 'eggs', 'goat cheese', 'sandwich pickles', 'bread']


# Preprocess Recipe Ingredients Into Lists


In [155]:
#print(spacy.util.get_installed_models())  # Uncomment and run this to check which spaCy models you have installed; if yours differs from the one used here, use the name it prints in the nlp step below.

In [162]:
# Read the Prediction Guard API key from the environment only. The key is a
# secret: never place a literal key (or a literal fallback default) in the
# notebook. Set PREDICTIONGUARD_API_KEY before starting the kernel.
api_key = os.getenv("PREDICTIONGUARD_API_KEY")
if not api_key:
    # Fail here with a clear message instead of letting every request fail later.
    raise RuntimeError(
        "PREDICTIONGUARD_API_KEY is not set. Export it in the environment "
        "before running this notebook."
    )

# Initialize the PredictionGuard client
client = PredictionGuard(api_key=api_key)

# System message placed first in the message list of every chat request.
# The prompt is written one sentence per item and joined with single spaces.
system_message = dict(
    role="system",
    content=" ".join((
        "You are an ingredient matching evaluator.",
        "Your task is to assess how feasible it is to make each recipe based on the user's pantry ingredients.",
        "Return a score between 0 and 1 where 1 means all ingredients match perfectly, "
        "and a lower score means that some ingredients are similar but not exact "
        "(e.g., 'onion' instead of 'white onion').",
        "Do not provide any explanations or contextual information.",
        "Only return the score.",
    )),
)

def process_recipe(row):
    """Ask the chat model how feasible one recipe is given the user's pantry.

    Parameters
    ----------
    row : mapping
        One recipe record; it is indexed with the keys 'name' and 'ingredients'.

    Returns
    -------
    str
        The text of the model's first choice with surrounding whitespace
        stripped, or a string starting with "Error: " followed by the exception
        message if any Exception is raised while building or sending the request.

    Notes
    -----
    Reads the module-level names `pantry_ingredients`, `system_message` and
    `client`. Exceptions are not propagated; they are reported in the return
    value, so callers must check for the "Error: " prefix themselves.
    """
    try:
        recipe_name = row['name']
        recipe_ingredients = row['ingredients']

        # Keep only real, non-blank text entries. A list built from a DataFrame
        # column can contain NaN (a float); str.join raises TypeError on it, and
        # the handler below would then return an error string for every recipe.
        pantry_items = [
            item for item in pantry_ingredients
            if isinstance(item, str) and item.strip()
        ]
        pantry_ingredients_str = ', '.join(pantry_items)

        # User message for checking recipe feasibility
        user_message = f"Pantry ingredients: {pantry_ingredients_str}. Can I make the recipe '{recipe_name}' with the ingredients {recipe_ingredients}?"

        # System prompt first, then the per-recipe question
        messages = [
            system_message,
            {"role": "user", "content": user_message},
        ]

        # Send the conversation to the PredictionGuard API
        result = client.chat.completions.create(
            model="Hermes-3-Llama-3.1-8B",
            messages=messages
        )

        # The reply text lives in the first choice of the response
        response = result['choices'][0]['message']['content']
        return response.strip()
    except Exception as e:
        return f"Error: {str(e)}"

# Process recipes in parallel
def process_in_parallel(data_frame, max_workers=5):
    """Score every row of `data_frame` with `process_recipe` on a thread pool.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Converted to a list of row dicts with ``to_dict(orient='records')``;
        each row must contain a 'name' key.
    max_workers : int, default 5
        Number of worker threads given to the ThreadPoolExecutor.

    Returns
    -------
    list of dict
        One ``{"name": ..., "feasibility_score": ...}`` dict per row, in the
        same order as the rows of `data_frame` (``executor.map`` preserves
        input order). An empty frame yields an empty list.

    Prints "Processed N recipes" each time the number of finished recipes
    reaches a multiple of 100.
    """
    # Function-scope import so this definition does not rely on a new
    # top-of-notebook import.
    import threading

    records = data_frame.to_dict(orient='records')

    # The counter is shared by all worker threads; `+=` is a read-modify-write,
    # so it is guarded by a lock to keep the progress numbers exact.
    progress_lock = threading.Lock()
    completed = 0

    def process_recipe_with_checker(row):
        nonlocal completed
        score = process_recipe(row)
        # Count only after the recipe has actually been processed, so the
        # message reports finished work rather than started work.
        with progress_lock:
            completed += 1
            if completed % 100 == 0:
                print(f"Processed {completed} recipes")
        return {
            "name": row['name'],
            "feasibility_score": score
        }

    # Use ThreadPoolExecutor for parallel processing
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        responses = list(executor.map(process_recipe_with_checker, records))
    return responses




In [163]:
# Score every recipe in `df` against the pantry using 5 worker threads
response = process_in_parallel(df, max_workers=5)

# Echo each recipe together with the score that came back for it
for result in response:
    print(f"Recipe: {result['name']}, Feasibility Score: {result['feasibility_score']}")

# Collect the scores in recipe order and attach them to the DataFrame as a new column
feasibility_scores = [entry['feasibility_score'] for entry in response]
df['feasibility_score'] = feasibility_scores

Processed 100 recipes
Processed 200 recipes
Processed 300 recipes
Processed 400 recipes
Processed 500 recipes
Processed 600 recipes
Recipe: 15-Minute Butter Gnocchi with Spicy Chili Crisp, Capers and Parmesan, Feasibility Score: 0.2
Recipe: 3-Ingredient Banana Oatmeal Cookies, Feasibility Score: 0.35
Recipe: 3-Ingredient Peanut Butter Oatmeal Cookies, Feasibility Score: 0.3
Recipe: 4 Ingredient Lemon Pound Cake, Feasibility Score: 0.3
Recipe: 4-Ingredient Keto Peanut Butter Cookies, Feasibility Score: 0.3
Recipe: 4-Ingredient Orange Chicken, Feasibility Score: 0.1
Recipe: 4-Ingredient Slow Cooker Peach Cobbler, Feasibility Score: 0.0
Recipe: Air Fryer Arancini, Feasibility Score: 0.2
Recipe: Air Fryer Chicken Cordon Bleu, Feasibility Score: 0.25
Recipe: Air Fryer Chicken Parmesan, Feasibility Score: 0.15
Recipe: Air Fryer Chicken Quesadillas, Feasibility Score: 0.35
Recipe: Air Fryer Cinnamon Roll Bites, Feasibility Score: 0.0
Recipe: Air Fryer Eggplant Parmesan, Feasibility Score: 0.2

In [165]:
# Second name for the scored recipe frame; this binds the same object as `df`, not a copy
ingredient_similarity = df

# Reviews table that gets joined onto the scored recipes
recipe_reviews = pd.read_csv(filepath_or_buffer='1. Recipe_Reviews.csv')

In [166]:
# Inner join on 'name': keep only rows whose name appears in both tables
merged_data = ingredient_similarity.merge(recipe_reviews, how='inner', on='name')


In [167]:

# Destination CSV for the joined table; the index is not written
outputfile = '4.FeasibleRecipes.csv'
merged_data.to_csv(path_or_buf=outputfile, index=False)