# 🥇 Title: NutriChef AI - Backend Development Notebook

📝 Welcome! This notebook builds the backend logic for the NutriChef AI project: ingredient detection from images, speech-to-text input, recipe cleaning and retrieval, meal planning, and nutrition advice.

# 📍 Step 1: Image Input - Ingredient Detection (Vision AI)

## Import Libraries

In [None]:
# !pip install transformers torch torchvision pillow

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

## Define Ingredient Detector Class

In [None]:
class IngredientDetector:
    """
    Detects ingredients in an image by captioning it with BLIP and keeping
    the non-filler words of the caption.
    """

    def __init__(self, device=None):
        """
        Initializes the BLIP model for ingredient detection from images.

        Args:
            device: Device to run the model on. When falsy, "cuda" is used if
                torch reports it as available, otherwise "cpu".
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(self.device)

    def detect_ingredients(self, image):
        """
        Given a PIL image, returns a list of detected ingredients and the caption.

        Returns:
            A tuple ``(ingredients, caption)``: the keywords extracted from the
            generated caption, and the caption string itself.
        """
        inputs = self.processor(image, return_tensors="pt").to(self.device)
        out = self.model.generate(**inputs)
        caption = self.processor.decode(out[0], skip_special_tokens=True)

        ingredients = self.extract_keywords(caption)
        return ingredients, caption

    def extract_keywords(self, caption):
        """
        Extract keywords from the caption, ignoring filler words.

        The caption is lower-cased, commas are removed, and the text is split
        on whitespace. Duplicates are dropped while keeping the order in which
        the words first appear, so the result is the same on every run.

        Returns:
            A list of unique keywords in first-seen order (empty if the
            caption contains only filler words or is empty).
        """
        ignore_words = {"a", "the", "in", "of", "and", "with", "on", "inside", "near"}
        words = caption.lower().replace(",", "").split()

        # dict.fromkeys removes duplicates but, unlike set(), preserves the
        # insertion order, which keeps the output deterministic.
        return list(dict.fromkeys(word for word in words if word not in ignore_words))

## Load an Example Image

In [None]:
# Upload or use an example image (e.g., fridge or pantry)
from PIL import Image

# Load your example image; the `image` global is what the detection cell
# later passes to the detector.
image = Image.open("fridge_ex.jpg")  # <-- Change this to your file
# Show the loaded image as a quick visual check before running detection.
image.show()

## Run Ingredient Detection

In [None]:
# Create the detector (its constructor loads the BLIP processor and model)
detector = IngredientDetector()

# Detect ingredients: returns the keyword list and the caption it was derived from
ingredients, caption = detector.detect_ingredients(image)

# Print both so the extracted keywords can be compared against the raw caption
print("Generated Caption:", caption)
print("Detected Ingredients:", ingredients)

# 📍 Step 2: Voice Input — Speech-to-Text (Whisper AI)
In this step, we allow users to speak their available ingredients.
We will use the OpenAI Whisper model to transcribe audio input into text.

## Import Libraries

In [None]:
# Install Whisper
# !pip install git+https://github.com/openai/whisper.git
# !pip install torch torchvision torchaudio

In [None]:
import whisper
import torch

## Define SpeechToText Class

In [None]:
class SpeechToText:
    """Wraps a Whisper model and exposes a single audio-to-text call."""

    def __init__(self, device=None):
        """
        Load the Whisper "small" checkpoint and move it to the chosen device.

        A falsy `device` selects "cuda" when torch reports it as available and
        "cpu" otherwise.
        """
        if not device:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device

        whisper_model = whisper.load_model("small")  # 'small' or 'base' version
        self.model = whisper_model.to(self.device)

    def transcribe_audio(self, audio_path):
        """
        Run the loaded model on `audio_path` and return the "text" entry of
        the result it produces.
        """
        transcription = self.model.transcribe(audio_path)
        text = transcription["text"]
        return text

## Load or Record Audio File

In [None]:
# Load an example audio file
# `audio_path` is the file handed to SpeechToText.transcribe_audio in the
# transcription cell.
audio_path = "your_audio_file_here.wav"  # Replace with your file

## Run Transcription

In [None]:
# Create the speech-to-text object (its constructor loads the Whisper model)
speech_to_text = SpeechToText()

# Transcribe audio: returns the "text" field of the Whisper result
transcribed_text = speech_to_text.transcribe_audio(audio_path)

print("Transcribed Text:", transcribed_text)

# 📍 Step 3: Recipe Dataset Cleaning
In this step, we clean the raw recipes dataset (recipes.csv) and prepare a simpler version for fast recipe retrieval.
We keep only the fields used downstream: recipe name, ingredients, directions (renamed to instructions), nutrition, cuisine path, and timing.

## Load Raw Recipes Dataset

In [None]:
import pandas as pd

# Load the raw recipes.csv
csv_path = "recipes.csv"  # <-- Make sure your file is here
recipes_df = pd.read_csv(csv_path)

# Display basic info
# info() prints its summary itself; head() is the cell's last expression, so
# the notebook renders it as the cell output.
recipes_df.info()
recipes_df.head()

## Clean and Simplify Dataset

In [None]:
# Keep only the important columns
# Rows with a missing value in any of these columns are dropped.
important_columns = ["recipe_name", "ingredients", "directions", "nutrition", "cuisine_path", "timing"]
recipes_clean = recipes_df[important_columns].dropna()

# Rename columns for easier use
# Later cells rely on the new names "name" and "instructions".
recipes_clean = recipes_clean.rename(columns={
    "recipe_name": "name",
    "ingredients": "ingredients",  # name unchanged; listed for completeness
    "directions": "instructions"
})

# Use globally
# Rebinds `recipes_df` to the cleaned frame; the nutrition lookup in Step 7
# reads this global and expects the renamed "name" column.
recipes_df = recipes_clean

# Preview
recipes_clean.head()

## Save Cleaned Dataset

In [None]:
import os
import json

# Save cleaned recipes to JSON file
# The same path is read back in Step 4 when building the embeddings.
save_path = "./database/cleaned_recipes.json"

# Convert dataframe to list of dicts
# orient="records" yields one dict per row, keyed by column name.
recipes_list = recipes_clean.to_dict(orient="records")

# Make sure the folder exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save
with open(save_path, "w", encoding="utf-8") as f:
    json.dump(recipes_list, f, indent=4)

print(f"✅ Saved cleaned recipes to {save_path} with {len(recipes_list)} recipes.")

# 📍 Step 4: Embedding Recipes + Building the Vector Database
In this step, we embed the cleaned recipes into vector representations and store them in ChromaDB.
This allows NutriChef AI to retrieve recipes by ingredient similarity.

## Import Libraries

In [None]:
# Install Sentence Transformers and ChromaDB
# !pip install sentence-transformers chromadb

In [None]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import json

## Create Recipe Embeddings

In [None]:
# Load cleaned recipes
# This is the JSON file written by the "Save Cleaned Dataset" cell: a list
# with one dict per recipe.
with open("./database/cleaned_recipes.json", "r", encoding="utf-8") as f:
    cleaned_recipes = json.load(f)

print(f"✅ Loaded {len(cleaned_recipes)} cleaned recipes.")

In [None]:
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # Small and fast model

# Prepare texts for embedding (ingredients + name + instructions combined)
# Each text has the form "Recipe: <name>. Ingredients: <...>. Instructions: <...>";
# these exact strings are also what gets stored in ChromaDB as the documents.
recipe_texts = [
    f"Recipe: {recipe['name']}. Ingredients: {recipe['ingredients']}. Instructions: {recipe['instructions']}"
    for recipe in cleaned_recipes
]

# Create embeddings
# One embedding per entry of recipe_texts, in the same order.
recipe_embeddings = embedding_model.encode(recipe_texts, show_progress_bar=True)

print(f"✅ Created {len(recipe_embeddings)} embeddings.")

## Store Embeddings in ChromaDB

In [None]:
import chromadb

# Create or load a ChromaDB persistent collection
chroma_client = chromadb.PersistentClient(path="./database/vector_store")

# Create a new collection (or load if exists)
collection = chroma_client.get_or_create_collection(name="recipes")

# Store the documents and their embeddings under positional string ids.
# upsert() is used instead of add() so this cell can be re-run safely: add()
# ignores ids that already exist in the persistent collection, which would
# leave stale documents/embeddings behind after the dataset is regenerated,
# whereas upsert() overwrites existing ids and inserts new ones.
collection.upsert(
    documents=recipe_texts,
    embeddings=recipe_embeddings.tolist(),
    ids=[str(i) for i in range(len(cleaned_recipes))],
)

# No need for chroma_client.persist() anymore — it's handled automatically by PersistentClient!
print("✅ Saved embeddings to ChromaDB (Persistent Storage).")

## Test Simple Recipe Search

In [None]:
# Example: search recipes matching "chicken and rice"

query = "chicken and rice"

# Create query embedding
# The query is wrapped in a list, so exactly one query is encoded.
query_embedding = embedding_model.encode([query])

# Search top 5 recipes
results = collection.query(
    query_embeddings=query_embedding.tolist(),
    n_results=5,
    include=["documents"]
)

# Show top results
# results["documents"] is indexed per query; only one query was sent, so its
# matches are at index 0.
for i, doc in enumerate(results["documents"][0]):
    print(f"\nResult {i+1}:")
    print(doc)

# 📍 Step 5: Recipe Retrieval System
In this step, we implement a function to retrieve top matching recipes from ChromaDB based on detected ingredients from images or speech.
This will allow NutriChef AI to find the best recipe matches quickly.

## Connect to ChromaDB PersistentClient

In [None]:
import chromadb
from sentence_transformers import SentenceTransformer

# Connect to existing ChromaDB
# Same on-disk path that Step 4 wrote the embeddings to.
chroma_client = chromadb.PersistentClient(path="./database/vector_store")

# Load the existing collection
# NOTE: get_or_create_collection creates "recipes" if it is missing, so an
# empty collection here means Step 4 has not been run against this path.
collection = chroma_client.get_or_create_collection(name="recipes")

# Load the same embedding model you used before
# Queries must be embedded with the model that produced the stored vectors.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

## Embed User Query

In [None]:
def embed_query(text_query):
    """
    Encode one user query (an ingredients string) with the shared embedding model.

    The query is wrapped in a single-element list before encoding, and the
    model's output for that list is returned unchanged.
    """
    batch = [text_query]
    encoded = embedding_model.encode(batch)
    return encoded

## Search Top Matching Recipes

In [None]:
def search_recipes(ingredient_query, top_k=5):
    """
    Look up the stored recipe documents closest to an ingredient query.

    The query string is embedded with `embed_query`, the module-level
    `collection` is asked for `top_k` results, and the matched document
    texts for that single query are returned.
    """
    query_vectors = embed_query(ingredient_query).tolist()

    hits = collection.query(
        query_embeddings=query_vectors,
        n_results=top_k,
        include=["documents"],
    )

    # Documents are indexed per query; exactly one query was sent, so its
    # matches sit at index 0.
    documents_per_query = hits["documents"]
    return documents_per_query[0]

## Test Retrieval System

In [None]:
# Example search
user_ingredients = "eggs, spinach, cheese"

# Returns up to top_k recipe document strings for the query.
matching_recipes = search_recipes(user_ingredients, top_k=5)

print("Top Recipes Found:")
# Number the results starting from 1 for display.
for i, recipe in enumerate(matching_recipes, 1):
    print(f"\nRecipe {i}:")
    print(recipe)

# 📍 Step 6: Meal Plan Generator
In this step, we generate a simple daily meal plan by organizing the top matching recipes into breakfast, lunch, and dinner suggestions.

## Organize Recipes into Meals

In [None]:
import random

def create_daily_meal_plan(matching_recipes):
    """
    Given a list of matching recipes (strings),
    organize them into breakfast, lunch, and dinner.

    The recipes are shuffled and the first three are assigned to the meals in
    order. With fewer than three recipes, the remaining meals reuse the first
    shuffled recipe; with none, every meal is set to "No recipe found.".

    Args:
        matching_recipes: Sequence of recipe strings. It is not modified.

    Returns:
        A dict with the keys "Breakfast", "Lunch" and "Dinner" (in that order).
    """
    meals = ("Breakfast", "Lunch", "Dinner")

    # Shuffle a copy so the caller's list keeps its original order.
    recipes = list(matching_recipes)
    random.shuffle(recipes)

    if not recipes:
        return {meal: "No recipe found." for meal in meals}

    meal_plan = {}
    for slot, meal in enumerate(meals):
        # When there are fewer recipes than meals, fall back to the first one.
        meal_plan[meal] = recipes[slot] if slot < len(recipes) else recipes[0]

    return meal_plan

## Test Meal Plan Generation

In [None]:
# Test the meal planner

# Assume we have retrieved some recipes already
user_ingredients = "eggs, cheese, spinach"

# Search matching recipes
matching_recipes = search_recipes(user_ingredients, top_k=5)

# Create a meal plan
# The result is a dict keyed by "Breakfast", "Lunch" and "Dinner"; it is also
# the input of the nutrition cell in Step 7.
daily_meal_plan = create_daily_meal_plan(matching_recipes)

# Show the meal plan
print("🥣 Your Daily Meal Plan:")

for meal, recipe in daily_meal_plan.items():
    print(f"\n{meal}:")
    print(recipe)

# 📍 Step 7: Nutrition Facts + Health Advice Generator
In this step, we enhance the meal plan by generating nutritional facts (Calories, Proteins, Carbs, Fats)
and offering a personalized health tip for the user.

## Import Libraries

In [None]:
# !pip install openai

In [None]:
from openai import OpenAI

# (Safe temporary way: Set API key for this session)
# NOTE(review): this assignment overwrites any OPENAI_API_KEY already present
# in the environment, and a real key pasted here is saved with the notebook —
# consider reading the key from the environment or prompting for it instead.
import os
os.environ["OPENAI_API_KEY"] = "your_openAI_api_key_here"

# Create client
# Presumably the client picks the key up from OPENAI_API_KEY; confirm against
# the openai package documentation.
client = OpenAI()

## Define Nutrition + Health Advice Generator

In [None]:
import ast


def _recipe_name(meal_text):
    """Return the name from a 'recipe: <name>. ingredients: ...' text, else the text unchanged."""
    prefix = "recipe:"
    if meal_text.startswith(prefix):
        name, separator, _ = meal_text[len(prefix):].partition(". ingredients:")
        if separator:
            return name.strip()
    return meal_text


def _summarize_nutrition(nutrition):
    """Format a nutrition value as 'Calories/Fat/Carbs/Protein', or return it unchanged if it cannot be parsed."""
    try:
        nutrition_data = ast.literal_eval(nutrition)
        return (
            f"Calories: {nutrition_data[0]} kcal, "
            f"Fat: {nutrition_data[1]}g, "
            f"Carbs: {nutrition_data[2]}g, "
            f"Protein: {nutrition_data[3]}g"
        )
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
        # Not a Python literal with at least four indexable entries:
        # fall back to the raw stored value (best effort).
        return nutrition


def lookup_nutrition(meal_plan, recipes_df):
    """
    Look up stored nutrition data for every meal of a meal plan.

    Args:
        meal_plan: Either a dict mapping a meal label to a recipe text (the
            shape produced by `create_daily_meal_plan`) or a string with one
            "label: meal" entry per line. Lines without a colon are skipped.
            A recipe text of the form "Recipe: <name>. Ingredients: ..." is
            reduced to its name before the lookup.
        recipes_df: DataFrame with a "name" and a "nutrition" column.

    Returns:
        One line per meal, joined with newlines, of the form
        "<Label>: <meal> ➤ <nutrition summary>". The meal is lower-cased.
        The summary is "Nutrition data not found." when no recipe name
        contains the meal text (or the meal text is empty).
    """
    if not meal_plan:
        return ""

    if isinstance(meal_plan, dict):
        # Dict values may span several lines, so they are never split by line.
        entries = [(str(label), str(meal)) for label, meal in meal_plan.items()]
    else:
        entries = [
            line.split(":", 1)
            for line in meal_plan.splitlines()
            if ":" in line
        ]

    results = []
    for label, meal in entries:
        meal = _recipe_name(meal.lower().strip())

        nutrition_summary = "Nutrition data not found."
        # An empty pattern would match every row, so it is treated as "not found".
        if meal:
            # regex=False: recipe names may contain characters such as "(" or "+"
            # that must be matched literally rather than as a pattern.
            match = recipes_df[
                recipes_df["name"].str.contains(meal, case=False, na=False, regex=False)
            ]
            if not match.empty:
                nutrition_summary = _summarize_nutrition(match.iloc[0]["nutrition"])

        results.append(f"{label.capitalize()}: {meal} ➤ {nutrition_summary}")
    return "\n".join(results)


def generate_nutrition_facts_and_advice(meal_plan, model="gpt-3.5-turbo"):
    """
    Ask the chat model for a nutritional summary and health tips for a meal plan.

    Args:
        meal_plan: Either a dict mapping a meal label to a recipe text (the
            shape produced by `create_daily_meal_plan`) or an already
            formatted string with one "label: meal" entry per line.
        model: Name of the chat model to query.

    Returns:
        The text of the model's reply. If anything fails, the error is printed
        and the string "❌ Error generating nutrition advice." is returned
        instead of raising.
    """
    try:
        if isinstance(meal_plan, dict):
            # Render the dict as one "Meal: recipe" line per entry instead of a
            # Python repr. Whitespace inside each recipe is collapsed so every
            # entry stays on a single line for the line-based nutrition lookup.
            plan_text = "\n".join(
                f"{meal}: {' '.join(str(recipe).split())}"
                for meal, recipe in meal_plan.items()
            )
        else:
            plan_text = str(meal_plan)

        grounded_nutrition = lookup_nutrition(plan_text, recipes_df)

        prompt = f"""
You are a nutritionist.

Here is the meal plan for today:
{plan_text}

And here are the grounded nutrition facts for each meal:
{grounded_nutrition}

Now:
- Give a short nutritional summary for the day
- Suggest 2 health tips to improve the meal plan
"""

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a licensed dietitian."},
                {"role": "user", "content": prompt}
            ]
        )

        return response.choices[0].message.content

    except Exception as e:
        # Top-level boundary of the notebook step: report the failure and
        # return a fixed message so the calling cell still prints something.
        print(f"🔥 Error generating nutrition advice: {e}")
        return "❌ Error generating nutrition advice."

## Test Nutrition Facts Generator

In [None]:
# Example usage
# `daily_meal_plan` is the dict built in the Step 6 test cell.
nutrition_summary = generate_nutrition_facts_and_advice(daily_meal_plan)

print("🧪 Nutrition Summary + Health Advice:")
print(nutrition_summary)