In [None]:
!pip install transformers faiss-cpu sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [None]:
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# ---- Recipe table ----------------------------------------------------------
# Read the CSV and discard rows missing any field that feeds the retrieval text.
df = pd.read_csv("/content/Indian_Food_Dataset_With_Recipes.csv").dropna(
    subset=["Cleaned-Ingredients", "TranslatedRecipeName", "FullRecipe"]
)

# One searchable string per recipe: "name | ingredients | full recipe".
df["retrieval_text"] = df["TranslatedRecipeName"].str.cat(
    [df["Cleaned-Ingredients"], df["FullRecipe"]],
    sep=" | ",
)

# ---- Sentence embeddings ---------------------------------------------------
# Row i of `embeddings` corresponds to positional row i of `df`; the search
# function relies on that when it maps FAISS hits back with `df.iloc`.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
corpus_texts = list(df["retrieval_text"])
embeddings = embed_model.encode(corpus_texts, show_progress_bar=True)

# ---- Vector index ----------------------------------------------------------
# Flat L2 index sized to the embedding width, filled with every recipe vector.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# ---- Generator LLM (Hugging Face) ------------------------------------------
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half-precision weights
    device_map="auto",
)

# Function to search and generate answer
def get_recipe_suggestions(user_query):
    """Retrieve the recipes nearest to ``user_query`` and have the LLM present them.

    The query is embedded with ``embed_model``, the 3 nearest vectors are
    looked up in ``index``, the matching rows of ``df`` are formatted into a
    context block, and ``model`` is prompted to answer from that context only.

    Args:
        user_query: Free-text ingredients or dish name entered by the user.

    Returns:
        The decoded text of the newly generated tokens only (the prompt that
        was fed to the model is not echoed back).
    """
    user_embed = embed_model.encode([user_query])
    _, indices = index.search(user_embed, k=3)

    # FAISS fills missing neighbours with -1 when the index holds fewer than
    # k vectors; handing -1 to .iloc would silently pick the last row.
    hit_positions = [int(pos) for pos in indices[0] if pos >= 0]

    # Collect relevant recipes (positional lookup: embeddings were built in df row order)
    recipes = df.iloc[hit_positions][["TranslatedRecipeName", "TotalTimeInMins", "TranslatedIngredients", "TranslatedInstructions"]]

    # Build context string
    context = "".join(
        f"🍽️ Recipe: {row['TranslatedRecipeName']}\n"
        f"⏱️ Time: {row['TotalTimeInMins']} mins\n"
        f"🧂 Ingredients: {row['TranslatedIngredients']}\n"
        f"👨‍🍳 Instructions: {row['TranslatedInstructions']}\n\n"
        for _, row in recipes.iterrows()
    )

    # Prompt to reduce hallucination
    prompt = f"Based on the recipes below, list exactly 3 recipes matching the query '{user_query}' with their cooking time, ingredients, and instructions:\n\n{context}\n\nOnly use the given data and avoid adding any extra recipes or facts."

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(model.device)
    # Passing pad_token_id explicitly avoids the per-call
    # "Setting `pad_token_id` to `eos_token_id`" warning.
    outputs = model.generate(**inputs, max_new_tokens=500, pad_token_id=tokenizer.eos_token_id)

    # The generated sequence starts with the prompt tokens; drop them so the
    # caller gets the model's answer rather than prompt + answer.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/186 [00:00<?, ?it/s]

  return forward_call(*args, **kwargs)


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/7.39G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]



In [None]:
# Interactive entry point: read a free-text query from the user, run
# retrieval + generation, and show the model's text.
query = input("Enter ingredients or dish you're looking for: ")
# `result` is the string returned by get_recipe_suggestions for this query.
result = get_recipe_suggestions(query)
print(result)

Enter ingredients or dish you're looking for: chicken, eggs, onions, tomatoes, red chilli powder, salt, turmeric powder


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Based on the recipes below, list exactly 3 recipes matching the query 'chicken, eggs, onions, tomatoes, red chilli powder, salt, turmeric powder' with their cooking time, ingredients, and instructions:

🍽️ Recipe:  No Onion No Garlic Cabbage Kofta Curry
⏱️ Time: 65 mins
🧂 Ingredients:  1 green chili - chopped, 1/2 tsp turmeric powder, 1 sprig of green coriander - cut, 1/2 tsp garam masala powder,2 cups cabbage - grated, 1/2 cup gram flour, 1/2 tsp red chilli powder, 1/2 tsp cumin powder, turmeric powder - A pinch, 1 to 2 bay leaves, 3 to 4 tbsp cream, 1/2 tsp cardamom - break it, make 3 tomatoes - puree, 1/2 tsp salt - as per taste, oil - as per use, 1/2 tsp hot Masala powder, 1-1 / 2 tbsp coriander powder, 1 tbsp ginger - paste, 1 tbsp red chilli powder, salt - according to taste, 2 tbsp coriander - chopped, 1 cup oil - as per use, 2 tbsp yogurt
👨‍🍳 Instructions: To make the Cabbage Kofta Curry Recipe, firstly tighten the cabbage and put it in a bowl.
Take out the water from it by pre