In [30]:
!pip install -q sentence-transformers faiss-cpu pandas
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [56]:

from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


In [57]:
# Sentence-embedding model; the same instance encodes recipes and queries
model = SentenceTransformer("all-mpnet-base-v2")

# Read the recipe table from the Kaggle input directory
df = pd.read_csv("/kaggle/input/recipes/13k-recipes.csv")

# Preview the leading rows to confirm the expected columns are present
df.head(n=5)


Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [58]:
# Build one search text per recipe from its title and ingredient list.
# Missing values are replaced with "" first: astype(str) on its own would turn
# NaN into the literal word "nan", which would then be embedded as if it were
# part of the recipe text.
recipe_texts = (
    df["Title"].fillna("").astype(str)
    + " "
    + df["Ingredients"].fillna("").astype(str)
)

# Encode every recipe text with the sentence-transformer model. The result is
# requested as a tensor (convert_to_tensor=True) and stored for later lookups.
recipe_embeddings = model.encode(recipe_texts.tolist(), convert_to_tensor=True)

print("✅ Embeddings computed!")


Batches:   0%|          | 0/422 [00:00<?, ?it/s]

✅ Embeddings computed!


In [60]:
def find_best_match(query):
    """Return the recipe whose embedding scores highest against ``query``.

    Relies on the notebook-level globals ``model``, ``recipe_embeddings`` and
    ``df``. The query is encoded with ``model``, both embeddings are moved to
    the CPU, and ``cosine_similarity`` scores the query against every recipe.

    Args:
        query: Search text to encode and compare against the recipes.

    Returns:
        dict with keys ``"Title"``, ``"Score"`` (similarity as a Python
        float), ``"Ingredients"`` and ``"Instructions"``, taken from the row
        of ``df`` at the position of the highest score.
    """
    # The model receives a one-element batch, so the similarity result has a
    # single row; that row holds one score per recipe.
    query_vec = model.encode([query], convert_to_tensor=True)
    similarity_row = cosine_similarity(query_vec.cpu(), recipe_embeddings.cpu())[0]

    # Position of the highest score selects the matching row of df.
    top = similarity_row.argmax()
    match = df.iloc[top]

    result = {"Title": match["Title"], "Score": float(similarity_row[top])}
    for column in ("Ingredients", "Instructions"):
        result[column] = match[column]
    return result


In [63]:
# Interactive search loop: keeps prompting until the user types 'exit'.
while True:
    try:
        user_query = input("🔍 Enter your recipe search (type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the session cleanly
        # instead of leaving a traceback in the cell output.
        print("\nGoodbye! 👋")
        break

    if user_query.lower() == "exit":
        print("Goodbye! 👋")
        break

    if not user_query:
        # A blank line carries nothing to search for; encoding it would still
        # print some recipe as the "best" match, so ask again instead.
        continue

    result = find_best_match(user_query)
    print(f"\n🍽 {result['Title']}\n   Score: {result['Score']:.4f}")
    print(f"   Ingredients: {result['Ingredients']}")
    print(f"   Instructions: {result['Instructions']}...\n")


🔍 Enter your recipe search (type 'exit' to quit):  exit


Goodbye! 👋
