# Using Food.com tokenized data

The following code extracts the ingredients from the Food.com dataset, sorted in descending order of their counts, and saves them to a CSV file.

In [1]:
import ast
import pickle
import sys
import types
from collections import Counter

import numpy as np
import pandas as pd

import pandas.core.indexes.base

# Compatibility shim applied before unpickling: register the module path
# 'pandas.core.indexes.numeric' as an alias of pandas.core.indexes.base and
# expose pd.Index there under the name Int64Index.
# NOTE(review): presumably the pickle was written by an older pandas that
# still had Int64Index in that module -- confirm against the data source.
sys.modules['pandas.core.indexes.numeric'] = pandas.core.indexes.base
pandas.core.indexes.base.Int64Index = pd.Index

# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load ingr_map.pkl from a trusted source.
with open("../Data/Food/ingr_map.pkl", "rb") as pickle_file:
    ingr_map = pickle.load(pickle_file, encoding="latin1")

In [None]:
# Keep the first row seen for each distinct 'replaced' value, then order the
# result from the highest 'count' to the lowest and write it out as CSV.
top_ingredients = (
    ingr_map[['replaced', 'count']]
    .drop_duplicates(subset='replaced')
    .sort_values(by='count', ascending=False)
)
top_ingredients.to_csv("../Data/Food/top_ingredients.csv", index=False)

# RecipeNLG EDA

In [2]:
# Read the RecipeNLG CSV into a DataFrame; the bare expression on the last
# line makes the notebook render a (truncated) preview of it.
recipes = pd.read_csv(
    filepath_or_buffer='../Data/RecipeNLG/RecipeNLG_dataset.csv',
)
recipes

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""bu..."
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom..."
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""gar..."
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroo..."
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""bu..."
...,...,...,...,...,...,...,...
2231137,2231137,Sunny's Fake Crepes,"[""1/2 cup chocolate hazelnut spread (recommend...","[""Spread hazelnut spread on 1 side of each tor...",www.foodnetwork.com/recipes/sunny-anderson/sun...,Recipes1M,"[""chocolate hazelnut spread"", ""tortillas"", ""bu..."
2231138,2231138,Devil Eggs,"[""1 dozen eggs"", ""1 paprika"", ""1 salt and pepp...","[""Boil eggs on medium for 30mins."", ""Then cool...",cookpad.com/us/recipes/355411-devil-eggs,Recipes1M,"[""eggs"", ""paprika"", ""salt"", ""choice"", ""miracle..."
2231139,2231139,Extremely Easy and Quick - Namul Daikon Salad,"[""150 grams Daikon radish"", ""1 tbsp Sesame oil...","[""Julienne the daikon and squeeze out the exce...",cookpad.com/us/recipes/153324-extremely-easy-a...,Recipes1M,"[""radish"", ""Sesame oil"", ""White sesame seeds"",..."
2231140,2231140,Pan-Roasted Pork Chops With Apple Fritters,"[""1 cup apple cider"", ""6 tablespoons sugar"", ""...","[""In a large bowl, mix the apple cider with 4 ...",cooking.nytimes.com/recipes/1015164,Recipes1M,"[""apple cider"", ""sugar"", ""kosher salt"", ""bay l..."


# Recipe NLG Vector DB with ingredients

The code below builds a FAISS vector index from the ingredients listed in each recipe (the `NER` column). Only the first 50,000 recipes are embedded and indexed.

In [None]:
from sentence_transformers import SentenceTransformer
import faiss

# (Re)load the RecipeNLG CSV so that this section has a fresh `recipes` frame
# to work on, independent of any columns added to it elsewhere.
recipes = pd.read_csv(
    filepath_or_buffer="../Data/RecipeNLG/RecipeNLG_dataset.csv",
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Each NER cell is the text of a Python list literal (e.g. '["milk", "nuts"]').
# Parse it with ast.literal_eval, which only accepts literals, instead of
# eval(), which would execute any expression found in the CSV.
recipes['ingredients_text'] = recipes['NER'].apply(
    lambda ner_literal: ", ".join(ast.literal_eval(ner_literal))
)


In [6]:
# Sentence-embedding model used for both the recipes and the user query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the ingredient text of the first 50,000 recipes only.
embeddings = model.encode(
    recipes['ingredients_text'].head(50000).tolist(),
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
)

# FAISS is fed C-contiguous float32 arrays.
embeddings = np.ascontiguousarray(np.asarray(embeddings, dtype="float32"))

print("Shape of embeddings:", embeddings.shape)

# Flat (exhaustive) index ranked by L2 distance: a smaller distance means a
# closer match. Row i of the index corresponds to recipes.iloc[i].
index = faiss.IndexFlatL2(embeddings.shape[-1])
index.add(embeddings)


Batches: 100%|██████████| 782/782 [01:12<00:00, 10.80it/s]


Shape of embeddings: (50000, 384)


In [7]:
# Map each FAISS row id (0 .. len(embeddings) - 1) to the recipe fields that
# are shown for a search hit. The index was filled in DataFrame order, so the
# positional slice below lines up with the embedding rows.
#
# Built with one slice + to_dict("records") rather than calling
# recipes.iloc[i] four times per row, which materialises a throwaway Series
# on every call and is very slow for tens of thousands of rows.
recipe_fields = ["title", "ingredients", "directions", "link"]
recipe_map = dict(
    enumerate(
        recipes.iloc[:len(embeddings)][recipe_fields].to_dict("records")
    )
)

In [8]:
# Ingredients the user has on hand, turned into a natural-language query.
user_ingredients = ["chicken", "tomato", "garlic", "onion"]
query = "Recipes with " + ", ".join(user_ingredients)

# Embed the query with the same model and hand FAISS a contiguous float32 row.
query_vec = np.ascontiguousarray(
    model.encode([query], convert_to_numpy=True),
    dtype=np.float32,
)

# Retrieve the k nearest recipes; indices[0] holds the hits for our one query.
k = 5
distances, indices = index.search(query_vec, k)

for rank, idx in enumerate(indices[0]):
    recipe = recipe_map[idx]
    print(f"Rank {rank+1}: {recipe['title']}")
    for label, field in (
        ("Ingredients:", 'ingredients'),
        ("Directions:", 'directions'),
        ("Link:", 'link'),
    ):
        print(label, recipe[field])
    print("---")


Rank 1: Chicken And Rice(Low-Fat)  
Ingredients: ["4 large tomatoes", "1 large red bell pepper", "2 medium onions", "2 c. uncooked rice", "salt and pepper", "garlic powder", "1 chicken bouillon", "4 boneless breasts of chicken"]
Directions: ["Cut up tomatoes into large chunks.", "Cut up pepper into strips. Cut up onions into rings.", "Dissolve chicken bouillon with 1 cup water.", "Combine ingredients and simmer until tomatoes dissolve. Add salt, pepper and garlic powder.", "Cut up chicken in strips and add.", "Simmer another 1/2 hour.", "Cook rice and add (or) serve it over the rice."]
Link: www.cookbooks.com/Recipe-Details.aspx?id=579749
---
Rank 2: Stuffed Potatoes
Ingredients: ["10 medium size potatoes", "sausage, uncooked", "1 can tomatoes", "1 can tomato sauce", "2 c. cooked mixed vegetables", "1 medium chopped onion", "1/2 c. chopped bell pepper", "garlic to taste", "salt and pepper to taste"]
Directions: ["Peel potatoes.", "Spiral or cut out center.", "Stuff with sausage (uncook

# Recipe NLG Vector DB with ingredients and directions

The code below creates a vector DB with FAISS from all the ingredients mentioned in a dish's recipe, plus the directions for the recipe.

This might serve as the vector DB for Retrieval Augmented Generation of recipes using an LLM.

In [None]:
def _recipe_text(row):
    """Return '<title> <ingredient ingredient ...>' for one recipe row.

    The title is replaced by an empty string when it is null. NER may be a
    list/tuple or, as read from the CSV, the text of a Python list literal;
    anything that cannot be turned into a list contributes no ingredients.
    """
    title = str(row['title']) if pd.notnull(row['title']) else ""

    ner = row['NER']
    if isinstance(ner, str):
        # read_csv delivers NER as a string such as '["milk", "nuts"]'.
        try:
            ner = ast.literal_eval(ner)
        except (ValueError, SyntaxError):
            ner = None
    ingredients = " ".join(map(str, ner)) if isinstance(ner, (list, tuple)) else ""

    # The previous one-line lambda wrote `a + " " + b if cond else ""`. The
    # conditional expression binds looser than `+`, so the whole string
    # (title included) became "" whenever NER was not a list -- which, for
    # CSV-loaded string data, was every row.
    return title + " " + ingredients


recipes['recipe_text'] = recipes.apply(_recipe_text, axis=1)
                