# Dietary Restriction AI

### Import Packages

In [1]:
pip install langchain_community

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install pyspark

Note: you may need to restart the kernel to use updated packages.


In [36]:
import json
import os
import tempfile

import numpy as np
import pandas as pd
from langchain_community.llms import Ollama
from pyspark.sql import SparkSession
from pyspark.sql import functions as f
from sentence_transformers import SentenceTransformer


ModuleNotFoundError: No module named 'sentence_transformers'

### Initialize a Spark Session

In [5]:
# Build the Spark session used by the rest of the notebook. The application is
# named "Local Spark" and both the driver and executor memory settings are 4g.
spark = (
    SparkSession.builder
    .appName("Local Spark")
    .config("spark.driver.memory", "4g")
    .config("spark.executor.memory", "4g")
    .getOrCreate()
)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/03/30 13:18:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


### Data Ingestion
To be compatible with the LLM pipeline that we will be building, the data needs to be processed into a format suited to efficient retrieval and stored in a searchable index. 

##### Recipe Data
Our recipe data is sourced from web-scraped data containing each recipe's title, ingredients, and instructions.

In [26]:
# List the working directory once; `all_files` is reused by later cells.
all_files = os.listdir("./")

# os.listdir returns entries in arbitrary order, so sort the matches to make
# the load order (and the row order of the combined frame) deterministic.
recipe_files = sorted(file for file in all_files if "recipes_raw_nosource" in file)

# Fail early with a clear message instead of letting pd.concat raise on an
# empty list further down.
if not recipe_files:
    raise FileNotFoundError(
        "No files containing 'recipes_raw_nosource' were found in the working directory."
    )

df_list = []

for file_name in recipe_files:
    with open(file_name, "r", encoding="utf-8") as file:
        data = json.load(file)

    # orient="index": each top-level JSON key becomes one row of the frame.
    file_df = pd.DataFrame.from_dict(data, orient="index")
    df_list.append(file_df)  # Collect DataFrame

# Concatenate all per-file frames and keep only the columns used downstream.
# The pandas frame gets its own name so `recipes_df` always refers to the
# Spark DataFrame.
recipes_pdf = pd.concat(df_list)[['title', 'ingredients', 'instructions']]

# Convert to a Spark DataFrame and spread it over 100 partitions.
recipes_df = spark.createDataFrame(recipes_pdf)
recipes_df = recipes_df.repartition(100)

recipes_df.show()

25/03/30 14:29:56 WARN TaskSetManager: Stage 16 contains a task of very large size (16301 KiB). The maximum recommended task size is 1000 KiB.


+--------------------+--------------------+--------------------+
|               title|         ingredients|        instructions|
+--------------------+--------------------+--------------------+
|  Baked Greens Chips|[6 to 8 ounces he...|Watch how to make...|
|Sweet Potato-Chic...|[2 large sweet po...|To prepare the ha...|
|         Cali Burger|[1/4 cup mayonnai...|For the chipotle ...|
|Oatmeal Cream Che...|[2 sticks unsalte...|Preheat the oven ...|
|    Campari Spritzer|[1 (12-ounce) can...|Stir the orange j...|
|Seared Rack of La...|[1/2 cup pistachi...|Watch how to make...|
|         Cream Puffs|[6 tablespoons un...|Special equipment...|
|Italian Style Hot...|[Cooking oil, sui...|In a saucepan ove...|
|    Crab Cakes Salad|[2 tablespoons fi...|For the salad: Co...|
|      Hot Cross Buns|[2 ounces fresh y...|Crumble the yeast...|
|Strawberries with...|[4 pints (8 cups)...|Thirty minutes to...|
|       Curry Chicken|[2 pounds chicken...|Put the sliced ch...|
|Golden Squash Blo...|[1 

##### Cooking Literature Data

The cooking literature data was pre-processed from PDF text files into a usable format in another notebook.

In [18]:
# Find the pre-chunked cooking-literature JSON among the files listed earlier.
# Sorting makes the choice of "first" file deterministic when several match.
cook_lit_files = sorted(file for file in all_files if "chunked_data" in file)

# Fail with a clear message rather than an IndexError on cook_lit_files[0].
if not cook_lit_files:
    raise FileNotFoundError(
        "No files containing 'chunked_data' were found in the working directory."
    )

# Only the first matching file is loaded. The pandas frame gets its own name
# so `cook_lit_df` always refers to the Spark DataFrame.
cook_lit_pdf = pd.read_json(cook_lit_files[0])
cook_lit_df = spark.createDataFrame(cook_lit_pdf)
cook_lit_df.show()

+--------------------+--------------------+--------------------+--------------------+------+
|           file_name|            metadata|            chunk_id|                body|tables|
+--------------------+--------------------+--------------------+--------------------+------+
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|# HIVAICES I # FO...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|# ADVANCES IN FOO...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|     NO_CONTENT_HERE|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|# ADVANCES IN FOO...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|# CRC Press # Tay...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|en made to publis...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|. Except as permi...|    []|
|ADVANCES_IN_FOOD_...|{date -> NULL, ti...|ADVANCES_IN_FOOD_...|nter, 

25/03/30 13:34:45 WARN TaskSetManager: Stage 8 contains a task of very large size (1335 KiB). The maximum recommended task size is 1000 KiB.
Exception ignored in: <_io.BufferedWriter name=5>
Traceback (most recent call last):
  File "/Users/jenny/ollama-env/lib/python3.13/site-packages/pyspark/python/lib/pyspark.zip/pyspark/daemon.py", line 193, in manager
BrokenPipeError: [Errno 32] Broken pipe


### Data Chunking
##### Recipes Data
The data was chunked at the recipe level so that each recipe can be referenced individually when needed. Since this use case is about modifying whole recipes, we want the model to be able to retrieve each recipe in its entirety. 

In [38]:
# One chunk per recipe: title, ingredients and instructions joined with
# newlines into a single "chunk_text" column.
chunk_text_col = f.concat_ws("\n", "title", "ingredients", "instructions")
recipes_df_chunk = recipes_df.withColumn("chunk_text", chunk_text_col)
recipes_df_chunk.show()

25/03/30 14:49:10 WARN TaskSetManager: Stage 28 contains a task of very large size (16301 KiB). The maximum recommended task size is 1000 KiB.


+--------------------+--------------------+--------------------+--------------------+
|               title|         ingredients|        instructions|          chunk_text|
+--------------------+--------------------+--------------------+--------------------+
|  Baked Greens Chips|[6 to 8 ounces he...|Watch how to make...|Baked Greens Chip...|
|Sweet Potato-Chic...|[2 large sweet po...|To prepare the ha...|Sweet Potato-Chic...|
|         Cali Burger|[1/4 cup mayonnai...|For the chipotle ...|Cali Burger\n1/4 ...|
|Oatmeal Cream Che...|[2 sticks unsalte...|Preheat the oven ...|Oatmeal Cream Che...|
|    Campari Spritzer|[1 (12-ounce) can...|Stir the orange j...|Campari Spritzer\...|
|Seared Rack of La...|[1/2 cup pistachi...|Watch how to make...|Seared Rack of La...|
|         Cream Puffs|[6 tablespoons un...|Special equipment...|Cream Puffs\n6 ta...|
|Italian Style Hot...|[Cooking oil, sui...|In a saucepan ove...|Italian Style Hot...|
|    Crab Cakes Salad|[2 tablespoons fi...|For the sal

### Generate Embeddings

In [None]:
# Load the sentence-embedding model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the "body" text of every chunk, one chunk at a time, and stack the
# vectors into a single float32 array.
# NOTE(review): `chunked_data` is not defined and `faiss` is not imported in
# any cell shown in this notebook -- confirm where they are meant to come from.
chunk_vectors = []
for chunk in chunked_data:
    chunk_vectors.append(embedding_model.encode(chunk["body"]))
embeddings = np.array(chunk_vectors, dtype=np.float32)

# Attach each vector to its chunk as a plain Python list.
for chunk, vector in zip(chunked_data, embeddings):
    chunk["embedding"] = vector.tolist()

# Build the FAISS L2 index over the embeddings. The index is only held in the
# `index` variable; nothing in this cell writes it to disk.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

### Model Ingestion

In [8]:
# Instantiate the LangChain community Ollama wrapper for the "llama3.2" model.
llm = Ollama(model="llama3.2")
# Confirmation message only.
# NOTE(review): whether the constructor actually contacts the Ollama server
# is not shown here -- confirm before relying on this as a health check.
print("Loaded Model")

Loaded Model


  llm = Ollama(model="llama3.2")


##### Prompt Engineering
The prompt entered by the user should only need to contain the recipe that the user wants to modify. The following prompt engineering code adds additional, consistent language that does the following: 
- Specifies that the user wants to modify the recipe while retaining its original intention
- Provides the dietary framework to stick to, in this case a high-protein, low-carb diet. In a later phase of development, this could be changed to let the user specify a diet of choice
- Requests a list of macronutrients based on the data

To modify the pancake recipe to suit a high-protein, low-carb diet, we'll make some key changes:

1. Replace all-purpose flour with an almond flour-based mixture: Almond flour is a good source of protein and has a lower carb content compared to traditional flour.
2. Use protein-rich milk alternative: We'll replace regular milk with unsweetened almond milk or another low-carb milk alternative, such as coconut milk.
3. Add more protein-rich ingredients: Introduce some protein powder (e.g., whey or pea) to boost the protein content of each pancake.

Here's the modified recipe:

Ingredients:

* 1 ½ cups almond flour
* 3 ½ teaspoons baking powder (make sure it's sugar-free)
* 1 tablespoon coconut sugar (or another low-carb sweetener)
* ¼ teaspoon salt
* 1/4 cup protein powder (whey or pea-based)
* 1 ¼ cups unsweetened almond milk
* 3 tablespoons melted butter
* 1 large egg

Directions:

1. Sift the almond flour, baking powder, coconut sugar, and salt together in a large bowl.
2. Make a well

##### RAG Component

In [None]:
def retrieve_relevant_chunks(query, k=5):
    """Retrieve the top-k most relevant chunks for ``query`` using FAISS.

    Relies on the notebook-level ``embedding_model``, ``index`` and
    ``chunked_data`` objects.

    Args:
        query: Text to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` entries of ``chunked_data``, in the order
        returned by the index search. Empty when ``k`` is not positive.
    """
    if k <= 0:
        return []

    # The search takes a 2-D batch of queries; FAISS works on float32 vectors.
    query_embedding = np.asarray(
        embedding_model.encode(query), dtype=np.float32
    ).reshape(1, -1)
    _distances, indices = index.search(query_embedding, k)

    # When the index holds fewer than k vectors, FAISS pads the result with
    # -1. Those must be skipped: chunked_data[-1] would silently return the
    # last chunk instead of "no result".
    return [chunked_data[i] for i in indices[0] if i >= 0]

def query_ollama_with_context(query):
    """Retrieve relevant context and query the llama3.2 model through Ollama.

    Args:
        query: The full user prompt (recipe plus instructions).

    Returns:
        The model's text answer.
    """
    retrieved_chunks = retrieve_relevant_chunks(query)
    # Combine the text of the retrieved chunks into one context block.
    context = "\n".join(chunk["body"] for chunk in retrieved_chunks)

    # Formulate prompt for LLaMA
    prompt = f"Context:\n{context}\n\nQuery: {query}\nAnswer:"

    # Use the notebook's `llm` handle (Ollama, model "llama3.2"). The previous
    # code called `ollama.chat`, but `ollama` is never imported in this
    # notebook, and it named a different model ("llama3") than the one loaded.
    return llm.invoke(prompt)

if __name__ == "__main__":
    # Ask for the recipe, then append the fixed instructions that steer the
    # model toward a high-protein, low-carb rewrite with a macronutrient list.
    query = input("Enter your recipe: ")
    # The closing quote was missing on this literal, which made the whole
    # cell a SyntaxError.
    query += " Modify this recipe so that it is more suited for a high-protein, low carb diet. Provide a list of macronutrients as a part of the analysis."
    answer = query_ollama_with_context(query)
    print("\nOllama's Answer:", answer)

### Recipe Evaluator

In [2]:
def evaluate_recipe(protein_g, fat_g, carb_g,
                    protein_range=(10, 30), fat_range=(20, 35), carb_range=(45, 65)):
    """Check whether a recipe's macronutrient calorie split falls in target ranges.

    Calories are computed at 4 kcal/g for protein and carbohydrate and
    9 kcal/g for fat. Each macronutrient's share of total calories is then
    compared against its inclusive (low, high) percentage range.

    Args:
        protein_g: Grams of protein.
        fat_g: Grams of fat.
        carb_g: Grams of carbohydrate.
        protein_range: Inclusive (low, high) percent-of-calories range for protein.
        fat_range: Inclusive (low, high) percent-of-calories range for fat.
        carb_range: Inclusive (low, high) percent-of-calories range for carbohydrate.

    Returns:
        "Meets Criteria" or "Does Not Meet Criteria", or a message starting
        with "Invalid recipe:" when any gram value is negative or the total
        calories are zero.
    """
    # Caloric values per gram
    PROTEIN_CAL = 4
    CARB_CAL = 4
    FAT_CAL = 9

    # Negative grams are meaningless. Without this guard the signs cancel in
    # the percentage calculation, so e.g. (-20, -10, -50) would be reported
    # as meeting the criteria.
    if min(protein_g, fat_g, carb_g) < 0:
        return "Invalid recipe: Macronutrient grams cannot be negative."

    protein_calories = protein_g * PROTEIN_CAL
    fat_calories = fat_g * FAT_CAL
    carb_calories = carb_g * CARB_CAL
    total_calories = protein_calories + fat_calories + carb_calories

    if total_calories == 0:
        return "Invalid recipe: Total calories cannot be zero."

    # Pair each macronutrient's percentage of total calories with its range.
    shares = (
        ((protein_calories / total_calories) * 100, protein_range),
        ((fat_calories / total_calories) * 100, fat_range),
        ((carb_calories / total_calories) * 100, carb_range),
    )

    if all(low <= pct <= high for pct, (low, high) in shares):
        return "Meets Criteria"
    return "Does Not Meet Criteria"
# Example usage: 3 g protein, 20 g fat, 100 g carbs.
# That is 12 + 180 + 400 = 592 kcal, i.e. roughly 2% protein, 30% fat and
# 68% carbs. Protein falls below the 10% floor and carbs exceed the 65% cap
# used in evaluate_recipe, so the recipe does not meet the criteria.
recipe_result = evaluate_recipe(protein_g=3, fat_g=20, carb_g=100)
print(recipe_result)

Does Not Meet Criteria
