### NutriChat - Data Ingestion & Retrieval Implementation
This notebook implements the core RAG system components:
1. Loading the processed nutrition data into MinSearch
2. Setting up the search index over the nutritional fields
3. Implementing retrieval functions with field-specific boosts
4. Building the RAG pipeline from search, prompt, and LLM components

In [None]:
import requests 
import minsearch

# Download the nutrition dataset (JSON) that serves as the knowledge base.
docs_url = 'https://raw.githubusercontent.com/milanimcgraw/NutriChat/refs/heads/main/nutritionfacts.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail with a clear HTTP error on 4xx/5xx instead of a confusing JSON decode
# error when the server returns an error page.
docs_response.raise_for_status()
nutrition_data = docs_response.json()

<minsearch.Index at 0x1d9c9bd8890>

### Knowledge Base

In [None]:
# Collect every record stored under the 'nutritionfacts' key into a list.
documents = list(nutrition_data['nutritionfacts'])

In [None]:
# Display the first record to see which fields each document carries.
documents[0]

In [None]:
# Create Index: every nutrition column is registered as a text field, and
# "Category" is additionally registered as a keyword field.
index = minsearch.Index(
    text_fields=[
        "Food",
        "Measure",
        "Category",
        "Grams",
        "Calories",
        "Protein",
        "Fat",
        "Sat.Fat",
        "Fiber",
        "Carbs",
    ],
    keyword_fields=[
        "Category",
    ],
)

In [None]:
# Sample user question; it is sent unchanged to the model in the direct
# (no-retrieval) chat completion call further down.
query = 'What foods are high in protein and low in fat?'

In [None]:
# Fit Index on the knowledge-base documents built above.
# NOTE(review): presumably this must run before index.search() is used by
# search() below — confirm against the minsearch documentation.
index.fit(documents)

In [None]:
from openai import OpenAI

# Shared OpenAI client, reused by llm() further down.
client = OpenAI()

# Baseline: send the raw question to the model without any retrieved context.
response = client.chat.completions.create(
    messages=[dict(role="user", content=query)],
    model="gpt-4",
)

# Show the text of the first returned choice as the cell output.
response.choices[0].message.content

In [None]:
# Search Function
def search(query):
    """Query the module-level ``index`` for ``query`` with per-field boosts.

    No keyword filter is applied and 5 results are requested. The value
    returned by ``index.search`` is passed back unchanged.
    """
    # Relative weight given to each text field when scoring a match.
    field_weights = {
        'Food': 3.0,
        'Measure': 2.0,
        'Grams': 1.0,
        'Calories': 2.5,
        'Protein': 2.0,
        'Fat': 2.0,
        'Sat.Fat': 1.5,
        'Fiber': 1.5,
        'Carbs': 2.0,
        'Category': 1.0,
    }

    return index.search(
        query=query,
        filter_dict=None,
        boost_dict=field_weights,
        num_results=5,
    )

In [None]:
#Build Prompt
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved documents.

    Each item of ``search_results`` is indexed by the keys 'Food', 'Measure',
    'Calories', 'Protein', 'Fat', 'Sat.Fat', 'Fiber', 'Carbs' and 'Category';
    a missing key raises ``KeyError``. The per-document blocks are
    concatenated into the CONTEXT section of the template, and the final
    prompt is returned with surrounding whitespace stripped.
    """
    prompt_template = """
    You're a nutritionist working as an assistant. Answer the QUESTION based on the CONTEXT from the nutrition data.
    Use only the facts from the CONTEXT when answering the QUESTION. Be specific about measurements and values.

    QUESTION: {question}

    CONTEXT: 
    {context}
    """.strip()

    # One text block per document; adjacent f-string literals are joined
    # into a single string for each block.
    entries = []
    for doc in search_results:
        entries.append(
            f"Food: {doc['Food']}\n"
            f"Measure: {doc['Measure']}\n"
            f"Nutritional Information: {doc['Calories']} calories, {doc['Protein']}g protein, "
            f"{doc['Fat']}g fat, {doc['Sat.Fat']}g saturated fat, "
            f"{doc['Fiber']}g fiber, {doc['Carbs']}g carbohydrates\n"
            f"Category: {doc['Category']}\n\n"
        )
    context = "".join(entries)

    return prompt_template.format(question=query, context=context).strip()

In [None]:
# LLM Function
def llm(prompt):
    """Send ``prompt`` as a single user message to the 'gpt-4' model.

    Uses the module-level ``client`` and returns the message content of the
    first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model='gpt-4',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# RAG Function
def rag(query):
    """Answer ``query`` by chaining search -> build_prompt -> llm.

    Returns only the value produced by ``llm``; the retrieved documents are
    not returned.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [None]:
# MinSearch - Test Pipeline
def test_minsearch_pipeline():
    """Run one sample question through the pipeline and print the outcome.

    Prints the query, the model's answer, and the Food/Category of each
    retrieved document that was used to build the prompt.
    """
    query = "What are some low-calorie options in dairy products?"
    # rag() returns only the answer, so unpacking it as `answer, results`
    # fails. Run the same stages explicitly to keep the retrieved documents
    # for the report below.
    results = search(query)
    answer = llm(build_prompt(query, results))
    print("\nTest MinSearch Pipeline:")
    print("Query:", query)
    print("\nAI Response:")
    print(answer)
    print("\nTop Results Used:")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. {result['Food']} ({result['Category']})")