### Get the PDF

In [1]:
# Download PDF file
import os
import requests
from tqdm.auto import tqdm # for progress bars, requires !pip install tqdm 

def iterator(obj, istqdm = False):
    if istqdm:
        return tqdm(obj)
    else:
        return obj
    
# Get PDF document
pdf_path = "human-nutrition-text.pdf"

# Download PDF if it doesn't already exist
if not os.path.exists(pdf_path):
  print("File doesn't exist, downloading...")

  # The URL of the PDF you want to download
  url = "https://pressbooks.oer.hawaii.edu/humannutrition2/open/download?type=pdf"

  # The local filename to save the downloaded file
  filename = pdf_path

  # Send a GET request to the URL
  response = requests.get(url)

  # Check if the request was successful
  if response.status_code == 200:
      # Open a file in binary write mode and save the content to it
      with open(filename, "wb") as file:
          file.write(response.content)
      print(f"The file has been downloaded and saved as {filename}")
  else:
      print(f"Failed to download the file. Status code: {response.status_code}")
else:
  print(f"File {pdf_path} exists.")

File human-nutrition-text.pdf exists.


### Open and pre-process the text

In [2]:
# Requires !pip install PyMuPDF, see: https://github.com/pymupdf/pymupdf
import fitz # (pymupdf, found this is better than pypdf for our use case, note: licence is AGPL-3.0, keep that in mind if you want to use any code commercially)


def text_formatter(text: str) -> str:
    """Performs minor formatting on text."""
    cleaned_text = text.replace("\n", " ").strip() # note: this might be different for each doc (best to experiment)

    # Other potential text formatting functions can go here
    return cleaned_text

# Open PDF and get lines/pages
# Note: this only focuses on text, rather than images/figures etc
def open_and_read_pdf(pdf_path: str, istqdm = False) -> list[dict]:
    """
    Opens a PDF file, reads its text content page by page, and collects statistics.

    Parameters:
        pdf_path (str): The file path to the PDF document to be opened and read.

    Returns:
        list[dict]: A list of dictionaries, each containing the page number
        (adjusted), character count, word count, sentence count, token count, and the extracted text
        for each page.
    """
    doc = fitz.open(pdf_path)  # open a document
    pages_and_texts = []
    for page_number, page in iterator(enumerate(doc), istqdm):  # iterate the document pages
        text = page.get_text()  # get plain text encoded as UTF-8
        text = text_formatter(text)
        pages_and_texts.append({"page_number": page_number,  # adjust page numbers since our PDF starts on page 42
                                "page_char_count": len(text),
                                "page_word_count": len(text.split(" ")),
                                "page_sentence_count_raw": len(text.split(". ")),
                                "page_token_count": len(text) / 4,  # 1 token = ~4 chars, see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
                                "text": text})
    return pages_and_texts


### Initialize NLP pipelines

In [3]:
from spacy.lang.en import English # see https://spacy.io/usage for install instructions

nlp = English()
# Add a sentencizer pipeline, see https://spacy.io/api/sentencizer/ 
nlp.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x154928006d0>

In [4]:
def apply_spacy_nlp(pages_and_texts: dict, istqdm = False) -> dict:
    for item in iterator(pages_and_texts, istqdm):
        item["sentences"] = list(nlp(item["text"]).sents)
        
        # Make sure all sentences are strings
        item["sentences"] = [str(sentence) for sentence in item["sentences"]]
        
        # Count the sentences 
        item["page_sentence_count_spacy"] = len(item["sentences"])
    return pages_and_texts

### Chunking the document into sentences

In [5]:
# Define split size to turn groups of sentences into chunks
# Create a function that recursively splits a list into desired sizes
def split_list(input_list: list, 
               slice_size: int) -> list[list[str]]:
    """
    Splits the input_list into sublists of size slice_size (or as close as possible).

    For example, a list of 17 sentences would be split into two lists of [[10], [7]]
    """
    return [input_list[i:i + slice_size] for i in range(0, len(input_list), slice_size)]

def chunk_sentences(pages_and_texts: dict, num_sentence_chunk_size: int, istqdm = False) -> dict:
    # Loop through pages and texts and split sentences into chunks+
    for item in iterator(pages_and_texts, istqdm):
        item["sentence_chunks"] = split_list(input_list=item["sentences"],
                                            slice_size=num_sentence_chunk_size)
        item["num_chunks"] = len(item["sentence_chunks"])
    return pages_and_texts

### Assigning metadata to each chunk and restructuring the data structure into a list of chunks with metadata

In [6]:
import re
# Split each chunk into its own item
# This is used at the very end to use the indicies to reference the pages

def restructure_chunks(pages_and_texts: dict, istqdm = False) -> dict:
    pages_and_chunks = []
    
    for item in iterator(pages_and_texts, istqdm):
        for sentence_chunk in item["sentence_chunks"]:
            chunk_dict = {}
            chunk_dict["page_number"] = item["page_number"]
            
            # Join the sentences together into a paragraph-like structure, aka a chunk (so they are a single string)
            joined_sentence_chunk = "".join(sentence_chunk).replace("  ", " ").strip()
            joined_sentence_chunk = re.sub(r'\.([A-Z])', r'. \1', joined_sentence_chunk) # ".A" -> ". A" for any full-stop/capital letter combo 
            chunk_dict["sentence_chunk"] = joined_sentence_chunk

            # Get stats about the chunk
            chunk_dict["chunk_char_count"] = len(joined_sentence_chunk)
            chunk_dict["chunk_word_count"] = len([word for word in joined_sentence_chunk.split(" ")])
            chunk_dict["chunk_token_count"] = len(joined_sentence_chunk) / 4 # 1 token = ~4 characters
            
            pages_and_chunks.append(chunk_dict)
    return pages_and_chunks

### Filter out by the preset min_token_length

In [7]:
# it feels like this could be done more intelligently, but it's a good starting point
import pandas as pd

def filter_pages_and_texts(pages_and_chunks: dict, 
                           min_token_length: int) -> dict:
    df = pd.DataFrame(pages_and_chunks)
    pages_and_chunks_over_min_token_len = df[df["chunk_token_count"] > min_token_length].to_dict(orient="records")
    return pages_and_chunks_over_min_token_len

### Get and test the embedding model

In [8]:
# Requires !pip install sentence-transformers
from sentence_transformers import SentenceTransformer
embedding_model_name = "all-mpnet-base-v2"
embedding_model = SentenceTransformer(model_name_or_path=embedding_model_name, 
                                      device="cuda") # choose the device to load the model to (note: GPU will often be *much* faster than CPU)
#embedding_model.to("cuda")
def get_embedding(text):
    url = "http://localhost:49152/api/embeddings"
    payload = {
        "model": "nomic-embed-text",
        "prompt": text
    }
    response = requests.post(url, json=payload)
    return response.json()['embedding']

def apply_ollama_embeddings(pages_and_chunks_over_min_token_len: dict, 
                     istqdm = False) -> dict:
    
    for item in iterator(pages_and_chunks_over_min_token_len, istqdm):
        item["embedding"] = get_embedding(item['sentence_chunk'])
    
    return pages_and_chunks_over_min_token_len

def apply_embeddings(pages_and_chunks_over_min_token_len: dict, 
                     embedding_model: SentenceTransformer,
                     istqdm = False,
                     flatten = False) -> dict:
    if not flatten:
        for item in iterator(pages_and_chunks_over_min_token_len, istqdm):
            item["embedding"] = embedding_model.encode(item["sentence_chunk"],
                                                batch_size=32,
                                                convert_to_tensor=True)
    else:
        text_chunks = [item["sentence_chunk"] for item in pages_and_chunks_over_min_token_len]
        embeddings = embedding_model.encode(text_chunks)
        print(embeddings)
        for embedding, item in iterator(zip(embeddings, pages_and_chunks_over_min_token_len), istqdm):
            item["embedding"] = embedding
    
    return pages_and_chunks_over_min_token_len

In [9]:
from functools import reduce

pages_and_texts = open_and_read_pdf(pdf_path=pdf_path)
pages_and_texts = apply_spacy_nlp(pages_and_texts)
pages_and_texts = chunk_sentences(pages_and_texts,num_sentence_chunk_size = 10 )
pages_and_chunks = restructure_chunks(pages_and_texts)
pages_and_chunks = filter_pages_and_texts(pages_and_chunks,30)
embedded_pages_and_chunks = apply_embeddings(pages_and_chunks,embedding_model,flatten=True)

[[ 0.06742426  0.09022821 -0.00509549 ... -0.02211546 -0.02321364
   0.01256908]
 [ 0.0552156   0.0592139  -0.01661676 ... -0.01204061 -0.01028473
   0.02273969]
 [ 0.02798019  0.03398141 -0.02064265 ... -0.00536189  0.02125598
   0.03130551]
 ...
 [ 0.07705151  0.00978554 -0.01218174 ... -0.04086804 -0.07517631
  -0.02405259]
 [ 0.10304512 -0.0164702   0.00826845 ... -0.05742176 -0.02828029
  -0.02946858]
 [ 0.08637737 -0.01253593 -0.01127469 ... -0.05223795 -0.03367297
  -0.02986604]]


### Connect to chromadb

In [10]:
import chromadb

chroma_client = chromadb.HttpClient(host='localhost', port=49151)

In [11]:
chroma_client.delete_collection(name='testing_python_creation')

In [12]:
collection = chroma_client.get_or_create_collection(name='testing_python_creation')

### Save embeddings

In [13]:
embedding_model_name = "all-mpnet-base-v2"
path = 'C:\\Users\\crossfire234\\Desktop\\Software\\LLMs\\simple-local-rag\\simple-local-rag\\human-nutrition-text.pdf'
file_name = os.path.basename(path)
collection.add(
    documents=[item['sentence_chunk'] for item in embedded_pages_and_chunks],
    metadatas=[{
        'page_number': item['page_number'],
        'char_count': item['chunk_char_count'],
        'word_count': item['chunk_word_count'],
        'token_count': item['chunk_token_count'],
        'embedding_model': embedding_model_name,
        'url': path
    } for item in embedded_pages_and_chunks],
    ids=[f"{file_name}_chunk_{i}" for i in range(len(embedded_pages_and_chunks))],
    embeddings=[item['embedding'].tolist() for item in embedded_pages_and_chunks]
)

### Query

In [14]:
query = "macronutrients functions"
query_embedding = embedding_model.encode(query)

In [15]:
results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=5
)
for result in results['documents'][0]:
    print(result)

Macronutrients Nutrients that are needed in large amounts are called macronutrients. There are three classes of macronutrients: carbohydrates, lipids, and proteins. These can be metabolically processed into cellular energy. The energy from macronutrients comes from their chemical bonds. This chemical energy is converted into cellular energy that is then utilized to perform work, allowing our bodies to conduct their basic functions. A unit of measurement of food energy is the calorie. On nutrition food labels the amount given for “calories” is actually equivalent to each calorie multiplied by one thousand. A kilocalorie (one thousand calories, denoted with a small “c”) is synonymous with the “Calorie” (with a capital “C”) on nutrition food labels. Water is also a macronutrient in the sense that you require a large amount of it, but unlike the other macronutrients, it does not yield calories. Carbohydrates Carbohydrates are molecules composed of carbon, hydrogen, and oxygen.
Water There 

In [16]:
embedding_model_name = "all-mpnet-base-v2"

### Checking local GPU memory availability

Let's find out what hardware we've got available and see what kind of model(s) we'll be able to load.

> **Note:** You can also check this with the `!nvidia-smi` command.

In [17]:
# Get GPU available memory
import torch
gpu_memory_bytes = torch.cuda.get_device_properties(0).total_memory
gpu_memory_gb = round(gpu_memory_bytes / (2**30))
print(f"Available GPU memory: {gpu_memory_gb} GB")

Available GPU memory: 11 GB


### Pick gemma model based on GPU memory

In [18]:
# Note: the following is Gemma focused, however, there are more and more LLMs of the 2B and 7B size appearing for local use.
if gpu_memory_gb < 5.1:
    print(f"Your available GPU memory is {gpu_memory_gb}GB, you may not have enough memory to run a Gemma LLM locally without quantization.")
elif gpu_memory_gb < 8.1:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in 4-bit precision.")
    use_quantization_config = True 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb < 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.")
    use_quantization_config = False 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb > 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommend model: Gemma 7B in 4-bit or float16 precision.")
    use_quantization_config = False 
    model_id = "google/gemma-7b-it"

print(f"use_quantization_config set to: {use_quantization_config}")
print(f"model_id set to: {model_id}")

GPU memory: 11 | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.
use_quantization_config set to: False
model_id set to: google/gemma-2b-it


### Authenticating hugging face account

In [19]:
from huggingface_hub import login
# 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
token = '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'
login(token)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\crossfire234\.cache\huggingface\token
Login successful


### Ollama API
- how do I get the tokenizer
- what configuration is available
https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-completion
- gpu config
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama

- how do I write this as a docker compose
- what was the way to persist storage on chroma update?
https://www.youtube.com/watch?v=61kaK-e3Owc&t=634s (Seems to just require the host volume to be mounted)
```volumes:
      - ./chroma_data:/chroma/chroma
```

#### Raw mode
https://github.com/ollama/ollama/blob/main/docs/api.md#request-raw-mode


### Load the LLM model

In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available 

# 1. Create quantization config for smaller model loading (optional)
# Requires !pip install bitsandbytes accelerate, see: https://github.com/TimDettmers/bitsandbytes, https://huggingface.co/docs/accelerate/
# For models that require 4-bit quantization (use this if you have low GPU memory available)
from transformers import BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)

# Bonus: Setup Flash Attention 2 for faster inference, default to "sdpa" or "scaled dot product attention" if it's not available
# Flash Attention 2 requires NVIDIA GPU compute capability of 8.0 or above, see: https://developer.nvidia.com/cuda-gpus
# Requires !pip install flash-attn, see: https://github.com/Dao-AILab/flash-attention 
if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
  attn_implementation = "flash_attention_2"
else:
  attn_implementation = "sdpa"
print(f"[INFO] Using attention implementation: {attn_implementation}")

# 2. Pick a model we'd like to use (this will depend on how much GPU memory you have available)
#model_id = "google/gemma-7b-it"
model_id = model_id # (we already set this above)
print(f"[INFO] Using model_id: {model_id}")

# 3. Instantiate tokenizer (tokenizer turns text into numbers ready for the model) 
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# 4. Instantiate the model
llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id, 
                                                 torch_dtype=torch.float16, # datatype to use, we want float16
                                                 quantization_config=quantization_config if use_quantization_config else None,
                                                 low_cpu_mem_usage=False, # use full memory 
                                                 attn_implementation=attn_implementation) # which attention version to use

if not use_quantization_config: # quantization takes care of device setting automatically, so if it's not used, send model to GPU 
    llm_model.to("cuda")

[INFO] Using attention implementation: sdpa
[INFO] Using model_id: google/gemma-2b-it


Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [19]:
llm_model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaR

### Print number of model parameters

In [21]:
def get_model_num_params(model: torch.nn.Module):
    return sum([param.numel() for param in model.parameters()])

get_model_num_params(llm_model)

2506172416

In [22]:
def get_model_mem_size(model: torch.nn.Module):
    """
    Get how much memory a PyTorch model takes up.

    See: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822
    """
    # Get model parameters and buffer sizes
    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_buffers = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])

    # Calculate various model sizes
    model_mem_bytes = mem_params + mem_buffers # in bytes
    model_mem_mb = model_mem_bytes / (1024**2) # in megabytes
    model_mem_gb = model_mem_bytes / (1024**3) # in gigabytes

    return {"model_mem_bytes": model_mem_bytes,
            "model_mem_mb": round(model_mem_mb, 2),
            "model_mem_gb": round(model_mem_gb, 2)}

get_model_mem_size(llm_model)

{'model_mem_bytes': 5012344832, 'model_mem_mb': 4780.14, 'model_mem_gb': 4.67}

### Query the LLM with the test prompt to get the embedding output

### Test questions (queries)

In [23]:
# Nutrition-style questions generated with GPT4
gpt4_questions = [
    "What are the macronutrients, and what roles do they play in the human body?",
    "How do vitamins and minerals differ in their roles and importance for health?",
    "Describe the process of digestion and absorption of nutrients in the human body.",
    "What role does fibre play in digestion? Name five fibre containing foods.",
    "Explain the concept of energy balance and its importance in weight management."
]

# Manually created question list
manual_questions = [
    "How often should infants be breastfed?",
    "What are symptoms of pellagra?",
    "How does saliva help with digestion?",
    "What is the RDI for protein per day?",
    "water soluble vitamins"
]

query_list = gpt4_questions + manual_questions

### Create a function to apply a base prompt to every chat

In [24]:
def prompt_formatter(query: str, 
                     context_items: list[dict]) -> str:
    """
    Augments query with text-based context from context_items.
    """
    # Join context items into one dotted paragraph
    context = "- " + "\n- ".join(context_items)

    # Create a base prompt with examples to help the model
    # Note: this is very customizable, I've chosen to use 3 examples of the answer style we'd like.
    # We could also write this in a txt file and import it in if we wanted.
    base_prompt = """Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as explanatory as possible.
Use the following examples as reference for the ideal answer style.
\nExample 1:
Query: What are the fat-soluble vitamins?
Answer: The fat-soluble vitamins include Vitamin A, Vitamin D, Vitamin E, and Vitamin K. These vitamins are absorbed along with fats in the diet and can be stored in the body's fatty tissue and liver for later use. Vitamin A is important for vision, immune function, and skin health. Vitamin D plays a critical role in calcium absorption and bone health. Vitamin E acts as an antioxidant, protecting cells from damage. Vitamin K is essential for blood clotting and bone metabolism.
\nExample 2:
Query: What are the causes of type 2 diabetes?
Answer: Type 2 diabetes is often associated with overnutrition, particularly the overconsumption of calories leading to obesity. Factors include a diet high in refined sugars and saturated fats, which can lead to insulin resistance, a condition where the body's cells do not respond effectively to insulin. Over time, the pancreas cannot produce enough insulin to manage blood sugar levels, resulting in type 2 diabetes. Additionally, excessive caloric intake without sufficient physical activity exacerbates the risk by promoting weight gain and fat accumulation, particularly around the abdomen, further contributing to insulin resistance.
\nExample 3:
Query: What is the importance of hydration for physical performance?
Answer: Hydration is crucial for physical performance because water plays key roles in maintaining blood volume, regulating body temperature, and ensuring the transport of nutrients and oxygen to cells. Adequate hydration is essential for optimal muscle function, endurance, and recovery. Dehydration can lead to decreased performance, fatigue, and increased risk of heat-related illnesses, such as heat stroke. Drinking sufficient water before, during, and after exercise helps ensure peak physical performance and recovery.
\nNow use the following context items to answer the user query:
{context}
\nRelevant passages: <extract relevant passages from the context here>
User query: {query}
Answer:"""

    # Update base prompt with context items and query   
    base_prompt = base_prompt.format(context=context, query=query)

    # Create prompt template for instruction-tuned model
    dialogue_template = [
        {"role": "user",
        "content": base_prompt}
    ]

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                          tokenize=False,
                                          add_generation_prompt=True)
    return prompt

### Query the LLM with context

In [25]:
def ask(query,
        embedding_model,
        collection,
        temperature=0.7,
        max_new_tokens=512,
        format_answer_text=True, 
        return_answer_only=True):
    """
    Takes a query, finds relevant resources/context and generates an answer to the query based on the relevant resources.
    """
    
    # Get just the scores and indices of top related results
    query_embedding = embedding_model.encode(query)
    
    # Create a list of context items
    results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=5
)
    context_items = [result for result in results['documents'][0]]

    # Add score to context item
    """ for i, item in enumerate(context_items):
        item["score"] = scores[i].cpu() # return score back to CPU  """
        
    # Format the prompt with context items
    prompt = prompt_formatter(query=query,
                              context_items=context_items)
    
    # Tokenize the prompt
    input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Generate an output of tokens
    outputs = llm_model.generate(**input_ids,
                                 temperature=temperature,
                                 do_sample=True,
                                 max_new_tokens=max_new_tokens)
    
    # Turn the output tokens into text
    output_text = tokenizer.decode(outputs[0])

    if format_answer_text:
        # Replace special tokens and unnecessary help message
        output_text = output_text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "").replace("Sure, here is the answer to the user query:\n\n", "")

    # Only return the answer without the context items
    if return_answer_only:
        return output_text
    
    return output_text, context_items

In [26]:
# Define helper function to print wrapped text 
import textwrap

def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)

In [27]:
from numpy import random
query = random.choice(query_list)
for query in query_list:
    print(f"Query: {query}")

    # Answer query with context and return context 
    answer, context_items = ask(query,
                                embedding_model, 
                                collection,
                                temperature=0.7,
                                max_new_tokens=512,
                                return_answer_only=False)

    print(f"Answer:\n")
    print_wrapped(answer)
    print(f"Context items:")
    context_items

Query: What are the macronutrients, and what roles do they play in the human body?


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Answer:

Sure, here's the information from the context regarding the macronutrients and
their roles in the human body:  **Macronutrients**  * Carbohydrates: Provide
energy for various bodily functions. * Lipids: Assist in the formation of cell
structure and provide stored energy. * Proteins: Are essential for tissue
formation, cell repair, and hormone and enzyme production.  **Water**  * Water
is crucial for maintaining blood volume, regulating body temperature, and
facilitating chemical reactions in the body.  * Over 60% of total body weight is
water, making it of utmost importance for bodily functions.
Context items:
Query: How do vitamins and minerals differ in their roles and importance for health?
Answer:

The context does not provide relevant passages about the differences between
vitamins and minerals, so I cannot answer this question from the provided
context.
Context items:
Query: Describe the process of digestion and absorption of nutrients in the human body.
Answer:

Sure, h