## Retrieval-Augmented Generation

In [81]:
from dotenv import load_dotenv
import os
from pathlib import Path

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# 1) Env
# load_dotenv() is called so a local .env file can supply the key; os.getenv
# then reads it from the process environment.
load_dotenv()
groq_key = os.getenv("GROQ_API_KEY")
# Explicit raise instead of `assert`: assertions are removed when Python runs
# with -O / PYTHONOPTIMIZE, which would let a missing key slip through to the
# Groq client constructed later in this cell.
if not groq_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to .env or export it in the environment"
    )

# 2) Global model settings
# Model identifiers are named up front so they are easy to spot and change.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_NAME = "llama-3.1-8b-instant"

# Register the embedding model and the LLM on llama_index's Settings object.
Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.llm = Groq(model=LLM_MODEL_NAME, api_key=groq_key)

# 3) Load or build index
PERSIST_DIR = "storage"

persist_path = Path(PERSIST_DIR)
# A bare exists() check is also true for an empty directory or a stray file
# named "storage", which would send us into the load branch with nothing to
# load. Only treat the directory as a saved index when it actually has content.
has_persisted_index = persist_path.is_dir() and any(persist_path.iterdir())

if has_persisted_index:
    # Reuse the index saved by a previous run instead of re-embedding documents.
    # NOTE(review): the saved index is reused as-is; nothing here detects changes
    # to the files under "Data" or to the embedding model — delete PERSIST_DIR to
    # force a rebuild.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run (or empty storage dir): read every file under "Data"
    # recursively, build the vector index, and save it for later runs.
    documents = SimpleDirectoryReader("Data", recursive=True).load_data()
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# Query interface used by the cells below.
query_engine = index.as_query_engine()


2025-12-12 09:05:12,280 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-12 09:05:23,053 - INFO - Loading all indices.


In [82]:
# Ask the query engine a single question and print its answer.
question = "What is heart made up of"
response = query_engine.query(question)
print(response)


2025-12-12 09:05:49,778 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


The heart is made up of three main layers: the outermost layer, the middle layer responsible for pumping action, and the innermost layer that provides a smooth lining for the chambers and covers the valves.
