In [1]:
from dotenv import load_dotenv
import chromadb
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [2]:
load_dotenv()

True

# List documents text and add them to vector database

In [3]:
# Load and split documents (for larger datasets)
documents = [
    "RAG stands for Retrieval-Augmented Generation.",
    "The capital of France is Paris.",
    "Machine learning enables AI models to improve over time.",
    "Python is commonly used for data science and AI applications.",
]


In [4]:
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = [{"content": doc} for doc in documents]

In [5]:
# Initialize ChromaDB
client = chromadb.PersistentClient(path="./chroma_db")  # Saves database locally
collection = client.get_or_create_collection(name="rag_demo")


In [6]:
# Use Sentence Transformers for embeddings
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store documents as embeddings in ChromaDB
for i, doc in enumerate(docs):
    collection.add(ids=[str(i)], documents=[doc["content"]], embeddings=[embedding_model.embed_query(doc["content"])])
    

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Insert of existing embedding ID: 0
Add of existing embedding ID: 0
Insert of existing embedding ID: 1
Add of existing embedding ID: 1
Insert of existing embedding ID: 2
Add of existing embedding ID: 2
Insert of existing embedding ID: 3
Add of existing embedding ID: 3


# Implement the Retriever

In [7]:
def retrieve_top_k(query, k=2):
    query_embedding = embedding_model.embed_query(query)
    results = collection.query(query_embeddings=[query_embedding], n_results=k)
    if results and results["documents"]:
        return results["documents"][0]
    return []

# Test retrieval
query = "What is RAG?"
retrieved_context = retrieve_top_k(query)
print("Retrieved Documents:", retrieved_context)

Retrieved Documents: ['RAG stands for Retrieval-Augmented Generation.', 'Python is commonly used for data science and AI applications.']


# Use GPT for Response Generation

In [15]:
import os
import requests

API_KEY: str = os.getenv("HUGGING_FACE_KEY", default="")

query = "What is RAG?"

generated_prompt_with_context = retrieve_top_k(query, 1)

API_URL = "https://api-inference.huggingface.co/models/gpt2"
headers = {"Authorization": f"Bearer {API_KEY}"}

rag_prompt = f"""
Using following context:
{"\n".join(generated_prompt_with_context)}\nAnswer the question: {query} 
"""

payload = {"inputs": rag_prompt}
response = requests.post(API_URL, headers=headers, json=payload)

response_data = response.json()
for choice in response_data[0]["generated_text"].split("\n"):
    print(choice.replace("Answer:", "").strip())



Using following context:
RAG stands for Retrieval-Augmented Generation.
Answer the question: What is RAG?
A repeating rusherer comes from the precomputed control that many of us have derived from our time spent in the sciences. A DRMN uses this control many times over its lifespan to partition an ALLC into additional subunits for analysis. As a result, as RAG are averaged over all our "one-to-one" lives, RAG will break down on a molecular level into every single LTSL LTSL RAG with a bounding mathematical term applicable within the LTSL.
Take RAG sequenced from its earliest formation (destruction) and combined with 100 genes. Then four more lecithin and Cohamot-Heissères proteins (amplify-reinfection-long-lived spindle protein) are inscribed. Colony weights of the six malaria genes show up over this extended length. About 80 alleles up to 11 are Merkel cells; 8 for Akkermans; 1 for Afrumans and 5 for Dakkarzone. The web of alignments that allows a plasmid to be identically mapped acro

# Using GEMINI model

In [None]:
from google import genai

# Initialize Bedrock client
API_KEY: str = os.getenv("GEMINI_API_KEY", default="")

rag_prompt = f"""
Using following context:
{"\n".join(generated_prompt_with_context)}\nAnswer the question: {query} 
"""

client = genai.Client(api_key=API_KEY)

response = client.models.generate_content(
    model='gemini-2.0-flash', contents=rag_prompt
)
print(response.text)