# RAG Model (Pipeline)

In [50]:
# Install required libraries
!pip install PyPDF2 chromadb google-generativeai python-dotenv

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Step1: Indexing

In [51]:
# Import libraries and configure API key from .env for embedding and storage

# Import required libraries
from PyPDF2 import PdfReader
from chromadb import PersistentClient
from typing import List
import google.generativeai as genai
from google.generativeai import GenerativeModel, embed_content
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get API key from .env
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in .env file. Please check your .env file and ensure it contains a valid key.")

# Configure the Gemini API key
from google.generativeai import configure
configure(api_key=api_key)

# Custom embedding function for Gemini
def gemini_embedding_function(texts: List[str]):
    embeddings = embed_content(model="models/embedding-001", content=texts, task_type="RETRIEVAL_DOCUMENT")
    # Debug: Print the type and structure of embeddings
    print(f"Embeddings type: {type(embeddings)}, Content: {embeddings}")
    # Extract the embedding vector list directly
    return embeddings['embedding']

# Initialize ChromaDB client
client = PersistentClient(path="./chroma_store")

In [52]:
# Load PDF document and split text into chunks

# Function to load PDF and extract text
def load_doc(file_path: str) -> str:
    doc = PdfReader(file_path)
    text = ""
    for page in doc.pages:
        text += page.extract_text()
    return text

# Load the PDF
file_path = "Prompt Engineering Notes.pdf"
raw_text = load_doc(file_path)

# Function to chunk text
def chunk_text(text: str, chunk_size: int = 1000) -> List[str]:
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

# Split text into chunks
chunks = chunk_text(raw_text)
print(f"Number of chunks created: {len(chunks)}")

Number of chunks created: 2


In [53]:
# Embed text chunks and store them in ChromaDB

# Create or get collection with custom embedding function
collection = client.get_or_create_collection(name="prompt_engineering_data")

# Embed and store chunks
for idx, chunk in enumerate(chunks):
    embedding = gemini_embedding_function([chunk])  # Returns [[-0.0038651999, ...]]
    collection.add(
        embeddings=embedding,  # Pass the embedding list directly
        documents=[chunk],
        ids=[f"chunk_{idx}"]
    )

print("Chunks embedded and stored.")

Embeddings type: <class 'dict'>, Content: {'embedding': [[-0.0038651999, -0.057339083, -0.013568151, 0.0008058864, 0.04618785, 0.05861834, 0.0324567, -0.011731126, 0.031681046, 0.039041698, 0.026550077, 0.018093457, -0.010437395, -0.012460071, -0.019655965, -0.056106415, 0.019210948, 0.036232214, 0.0029842732, -0.038603187, -0.0120304, -0.036876503, 0.021129863, -0.018699674, 0.01252138, 0.011008919, 0.009497376, -0.08484459, -0.03287711, 0.018966438, -0.014902838, 0.041731533, -0.020397991, 0.032543097, -0.017294047, -0.017387578, 0.03352678, 0.0028630437, 0.01831637, 0.029306646, -0.027892793, -0.0059806583, -0.011083477, -0.008029205, 0.0043557244, -0.030003915, -0.013227758, 0.04225345, 0.013577571, -0.047630258, 0.020505244, 0.016266547, 0.04085291, 0.002855867, -0.013012458, -0.018074108, 0.009592166, -0.0088652335, 0.0038918382, 0.03425106, -0.0036657054, 0.0012819235, -0.03239932, -0.0114647485, -0.014412447, -0.059433434, -0.031851605, 0.028379824, 0.06635605, -0.004074331, 0.

# Step2: Retrieval

In [54]:
# Implement and test retrieval of relevant chunks based on a query

# Function to retrieve relevant chunks
def retrieve_chunks(query: str, n_results: int = 3) -> List[str]:
    query_embedding = gemini_embedding_function([query])  # Returns [[...]]
    print(f"query_embedding type: {type(query_embedding)}, content: {query_embedding}")  # Debug print
    results = collection.query(query_embeddings=query_embedding, n_results=n_results)  # Use the list directly
    return results['documents'][0]

# Define query
query = "What is prompt engineering, also let me know the fundamental principles for the same"
retrieved_docs = retrieve_chunks(query)
print("Retrieved documents:", retrieved_docs)

Embeddings type: <class 'dict'>, Content: {'embedding': [[0.05979032, -0.005721661, -0.04536935, 0.0010204975, 0.044384606, 0.036143757, 0.035445698, -0.037998218, -0.0006648548, 0.07056057, 0.00039240002, 0.018596409, -0.0383092, 0.001544025, -0.014792006, -0.051335014, 0.022678455, 0.014955542, 0.04044032, -0.0823621, -0.008511623, -0.023352385, -0.014367334, -0.010154496, 0.013561452, -0.0147012975, -0.012276829, -0.06841165, 0.0010663831, 0.01805124, -0.06167818, 0.037191868, -0.0349311, -0.02301449, 0.008458694, -0.038687933, 0.012698167, -0.0008894162, -0.0008882697, 0.03955066, -0.010698935, 0.015840298, -0.042327087, -0.0035094353, 0.021065736, -0.010070455, -0.03814773, 0.07252117, 0.036233913, -0.0456164, 0.019769887, 0.008234902, 0.057150986, -0.035741508, -0.01426024, -0.08970876, 0.01430669, -0.045344915, 0.016355572, 0.037037935, -0.038524467, -0.014640292, 0.04642018, -0.017988479, -0.022767993, -0.059890445, -0.011453202, 0.041233134, 0.050100867, 0.024012843, 0.0246923

# Step3: Augmentation

In [55]:
# Combine retrieved documents into a single context
def augment_context(query: str, retrieved_docs: List[str]) -> str:
    context = "Based on the following information, answer the query:\n\n"
    context += "\n".join(retrieved_docs)
    context += f"\n\nQuery: {query}"
    return context

# Use the retrieved documents from the previous step
query = "What is prompt engineering, also let me know the fundamental principles for the same"
augmented_context = augment_context(query, retrieved_docs)
print("Augmented context:\n", augmented_context)

Augmented context:
 Based on the following information, answer the query:

Prompt Engineering Notes 
Prompt engineering is the art and science of designing effective input prompts to elicit desired 
responses from AI models, such as large language models (LLMs) or generative AI systems. As 
AI becomes integral to tasks like content creation, coding, and data analysis, the quality of 
prompts directly impacts the usefulness and accuracy of the output. Prompt engineering involves 
crafting clear, specific, and context-rich instructions to guide the model, ensuring it understands 
the task and delivers relevant results. 
One key principle is clarity. Vague prompts, such as “Tell me about AI,” can lead to broad or off-
topic responses. Instead, a well-engineered prompt like “Explain the role of transformers in 
modern AI models, focusing on their attention mechanism, in 200 words” provides direction and 
constraints. Context provision is another principle; including background information,

# Step4: Generation

In [56]:
# Generate a response using the Gemini generative model
def generate_response(context: str) -> str:
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')  
        response = model.generate_content(context)
        return response.text
    except Exception as e:
        return f"Error: {str(e)}"

# Generate the response using the augmented context from Cell 6
generated_response = generate_response(augmented_context)
print("Generated response:\n", generated_response)

Generated response:
 Prompt engineering is the art and science of designing effective input prompts to get desired responses from AI models like large language models (LLMs) or generative AI systems.  Its importance stems from the fact that the quality of prompts directly affects the usefulness and accuracy of the AI's output in tasks such as content creation, coding, and data analysis.

Fundamental principles of prompt engineering include:

* **Clarity:**  Vague prompts lead to unhelpful answers.  Clear, specific instructions are crucial.
* **Context Provision:** Providing background information or the user's expertise level helps the model tailor its response appropriately.
* **Iteration:**  Initial prompts often need refinement.  Experimentation and adjusting the prompt based on the results are key.

