# ==============================================================================
# Groq RAG Pipeline
#
# Description:
# This script implements a complete Retrieval-Augmented Generation (RAG) pipeline.
# It uses the Groq API for ultra-fast language model inference and a local,
# open-source SentenceTransformer model for creating text embeddings.
#
# Pipeline Steps:
# 1.  Load Data: Extracts text from a specified PDF file.
# 2.  Chunk Data: Splits the extracted text into smaller, overlapping chunks.
# 3.  Embed Chunks: Converts text chunks into numerical vectors (embeddings).
# 4.  Semantic Search: Finds the most relevant chunks based on a user query.
# 5.  Generate Response: Sends the query and relevant chunks to a Groq LLM
#     to generate a context-aware answer.
# 6.  Evaluate Response: Uses a second Groq LLM call to score the answer
#     against a predefined ideal response.
# ==============================================================================

In [1]:
# --- 1. Environment Setup and Library Imports ---
import os
import json
import numpy as np
import fitz  # PyMuPDF library
from dotenv import load_dotenv
from groq import Groq
from sentence_transformers import SentenceTransformer

def initialize_clients():
    """Set up the Groq client and the local embedding model.

    Environment variables are loaded via ``load_dotenv()`` first, then the
    Groq API key is read from ``GROQ_API_KEY``.

    Returns:
        A ``(client, model)`` tuple:
        * ``(None, None)`` when the Groq client could not be created
          (including a missing or empty API key);
        * ``(client, None)`` when the client is fine but the embedding
          model failed to load;
        * ``(client, model)`` on full success.
        Failures are reported with a printed message rather than raised.
    """
    load_dotenv()

    # --- Groq client (LLM inference) ---
    try:
        api_key = os.getenv("GROQ_API_KEY")
        if api_key:
            llm_client = Groq(api_key=api_key)
        else:
            raise ValueError("GROQ_API_KEY not found in the .env file.")
        print("✅ Groq client initialized successfully.")
    except Exception as init_error:
        print(f"🔥 Error initializing Groq client: {init_error}")
        return None, None

    # --- Local SentenceTransformer model (embeddings) ---
    try:
        # 'all-MiniLM-L6-v2' is used as the default embedding model.
        embedder = SentenceTransformer("all-MiniLM-L6-v2")
        print("✅ SentenceTransformer embedding model loaded successfully.")
    except Exception as load_error:
        print(f"🔥 Error loading embedding model: {load_error}")
        return llm_client, None

    return llm_client, embedder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- 2. Data Processing Functions ---

def extract_text_from_pdf(pdf_path: str) -> str | None:
    """Extracts all text content from a PDF file."""
    if not os.path.exists(pdf_path):
        print(f"🔥 Error: PDF file not found at '{pdf_path}'")
        return None
    try:
        doc = fitz.open(pdf_path)
        all_text = "".join(page.get_text() for page in doc)
        doc.close()
        return all_text
    except Exception as e:
        print(f"🔥 Error reading PDF file: {e}")
        return None

def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
    """Split a long text into fixed-size, overlapping character chunks.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.
    The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The text to split. An empty string (or other falsy value)
            yields an empty list.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between neighbouring chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The list of chunks in document order.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if not text:
        return []
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # overlap >= chunk_size would make the stride zero or negative (no
    # progress / silently no chunks); a negative overlap would skip text.
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    stride = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), stride)]

In [3]:
# --- 3. Embedding and Search Functions ---

def create_embeddings(chunks: list[str], embedding_model) -> np.ndarray | None:
    """Creates numerical vector embeddings for a list of text chunks."""
    if not chunks or embedding_model is None:
        return None
    try:
        embeddings = embedding_model.encode(chunks, convert_to_numpy=True, show_progress_bar=True)
        return embeddings
    except Exception as e:
        print(f"🔥 Error creating embeddings: {e}")
        return None

def semantic_search(query: str, chunks: list[str], embeddings: np.ndarray, embedding_model, k: int = 3) -> list[str]:
    """Return the ``k`` chunks whose embeddings are most similar to the query.

    Similarity is cosine similarity, ``(A . B) / (||A|| * ||B||)``, between
    the query embedding and each row of ``embeddings``.

    Args:
        query: The user's question.
        chunks: The text chunks; ``chunks[i]`` is assumed to correspond to
            ``embeddings[i]``.
        embeddings: 2-D array with one embedding per row.
        embedding_model: Object exposing ``encode(list_of_texts,
            convert_to_numpy=True)``; its first returned row is used as the
            query embedding.
        k: Maximum number of chunks to return. Values larger than the number
            of chunks return all of them; values ``<= 0`` return nothing.

    Returns:
        Up to ``k`` chunks ordered from most to least similar. An empty list
        is returned when inputs are missing or ``k`` is not positive.
    """
    if embeddings is None or embedding_model is None:
        return []
    # A slice of [-0:] would select *every* element and a negative k an
    # arbitrary tail, so non-positive k is handled explicitly.
    if not chunks or k <= 0:
        return []

    # 1. Create an embedding for the user's query.
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)[0]

    # 2. Cosine similarity between the query and every chunk embedding.
    dot_products = np.dot(embeddings, query_embedding)
    norm_products = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
    # Zero-norm vectors would divide by zero and produce NaN, which argsort
    # places last (i.e. ranks as the best match). Score them 0.0 instead.
    similarities = np.zeros(dot_products.shape, dtype=float)
    np.divide(dot_products, norm_products, out=similarities, where=norm_products > 0)

    # 3. Indices of the top 'k' most similar chunks, best first.
    top_k_indices = np.argsort(similarities)[-k:][::-1]

    # 4. Return the corresponding chunks.
    return [chunks[i] for i in top_k_indices]

In [4]:
# --- 4. LLM Generation and Evaluation Functions ---

def generate_groq_response(client, system_prompt: str, user_message: str, model: str = "llama3-8b-8192") -> str:
    """Ask a Groq chat model for a completion and return its text.

    Args:
        client: Groq client (or any object exposing
            ``chat.completions.create``); ``None`` short-circuits with an
            error string.
        system_prompt: Content of the system message.
        user_message: Content of the user message.
        model: Model identifier passed through to the API.

    Returns:
        The content of the first choice's message. Errors are never raised;
        they are returned as a string starting with "🔥 Error".
    """
    if client is None:
        return "🔥 Error: Groq client is not initialized."

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    try:
        completion = client.chat.completions.create(
            model=model,
            # Low temperature keeps answers more factual and deterministic.
            temperature=0.2,
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as api_error:
        return f"🔥 Error generating response from Groq: {api_error}"

In [8]:
# --- 5. Main Pipeline Execution ---

def main():
    """Run the full RAG pipeline end to end and print each stage's result.

    Stages:
        1. Initialise the Groq client and the embedding model.
        2. Extract text from ``data/AI_information.pdf`` and chunk it.
        3. Embed the chunks.
        4. Load the first entry of ``data/val.json`` (falling back to a
           built-in query and ideal answer if that fails) and retrieve the
           top 3 chunks for the query.
        5. Ask the LLM to answer using only the retrieved context.
        6. Ask the LLM to score the answer against the ideal answer.

    The function returns ``None``; it stops early, with a printed message,
    if initialisation, text extraction, or embedding creation fails.
    """
    print("--- 🚀 Starting RAG Pipeline with Groq and Local Embeddings ---")

    # Initialize clients and models
    groq_client, embedding_model = initialize_clients()
    if not groq_client or not embedding_model:
        print("--- 🛑 Pipeline halted due to initialization errors. ---")
        return

    # Part 1: Data Ingestion and Chunking
    pdf_path = "data/AI_information.pdf"
    extracted_text = extract_text_from_pdf(pdf_path)
    if not extracted_text:
        print("--- 🛑 Pipeline halted because text could not be extracted. ---")
        return

    text_chunks = chunk_text(extracted_text)
    print(f"📄 Text from '{pdf_path}' extracted and split into {len(text_chunks)} chunks.")

    # Part 2: Embedding Creation
    chunk_embeddings = create_embeddings(text_chunks, embedding_model)
    if chunk_embeddings is None:
        print("--- 🛑 Pipeline halted because embeddings could not be created. ---")
        return
    print(f"✅ Embeddings created for all chunks with shape: {chunk_embeddings.shape}")

    # Part 3: Load Query and Perform Semantic Search
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding, which breaks non-ASCII text on some systems.
        with open('data/val.json', encoding="utf-8") as f:
            val_data = json.load(f)[0]
        query = val_data['question']
        ideal_answer = val_data['ideal_answer']
        print(f"\n❓ Loaded Query: {query}")
    except Exception as e:
        # Deliberate best-effort: any problem with the validation file falls
        # back to a built-in query so the pipeline can still be demonstrated.
        print(f"🔥 Could not load 'data/val.json': {e}. Using a default query.")
        query = "What is Explainable AI and why is it important?"
        ideal_answer = "Explainable AI (XAI) makes AI decisions understandable to humans, which is crucial for trust, fairness, and accountability."

    top_chunks = semantic_search(query, text_chunks, chunk_embeddings, embedding_model, k=3)
    print("✅ Semantic search complete. Retrieved top relevant context chunks.")

    # Part 4: Generate Response using RAG
    print("\n💬 Generating response from Groq based on retrieved context...")
    rag_system_prompt = "You are a helpful AI assistant. Answer the user's question based ONLY on the provided context. If the context does not contain the answer, state that clearly."
    # Retrieved chunks are separated by a horizontal-rule marker in the prompt.
    context_str = "\n\n---\n\n".join(top_chunks)
    user_prompt_for_rag = f"Context:\n{context_str}\n\nQuestion: {query}"

    ai_response = generate_groq_response(groq_client, rag_system_prompt, user_prompt_for_rag)
    print("\n💡 AI Response:\n", ai_response)

    # Part 5: Evaluate the Generated Response
    print("\n\n--- ⚖️ Evaluating AI Response ---")
    eval_system_prompt = "You are an expert evaluation system. Compare the 'AI Response' to the 'True Response' based on the user's query. Score the AI response on a scale of 0, 0.5, or 1. '1' for a correct and complete answer, '0.5' for a partially correct answer, and '0' for an incorrect answer. Provide only the score and a brief justification."
    eval_user_prompt = f"User Query: {query}\n\nTrue Response: {ideal_answer}\n\nAI Response: {ai_response}"

    evaluation_response = generate_groq_response(groq_client, eval_system_prompt, eval_user_prompt)
    print("\n⭐ Evaluation Result:\n", evaluation_response)

    print("\n--- ✅ Pipeline Finished Successfully ---")

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

--- 🚀 Starting RAG Pipeline with Groq and Local Embeddings ---
✅ Groq client initialized successfully.
✅ SentenceTransformer embedding model loaded successfully.
📄 Text from 'data/AI_information.pdf' extracted and split into 42 chunks.


Batches: 100%|██████████| 2/2 [00:00<00:00,  2.75it/s]


✅ Embeddings created for all chunks with shape: (42, 384)

❓ Loaded Query: What is 'Explainable AI' and why is it considered important?
✅ Semantic search complete. Retrieved top relevant context chunks.

💬 Generating response from Groq based on retrieved context...

💡 AI Response:
 According to the provided context, Explainable AI (XAI) is a technique aimed at making AI decisions more understandable, enabling users to assess their fairness and accuracy. It is considered important because it is essential for building trust in AI systems.


--- ⚖️ Evaluating AI Response ---

⭐ Evaluation Result:
 Score: 0.5

Justification: The AI response is partially correct. It correctly identifies the goal of Explainable AI (XAI) as making AI decisions more understandable and enabling users to assess their fairness and accuracy. However, it does not mention the importance of XAI for building trust, accountability, and ensuring fairness in AI systems, which is a crucial aspect of the true response.

--