# STEP1 -- LOAD A PDF FILE AND FETCH DATA

In [1]:
# https://ftp.dot.state.tx.us/pub/txdot-info/cst/pmis2004.pdf

import os, requests
from langchain_community.document_loaders import PyPDFLoader

PDF_URL = "https://ftp.dot.state.tx.us/pub/txdot-info/cst/pmis2004.pdf"
PDF_FILENAME = "Sample_PDF.pdf"
OUTPUT_TEXT_FILENAME = "pmis_extracted_text.txt"

def download_pdf(url, filename, timeout=30):
    """Download ``url`` to ``filename`` and report whether it succeeded.

    The response body is streamed to ``<filename>.part`` and only moved to
    ``filename`` once the transfer has finished. The caller decides whether a
    download is needed by checking that ``filename`` exists, so a truncated
    file must never be left under the final name.

    Args:
        url: HTTP(S) address of the PDF.
        filename: Destination path for the downloaded file.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled host.

    Returns:
        True if the file was downloaded and saved, False otherwise.
    """
    print(f"Attempting to download PDF from: {url}")
    partial_path = f"{filename}.part"
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            with open(partial_path, 'wb') as pdf_file:
                for chunk in response.iter_content(chunk_size=8192):
                    pdf_file.write(chunk)
        # Publish the file under its final name only after a complete transfer.
        os.replace(partial_path, filename)
        print(f"Successfully downloaded {filename}")
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"Error downloading PDF: {e}")
        # Best-effort cleanup of the partial file; the download already failed.
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError as cleanup_error:
            print(f"Could not remove partial download '{partial_path}': {cleanup_error}")
        return False
    return True

def extract_text_from_pdf_with_pypdfloader(pdf_path, output_path):
    """Extract the text of each PDF page and write it to a UTF-8 text file.

    Pages are joined with a single newline in the output file.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path of the text file to create or overwrite.

    Returns:
        True if the text was extracted and saved, False if anything failed
        (the error is printed, not raised).
    """
    full_text_content = []
    try:
        loader = PyPDFLoader(pdf_path)
        # load() returns one Document per PDF page. load_and_split() would
        # additionally run a default text splitter over those pages, so its
        # result length is a chunk count rather than a page count.
        documents = loader.load()

        num_pages = len(documents)
        print(f"Loaded PDF: {pdf_path} with {num_pages} pages using PyPDFLoader.")

        if num_pages < 200:
            print(f"Warning: PDF has {num_pages} pages, which is less than the recommended 200 pages.")
            print("Consider using a larger PDF for better demonstration of chunking and indexing.")

        for page_number, doc in enumerate(documents, start=1):
            full_text_content.append(doc.page_content)
            # Report progress every 50 pages and once more on the last page.
            if page_number % 50 == 0 or page_number == num_pages:
                print(f"Processed page {page_number}/{num_pages}...")

        # Save the extracted text to a file
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("\n".join(full_text_content))

        print(f"Text extraction complete. Total pages processed: {num_pages}")
        print(f"Extracted text saved to: {output_path}")
        return True
    except FileNotFoundError:
        print(f"Error: PDF file not found at {pdf_path}")
        return False
    except Exception as e:
        # Boundary of the step: report any loader/parsing failure as False.
        print(f"An error occurred during PDF text extraction with PyPDFLoader: {e}")
        return False

if __name__ == "__main__":
    # Step 1 driver: make sure the PDF is present (downloading it if needed),
    # then extract its text into OUTPUT_TEXT_FILENAME.
    try:
        import requests
        from langchain_community.document_loaders import PyPDFLoader
    except ImportError as missing_dependency:
        print(f"Missing required library: {missing_dependency}. Please install using:")
        print("pip install requests langchain-community pypdf")
        exit()

    pdf_available = os.path.exists(PDF_FILENAME)
    if not pdf_available:
        print(f"PDF file '{PDF_FILENAME}' not found locally. Attempting to download...")
        pdf_available = download_pdf(PDF_URL, PDF_FILENAME)
        if not pdf_available:
            print(f"Please ensure you have a large PDF file named {PDF_FILENAME} in the same directory.")
            exit()

    # Extract text from the downloaded/existing PDF using PyPDFLoader
    print(f"\nStarting text extraction from {PDF_FILENAME} using PyPDFLoader...")
    extraction_succeeded = extract_text_from_pdf_with_pypdfloader(PDF_FILENAME, OUTPUT_TEXT_FILENAME)
    if not extraction_succeeded:
        print("\nStep 1 (PDF Data Fetch) failed.")
    else:
        print("\nStep 1 (PDF Data Fetch) completed successfully using PyPDFLoader!")
        print(f"You can now find the extracted text in '{OUTPUT_TEXT_FILENAME}'.")



Starting text extraction from Sample_PDF.pdf using PyPDFLoader...
Loaded PDF: Sample_PDF.pdf with 249 pages using PyPDFLoader.
Processed page 50/249...
Processed page 100/249...
Processed page 150/249...
Processed page 200/249...
Processed page 249/249...
Text extraction complete. Total pages processed: 249
Extracted text saved to: pmis_extracted_text.txt

Step 1 (PDF Data Fetch) completed successfully using PyPDFLoader!
You can now find the extracted text in 'pmis_extracted_text.txt'.


# STEP2 -- SEMANTIC CHUNKING AND EMBEDDING

In [2]:
import os, uuid, json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.docstore.document import Document

# --- Configuration (using constants from previous step's output) --- #

OUTPUT_TEXT_FILENAME = "pmis_extracted_text.txt"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBEDDINGS_OUTPUT_FILE = "chunks_with_embeddings.json"

def chunk_text(text_content: str) -> list[Document]:
    """Split one text string into overlapping chunks.

    Chunk size and overlap come from the module-level CHUNK_SIZE and
    CHUNK_OVERLAP constants; lengths are measured with ``len``.

    Args:
        text_content: The full text to split.

    Returns:
        The Documents produced by the splitter for ``text_content``.
    """
    print(f"Starting chunking with chunk_size={CHUNK_SIZE}, chunk_overlap={CHUNK_OVERLAP}")
    splitter_options = {
        "chunk_size": CHUNK_SIZE,
        "chunk_overlap": CHUNK_OVERLAP,
        "length_function": len,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    # create_documents() expects a list of texts, so pass a one-element list.
    documents = splitter.create_documents([text_content])
    print(f"Text chunking complete. Created {len(documents)} chunks.")
    return documents


def embed_chunks(chunks: list[Document], model_name: str) -> list[Document]:
    """Attach an embedding vector and a fresh UUID to every chunk.

    The chunks are modified in place: each one gains ``metadata["embedding"]``
    and ``metadata["chunk_id"]``.

    Args:
        chunks: Documents whose ``page_content`` should be embedded.
        model_name: Name of the embedding model to load (on CPU).

    Returns:
        The same ``chunks`` list after annotation, or an empty list if the
        embedding model could not be loaded.
    """
    print(f"Loading embedding model: {model_name}")
    try:
        embedder = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
        print("Embedding model loaded successfully.")
    except Exception as load_error:
        print(f"Error loading embedding model: {load_error}")
        print("Please ensure 'sentence-transformers' library is installed: pip install sentence-transformers")
        print("Also check if the model name is correct and accessible.")
        return []

    print(f"Generating embeddings for {len(chunks)} chunks...")
    # embed_documents() takes the raw texts and returns one vector per text.
    vectors = embedder.embed_documents([chunk.page_content for chunk in chunks])

    for position, chunk in enumerate(chunks):
        chunk_metadata = chunk.metadata
        chunk_metadata["embedding"] = vectors[position]
        chunk_metadata["chunk_id"] = str(uuid.uuid4())

    print("Embeddings generation complete.")
    return chunks

if __name__ == "__main__":
    # Step 2 driver: read the extracted text, chunk it, embed the chunks and
    # dump the result to EMBEDDINGS_OUTPUT_FILE.
    try:
        from langchain.text_splitter import RecursiveCharacterTextSplitter
        from langchain_community.embeddings import HuggingFaceEmbeddings
        from langchain.docstore.document import Document
        import json
        import uuid
    except ImportError as missing_dependency:
        print(f"Missing required library: {missing_dependency}. Please install using:")
        print("pip install langchain-text-splitters langchain-community sentence-transformers")
        exit()

    extracted_text_exists = os.path.exists(OUTPUT_TEXT_FILENAME)
    if not extracted_text_exists:
        print(f"Error: Extracted text file '{OUTPUT_TEXT_FILENAME}' not found.")
        print("Please run the PDF extraction step first to create this file.")
        exit()

    with open(OUTPUT_TEXT_FILENAME, 'r', encoding='utf-8') as f:
        text_content = f.read()

    # Perform semantic chunking
    print("\n--- Starting Semantic Chunking ---")
    processed_chunks = chunk_text(text_content)
    if not processed_chunks:
        print("Chunking failed. Exiting.")
        exit()

    # Generate embeddings for the chunks
    print("\n--- Starting Embedding Generation ---")
    chunks_with_embeddings = embed_chunks(processed_chunks, EMBEDDING_MODEL_NAME)
    if not chunks_with_embeddings:
        print("Embedding generation failed. Exiting.")
        exit()

    # Flatten each Document into a plain dict so it can be written as JSON.
    serializable_data = [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in chunks_with_embeddings
    ]

    try:
        with open(EMBEDDINGS_OUTPUT_FILE, 'w', encoding='utf-8') as f:
            json.dump(serializable_data, f, indent=2)
        print(f"\nChunks and embeddings saved to '{EMBEDDINGS_OUTPUT_FILE}' for inspection.")
    except Exception as save_error:
        # A failed save is reported but does not abort the step.
        print(f"Error saving chunks with embeddings to file: {save_error}")


    print("\nStep 2 (Semantic Chunking and Embedding) completed successfully!")
    print(f"You now have {len(chunks_with_embeddings)} chunks, each with its associated embedding.")


--- Starting Semantic Chunking ---
Starting chunking with chunk_size=1000, chunk_overlap=200
Text chunking complete. Created 336 chunks.

--- Starting Embedding Generation ---
Loading embedding model: sentence-transformers/all-MiniLM-L6-v2


  embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
  from .autonotebook import tqdm as notebook_tqdm


Embedding model loaded successfully.
Generating embeddings for 336 chunks...
Embeddings generation complete.

Chunks and embeddings saved to 'chunks_with_embeddings.json' for inspection.

Step 2 (Semantic Chunking and Embedding) completed successfully!
You now have 336 chunks, each with its associated embedding.


# STEP 3 - MILVUS INDEX CREATION & DATA INGESTION

In [1]:
import os
from pymilvus import (
    connections,
    utility,
    FieldSchema, CollectionSchema, DataType,
    Collection,
)
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any

# --- Configuration for Milvus Connection ---
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530" 

COLLECTION_NAME = "document_chunks" 
VECTOR_DIMENSION = 384

EMBEDDINGS_JSON_FILE_PATH = "chunks_with_embeddings.json"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = None

# Define the schema for our document chunks
# We'll use a simple schema with a primary key, text content, and the embedding vector.
# For a full RAG pipeline, you should also include source metadata.
# The same schema object is shared by every collection created in this step.
fields = [
    # auto_id=True: the insert code in this step supplies only the text and
    # embedding columns, never a pk value.
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="page_content", dtype=DataType.VARCHAR, max_length=65535), # Store the text chunk
    # dim must match the embedding model's output size (VECTOR_DIMENSION).
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=VECTOR_DIMENSION)
]
schema = CollectionSchema(fields, "Document chunks for RAG demo")


# --- Index Parameters for HNSW and IVF ---
# These parameters are crucial for how Milvus builds and searches the index.
# HNSW parameters:
#   M: Max number of outgoing connections for each node in the graph. Higher M means denser graph,
#      better recall, but slower build/larger index. Common range 4-64.
#   efConstruction: Controls the size of the dynamic list during graph construction.
#                   Higher efConstruction means more accurate graph, but slower build.
#                   Common range 100-500.
HNSW_INDEX_PARAMS = {
    "metric_type": "COSINE", # Use cosine similarity for text embeddings
    "index_type": "HNSW",
    "params": {"M": 16, "efConstruction": 200}
}

# IVF parameters:
#   nlist: Number of clusters (quantizers). Higher nlist means smaller clusters,
#          potentially faster search (fewer comparisons per cluster), but more clusters to manage.
#          A common heuristic is sqrt(N) to 8*sqrt(N), where N is number of vectors.
#          NOTE(review): this PDF yields only a few hundred chunks (336 in the recorded run),
#          for which that heuristic gives roughly 18-150, so nlist=1024 is far above it.
#          Consider a smaller nlist for a corpus this size.
#   nprobe: Number of clusters to search during query. Higher nprobe means better recall,
#           but slower search. Should be <= nlist.
IVF_INDEX_PARAMS = {
    "metric_type": "COSINE",
    "index_type": "IVF_FLAT", # IVF_FLAT is a common IVF variant
    "params": {"nlist": 1024}
}

# FLAT index parameters (brute-force search)
FLAT_INDEX_PARAMS = {
    "metric_type": "COSINE",
    "index_type": "FLAT", # No specific parameters for FLAT as it's a direct comparison
    "params": {}
}

def connect_to_milvus():
    """Open the "default" Milvus connection and check that it responds.

    Returns:
        True if connecting and listing collections both succeeded, False if
        either step raised (the error is printed).
    """
    is_connected = False
    try:
        endpoint = f"{MILVUS_HOST}:{MILVUS_PORT}"
        print(f"Attempting to connect to Milvus at {endpoint}...")
        connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
        # Listing collections is used as a cheap round-trip to verify the link.
        utility.list_collections()
        print("Milvus connection successful!")
        is_connected = True
    except Exception as connection_error:
        print(f"Milvus connection failed: {connection_error}")
        print("Please ensure your Milvus Docker container is running (run 'docker ps').")
    return is_connected

def create_milvus_collection_with_index(collection_name_suffix: str, index_params: Dict[str, Any]) -> Collection:
    """Create a collection from scratch and build a vector index on it.

    The collection is named ``{COLLECTION_NAME}_{collection_name_suffix}``.
    An existing collection with that name is dropped first, so any data
    already stored in it is lost.

    Args:
        collection_name_suffix: Suffix that distinguishes this collection.
        index_params: Mapping with ``index_type``, ``metric_type`` and
            ``params`` keys describing the index for the ``embedding`` field.

    Returns:
        The newly created Collection.
    """
    target_name = f"{COLLECTION_NAME}_{collection_name_suffix}"

    already_present = utility.has_collection(target_name)
    if already_present:
        print(f"\nCollection '{target_name}' already exists. Dropping and recreating...")
        utility.drop_collection(target_name)

    print(f"\nCreating collection '{target_name}' with schema...")
    new_collection = Collection(target_name, schema)
    print(f"Collection '{target_name}' created.")

    # Build the index specification for the vector field.
    index_type = index_params["index_type"]
    print(f"Creating {index_type} index on 'embedding' field for '{target_name}'...")
    index_spec = dict(
        field_name="embedding",
        index_type=index_type,
        metric_type=index_params["metric_type"],
        params=index_params["params"],
    )
    new_collection.create_index("embedding", index_params=index_spec)
    print(f"Index '{index_type}' created successfully on '{target_name}'.")

    return new_collection

def load_chunks_and_embeddings_from_json(file_path: str) -> List[Dict[str, Any]]:
    """Load chunk texts and their embeddings from a JSON file.

    The file is expected to hold a list of objects, each with a
    ``page_content`` string and a ``metadata`` object containing an
    ``embedding`` entry.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list of ``{"chunk": <text>, "embedding": <vector>}`` dicts, or an
        empty list if the file is missing, is not valid JSON, or does not
        have the expected structure (the problem is printed, not raised).
    """
    # Imported locally so the function does not depend on the script's
    # __main__ guard having imported json first.
    import json

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Keep only the two values the ingestion step needs from each entry.
        processed_data = [
            {"chunk": entry["page_content"], "embedding": entry["metadata"]["embedding"]}
            for entry in data
        ]
        print(f"Successfully loaded {len(processed_data)} chunk-embedding pairs from {file_path}")
        return processed_data
    except FileNotFoundError:
        print(f"Error: JSON file not found at {file_path}. Please ensure it exists and the path is correct.")
        return []
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {file_path}: {e}")
        return []
    except KeyError as e:
        print(f"Error: Missing expected key in JSON data (e.g., 'page_content' or 'embedding' in 'metadata'). Check format. Error: {e}")
        return []
    except Exception as e:
        # Covers structurally wrong data (e.g. a non-list top level) and I/O errors.
        print(f"An unexpected error occurred while loading JSON from {file_path}: {e}")
        return []

def insert_data_into_milvus(collection: Collection, data_entries: List[Dict[str, Any]]):
    """Insert chunk texts and embeddings into a collection, then load it.

    Errors are printed rather than raised, so one failing collection does not
    stop the caller from processing the others.

    Args:
        collection: Target collection; a falsy value skips the insertion.
        data_entries: Dicts with a ``chunk`` text and an ``embedding`` vector.

    Returns:
        None.
    """
    if not collection:
        print("Skipping data insertion for unknown collection due to missing object.")
        return

    if not data_entries:
        # Nothing to send; avoid issuing an insert with empty columns.
        print(f"No entities to insert into collection '{collection.name}'. Skipping.")
        return

    print(f"Inserting {len(data_entries)} entities into collection '{collection.name}'...")

    # Milvus takes column-oriented data, in schema order without the auto-id pk:
    # first the 'page_content' column, then the 'embedding' column.
    data = [
        [entry["chunk"] for entry in data_entries],
        [entry["embedding"] for entry in data_entries],
    ]

    try:
        insert_result = collection.insert(data)
        print(f"Successfully inserted data into '{collection.name}'. Insert count: {insert_result.insert_count}")
        # Flush so the inserted rows are sealed and counted.
        collection.flush()
        print(f"Collection '{collection.name}' flushed. Entity count: {collection.num_entities}")
    except Exception as e:
        print(f"Error inserting data into '{collection.name}': {e}")

    # Load the collection so it is searchable, whether or not the insert
    # succeeded. load() is called directly instead of consulting an
    # `is_loaded` attribute, which is not part of the documented Collection API.
    try:
        collection.load()
        print(f"Collection '{collection.name}' loaded into memory.")
    except Exception as e:
        print(f"Error ensuring collection '{collection.name}' is loaded: {e}")

if __name__ == "__main__":
    # Step 3 driver: connect, (re)create the three collections, then ingest the
    # chunk/embedding pairs saved by the previous step into each of them.
    try:
        import pymilvus
        import json
        # SentenceTransformer is not used to generate embeddings here; the
        # import only verifies that the package is installed.
        from sentence_transformers import SentenceTransformer
    except ImportError as missing_dependency:
        print(f"Missing required library: {missing_dependency}. Please install using:")
        print("pip install pymilvus sentence-transformers")
        exit()

    # Connect to Milvus
    milvus_ready = connect_to_milvus()
    if not milvus_ready:
        exit()

    # Create Milvus Collections with Indexes
    print("\n--- Creating Milvus Collections and Indexes ---")
    hnsw_collection = create_milvus_collection_with_index("hnsw", HNSW_INDEX_PARAMS)
    ivf_collection = create_milvus_collection_with_index("ivf", IVF_INDEX_PARAMS)
    flat_collection = create_milvus_collection_with_index("flat", FLAT_INDEX_PARAMS) # For learning/comparison
    created_collections = (hnsw_collection, ivf_collection, flat_collection)

    if any(not created for created in created_collections):
        print("One or more Milvus collections could not be created. Exiting.")
        exit()

    # Load chunks and embeddings from JSON
    print("\n--- Loading Data from JSON ---")
    document_data = load_chunks_and_embeddings_from_json(EMBEDDINGS_JSON_FILE_PATH)
    if not document_data:
        print("No document data loaded. Exiting.")
        exit()

    # Ingest the same data into every collection, in creation order.
    print("\n--- Ingesting Data into Milvus Collections ---")
    for target_collection in created_collections:
        insert_data_into_milvus(target_collection, document_data)

    print("\nStep 3 (Milvus Indexing and Data Ingestion) completed successfully!")
    print(f"Your document chunks and embeddings are now indexed in Milvus in collections:")
    for collection_suffix in ("hnsw", "ivf", "flat"):
        print(f"- '{COLLECTION_NAME}_{collection_suffix}'")
    print("\nNext, we will proceed with building the retrieval pipeline using Milvus for vector search (Step 4).")

  from .autonotebook import tqdm as notebook_tqdm


Attempting to connect to Milvus at localhost:19530...
Milvus connection successful!

--- Creating Milvus Collections and Indexes ---

Creating collection 'document_chunks_hnsw' with schema...
Collection 'document_chunks_hnsw' created.
Creating HNSW index on 'embedding' field for 'document_chunks_hnsw'...
Index 'HNSW' created successfully on 'document_chunks_hnsw'.

Creating collection 'document_chunks_ivf' with schema...
Collection 'document_chunks_ivf' created.
Creating IVF_FLAT index on 'embedding' field for 'document_chunks_ivf'...
Index 'IVF_FLAT' created successfully on 'document_chunks_ivf'.

Creating collection 'document_chunks_flat' with schema...
Collection 'document_chunks_flat' created.
Creating FLAT index on 'embedding' field for 'document_chunks_flat'...
Index 'FLAT' created successfully on 'document_chunks_flat'.

--- Loading Data from JSON ---
Successfully loaded 336 chunk-embedding pairs from chunks_with_embeddings.json

--- Ingesting Data into Milvus Collections ---
In

# STEP 4 -- MILVUS RETRIEVAL PIPELINE

In [2]:
import os
import json
from pymilvus import connections, utility, Collection
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
from huggingface_hub import snapshot_download

# --- Configuration for Milvus Connection ---
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"

COLLECTION_NAME = "document_chunks"

# --- Embedding Model Configuration ---
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
VECTOR_DIMENSION = 384 

embedding_model = None 

def connect_to_milvus():
    """Open the "default" Milvus connection and check that it responds.

    Returns:
        True if connecting and listing collections both succeeded, False if
        either step raised (the error is printed).
    """
    is_connected = False
    try:
        endpoint = f"{MILVUS_HOST}:{MILVUS_PORT}"
        print(f"Attempting to connect to Milvus at {endpoint}...")
        connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
        # Listing collections is used as a cheap round-trip to verify the link.
        utility.list_collections()
        print("Milvus connection successful!")
        is_connected = True
    except Exception as connection_error:
        print(f"Milvus connection failed: {connection_error}")
        print("Please ensure your Milvus Docker container is running (run 'docker ps').")
    return is_connected

def get_embedding_model():
    """Loads and returns the SentenceTransformer embedding model.

    The model is cached in the module-level ``embedding_model`` variable, so
    it is only loaded once. Its files live under
    ``./downloaded_embedding_model/<model name>`` and are downloaded from the
    Hugging Face Hub when the local copy looks missing or incomplete.

    Returns:
        The loaded model, or None if downloading or loading failed (the error
        is printed, not raised).
    """
    global embedding_model
    if embedding_model is not None:
        return embedding_model

    # Define the local directory where the model will be downloaded/loaded from
    # This will be a subfolder within your working directory
    local_model_dir = os.path.join(os.getcwd(), "downloaded_embedding_model", EMBEDDING_MODEL_NAME.replace("/", "--"))
    print(f"Attempting to load model from or download to: {local_model_dir}")

    try:
        # Created inside the try so a permission problem is reported by the
        # handler below instead of escaping as an unhandled OSError.
        os.makedirs(local_model_dir, exist_ok=True)

        has_config = os.path.exists(os.path.join(local_model_dir, "config.json"))
        # Accept either weight format; a safetensors-only copy is complete too.
        has_weights = any(
            os.path.exists(os.path.join(local_model_dir, weights_file))
            for weights_file in ("pytorch_model.bin", "model.safetensors")
        )

        if not (has_config and has_weights):
            print(f"Model files not found or incomplete in '{local_model_dir}'. Initiating robust download...")
            snapshot_download(
                repo_id=f"sentence-transformers/{EMBEDDING_MODEL_NAME}",
                local_dir=local_model_dir,
                local_dir_use_symlinks=False,
                resume_download=True
            )
            print(f"Model successfully downloaded to: {local_model_dir}")
        else:
            print(f"Model files found in '{local_model_dir}'. Loading from local directory.")

        # Now, load the model directly from the local directory.
        # `device` is SentenceTransformer's own argument for choosing where the
        # model runs; `model_kwargs` is forwarded to the underlying model loader.
        embedding_model = SentenceTransformer(local_model_dir, device="cpu")
        print("Embedding model loaded successfully from local path.")

    except Exception as e:
        print(f"Error loading embedding model {EMBEDDING_MODEL_NAME} from '{local_model_dir}': {e}")
        print("Please ensure you have an active internet connection for the initial download.")
        print("If issues persist, check permissions for the 'downloaded_embedding_model' directory.")
        embedding_model = None
    return embedding_model

def get_milvus_collection(collection_suffix: str) -> Collection:
    """Look up an existing collection and load it for searching.

    Args:
        collection_suffix: Suffix appended to COLLECTION_NAME to form the
            collection's full name.

    Returns:
        The loaded Collection, or None if the collection does not exist or
        loading it raised (the problem is printed).
    """
    target_name = f"{COLLECTION_NAME}_{collection_suffix}"
    collection_exists = utility.has_collection(target_name)
    if not collection_exists:
        print(f"Error: Collection '{target_name}' does not exist.")
        print("Please run milvus_full_setup_ingestion.py first to create and populate collections.")
        return None

    handle = Collection(target_name)

    # A collection has to be loaded before it can be searched.
    try:
        print(f"Loading collection '{handle.name}' into memory for search...")
        handle.load()
        print(f"Collection '{handle.name}' loaded.")
    except Exception as load_error:
        print(f"Error loading collection '{handle.name}': {load_error}")
        handle = None

    return handle

def retrieve_relevant_chunks(query_text: str, collection: Collection, top_k: int = 3) -> List[Dict[str, Any]]:
    """Embed a query and return the closest chunks from one collection.

    The index-specific search parameters are chosen from the collection name
    ("hnsw" or "ivf" in the name); any other name is searched with no extra
    parameters.

    Args:
        query_text: The natural-language query to embed.
        collection: The loaded collection to search.
        top_k: Maximum number of hits to return.

    Returns:
        A list of ``{"page_content": ..., "distance": ...}`` dicts in the
        order Milvus returned them, or an empty list if the collection or
        model is unavailable or the search raised.
    """
    if not collection:
        print("Collection 'unknown' not available or not loaded for search.")
        return []

    model = get_embedding_model()
    if not model:
        print("Embedding model not loaded. Cannot embed query.")
        return []

    print(f"\nEmbedding query: '{query_text}'...")
    query_embedding = model.encode(query_text).tolist() # Encode and convert to list

    # Index-specific search knobs:
    #   HNSW     -> 'ef' (exploration factor); higher means better recall, slower search.
    #   IVF_FLAT -> 'nprobe' (clusters to scan); higher means better recall, slower search.
    #   FLAT / anything else -> no extra parameters.
    if "hnsw" in collection.name:
        # ef must not be smaller than the number of requested results, so
        # grow it with top_k instead of pinning it at 64.
        index_search_params = {"ef": max(64, top_k)}
    elif "ivf" in collection.name:
        index_search_params = {"nprobe": 10} # nprobe should be <= nlist (1024)
    else:
        # Always provide a 'param' entry so an unrecognised collection name
        # does not turn into a failing search call.
        index_search_params = {}

    search_params = {
        "data": [query_embedding],
        "anns_field": "embedding",
        "param": {"metric_type": "COSINE", "params": index_search_params},
        "limit": top_k,
        "output_fields": ["page_content"], # Fields to retrieve along with vector similarity results
        "expr": "pk > 0"
    }

    print(f"Searching collection '{collection.name}' with top_k={top_k}...")
    try:
        results = collection.search(**search_params)

        # One query vector was sent, so only the first result set is relevant.
        retrieved_chunks = [
            {
                "page_content": hit.entity.get("page_content"),
                "distance": hit.distance
            }
            for hit in results[0]
        ]
        print(f"Retrieved {len(retrieved_chunks)} relevant chunks from '{collection.name}'.")
        return retrieved_chunks
    except Exception as e:
        print(f"Error during search in '{collection.name}': {e}")
        return []

if __name__ == "__main__":
    # Step 4 driver: open the three collections, load the embedding model and
    # run an interactive loop that shows each index's results for a query.
    try:
        import pymilvus
        import json
        from sentence_transformers import SentenceTransformer
    except ImportError as e:
        print(f"Missing required library: {e}. Please install using:")
        print("pip install pymilvus sentence-transformers")
        exit()

    # Connect to Milvus
    if not connect_to_milvus():
        exit()

    # Get Milvus Collections
    print("\n--- Retrieving Milvus Collections ---")
    hnsw_collection = get_milvus_collection("hnsw")
    ivf_collection = get_milvus_collection("ivf")
    flat_collection = get_milvus_collection("flat")

    if not all([hnsw_collection, ivf_collection, flat_collection]):
        print("Not all required Milvus collections are available or loaded. Exiting.")
        exit()

    # Get Embedding Model (will download if not present)
    model = get_embedding_model()
    if not model:
        print("Failed to load embedding model. Exiting.")
        exit()

    # Label/collection pairs, in the order results are displayed.
    indexed_collections = [
        ("HNSW", hnsw_collection),
        ("IVF_FLAT", ivf_collection),
        ("FLAT", flat_collection),
    ]

    # Interactive Search Loop
    print("\n--- Starting Interactive Retrieval ---")
    print("Enter 'exit' to quit.")

    while True:
        try:
            query = input("\nEnter your query: ").strip()
        except EOFError:
            # No more input (e.g. stdin closed or piped); leave the loop cleanly.
            break
        if query.lower() == 'exit':
            break
        if not query:
            # Nothing to search for; ask again instead of embedding an empty string.
            continue

        for index_label, index_collection in indexed_collections:
            print(f"\n--- Search Results ({index_label} Index) ---")
            index_results = retrieve_relevant_chunks(query, index_collection, top_k=5)
            if not index_results:
                print(f"No results found from {index_label} index.")
                continue
            for rank, chunk in enumerate(index_results, start=1):
                print(f"{index_label} Result {rank} (Distance: {chunk['distance']:.4f}):")
                print(f"  {chunk['page_content'].strip()}")


    print("\nStep 4 (Milvus Retrieval Pipeline) completed.")
    print("You can now see how different indexes perform for your queries.")


Attempting to connect to Milvus at localhost:19530...
Milvus connection successful!

--- Retrieving Milvus Collections ---
Loading collection 'document_chunks_hnsw' into memory for search...
Collection 'document_chunks_hnsw' loaded.
Loading collection 'document_chunks_ivf' into memory for search...
Collection 'document_chunks_ivf' loaded.
Loading collection 'document_chunks_flat' into memory for search...
Collection 'document_chunks_flat' loaded.
Attempting to load model from or download to: /Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L6-v2
Model files not found or incomplete in '/Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L6-v2'. Initiating robust download...


Fetching 30 files: 100%|██████████| 30/30 [00:09<00:00,  3.30it/s]


Model successfully downloaded to: /Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L6-v2
Embedding model loaded successfully from local path.

--- Starting Interactive Retrieval ---
Enter 'exit' to quit.

--- Search Results (HNSW Index) ---

Embedding query: 'How are the condition of roads in general ?'...
Searching collection 'document_chunks_hnsw' with top_k=5...
Retrieved 5 relevant chunks from 'document_chunks_hnsw'.
HNSW Result 1 (Distance: 0.5817):
  person’s perception of pavement quality. 
The overall condition of Texas pavements got slightly worse in FY 2007 mainly because of 
increased distress on asphalt pavements.  Overall pavement distress got worse, but overall ride 
quality improved.  A prolonged drought that began in mid-FY 2005 and lasted through all of FY 
2006, rising material costs, increased competition for limited construction materials, and 
increased oilfield development traffic contributed to the decline in statewide pav

# STEP 5 -- Prompt Template and Generate output through LLM

In [3]:
import os
import json
from pymilvus import connections, utility, Collection
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
from huggingface_hub import snapshot_download
import requests
from dotenv import load_dotenv

load_dotenv(".env")

# --- Configuration for Milvus Connection ---
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"

COLLECTION_NAME = "document_chunks"

# --- Embedding Model Configuration ---
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
VECTOR_DIMENSION = 384
embedding_model = None

# --- Gemini API Configuration ---
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

def connect_to_milvus():
    """Open the "default" Milvus connection and check that it responds.

    Returns:
        True if connecting and listing collections both succeeded, False if
        either step raised (the error is printed).
    """
    is_connected = False
    try:
        endpoint = f"{MILVUS_HOST}:{MILVUS_PORT}"
        print(f"Attempting to connect to Milvus at {endpoint}...")
        connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
        # Listing collections is used as a cheap round-trip to verify the link.
        utility.list_collections()
        print("Milvus connection successful!")
        is_connected = True
    except Exception as connection_error:
        print(f"Milvus connection failed: {connection_error}")
        print("Please ensure your Milvus Docker container is running (run 'docker ps').")
    return is_connected

def get_embedding_model():
    """Return the shared SentenceTransformer, loading it on first use.

    The model lives in ``./downloaded_embedding_model/<model name>`` under the
    current working directory. When ``config.json`` or ``pytorch_model.bin``
    is missing there, the repository is fetched with ``snapshot_download``
    before loading.

    Returns:
        The cached model, or None when downloading or loading fails.
    """
    global embedding_model
    if embedding_model is not None:
        return embedding_model

    model_dir = os.path.join(
        os.getcwd(),
        "downloaded_embedding_model",
        EMBEDDING_MODEL_NAME.replace("/", "--"),
    )
    os.makedirs(model_dir, exist_ok=True)  # Ensure the directory exists
    print(f"Attempting to load model from or download to: {model_dir}")

    try:
        # Both files must be present for the local copy to count as complete.
        files_present = all(
            os.path.exists(os.path.join(model_dir, file_name))
            for file_name in ("config.json", "pytorch_model.bin")
        )

        if files_present:
            print(f"Model files found in '{model_dir}'. Loading from local directory.")
        else:
            print(f"Model files not found or incomplete in '{model_dir}'. Initiating robust download...")
            snapshot_download(
                repo_id=f"sentence-transformers/{EMBEDDING_MODEL_NAME}",
                local_dir=model_dir,
                local_dir_use_symlinks=False,
                resume_download=True
            )
            print(f"Model successfully downloaded to: {model_dir}")

        embedding_model = SentenceTransformer(model_dir, model_kwargs={'device': 'cpu'})
        print("Embedding model loaded successfully from local path.")
    except Exception as exc:
        print(f"Error loading embedding model {EMBEDDING_MODEL_NAME} from '{model_dir}': {exc}")
        print("Please ensure you have an active internet connection for the initial download.")
        print("If issues persist, check permissions for the 'downloaded_embedding_model' directory.")
        embedding_model = None

    return embedding_model

def get_milvus_collection(collection_suffix: str) -> Collection:
    """Look up ``<COLLECTION_NAME>_<collection_suffix>`` and load it for search.

    Args:
        collection_suffix: Part appended to ``COLLECTION_NAME`` after an
            underscore (the script passes "hnsw").

    Returns:
        The loaded collection, or None when it does not exist or loading
        raises (the reason is printed).
    """
    target_name = f"{COLLECTION_NAME}_{collection_suffix}"

    if utility.has_collection(target_name):
        handle = Collection(target_name)
        try:
            print(f"Loading collection '{handle.name}' into memory for search...")
            handle.load()
            print(f"Collection '{handle.name}' loaded.")
            return handle
        except Exception as exc:
            print(f"Error loading collection '{handle.name}': {exc}")
            return None

    print(f"Error: Collection '{target_name}' does not exist.")
    print("Please run milvus_full_setup_ingestion.py first to create and populate collections.")
    return None

def retrieve_relevant_chunks(query_text: str, collection: Collection, top_k: int = 3) -> List[Dict[str, Any]]:
    """Embed ``query_text`` and return the ``top_k`` best matches from ``collection``.

    Args:
        query_text: Natural-language query to embed.
        collection: Loaded Milvus collection to search; a falsy value
            short-circuits to an empty result.
        top_k: Maximum number of hits to request.

    Returns:
        A list of ``{"page_content": ..., "distance": ...}`` dicts in the
        order Milvus returned them. Empty when the collection or embedding
        model is unavailable, or when the search raises (the error is printed).
    """
    if not collection:
        print("Collection 'unknown' not available for search.")
        return []

    model = get_embedding_model()
    if not model:
        print("Embedding model not loaded. Cannot embed query.")
        return []

    print(f"\nEmbedding query: '{query_text}'...")
    query_embedding = model.encode(query_text).tolist()

    # Index-specific tuning, chosen by the first key found in the collection
    # name. A name matching none of them previously left "param" unset, which
    # made every search fail; it now falls back to empty tuning params.
    tuning_by_index = {"hnsw": {"ef": 64}, "ivf": {"nprobe": 10}, "flat": {}}
    tuning = next(
        (params for key, params in tuning_by_index.items() if key in collection.name),
        {},
    )

    search_params = {
        "data": [query_embedding],
        "anns_field": "embedding",
        "param": {"metric_type": "COSINE", "params": tuning},
        "limit": top_k,
        "output_fields": ["page_content"],
        "expr": "pk > 0",
    }

    print(f"Searching collection '{collection.name}' with top_k={top_k}...")
    try:
        results = collection.search(**search_params)

        # One query vector was sent, so only the first result set is relevant.
        retrieved_chunks = [
            {
                "page_content": hit.entity.get("page_content"),
                "distance": hit.distance,
            }
            for hit in results[0]
        ]
        print(f"Retrieved {len(retrieved_chunks)} relevant chunks from '{collection.name}'.")
        return retrieved_chunks
    except Exception as e:
        print(f"Error during search in '{collection.name}': {e}")
        return []

def generate_llm_response(query: str, context_chunks: List[str], timeout: float = 60.0) -> str:
    """Ask the Gemini model to answer ``query`` using only ``context_chunks``.

    Args:
        query: The user's question.
        context_chunks: Retrieved passages, numbered "Chunk 1..N" in the
            prompt. When empty, a placeholder context is sent instead.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The model's answer text, or a fixed fallback message when the request
        fails or the response has no candidate text. Never raises.
    """
    if not context_chunks:
        print("No context chunks provided for LLM. Generating response without context.")
        context_str = "No relevant context available."
    else:
        context_str = "\n".join(
            f"Chunk {number}: {chunk}" for number, chunk in enumerate(context_chunks, start=1)
        )

    prompt = f"""
    You are a helpful assistant specialized in answering questions based on provided information.
    Answer the user's query truthfully and concisely, using ONLY the context provided below.
    If the answer cannot be found in the context, state that you don't have enough information.

    User Query: {query}

    Context:
    {context_str}

    Answer:
    """

    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}

    # The key travels in a header rather than the URL: request errors include
    # the URL in their message, and those messages are printed below.
    headers = {'Content-Type': 'application/json'}
    if GEMINI_API_KEY:
        headers['x-goog-api-key'] = GEMINI_API_KEY

    print("\nSending request to Gemini LLM...")
    try:
        response = requests.post(GEMINI_API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
        result = response.json()

        candidates = result.get("candidates") or [{}]
        parts = (candidates[0].get("content") or {}).get("parts")
        if parts:
            return parts[0]["text"]

        print(f"Unexpected API response format: {result}")
        return "Could not generate a response from the LLM due to an unexpected API format."
    except requests.exceptions.RequestException as e:
        print(f"Error during Gemini API call: {e}")
        return "An error occurred while communicating with the LLM API."
    except Exception as e:
        print(f"An unexpected error occurred during LLM response generation: {e}")
        return "An unexpected error occurred during LLM response generation."


if __name__ == "__main__":
    # Interactive RAG demo: connect to Milvus, load the HNSW collection and
    # the embedding model, then answer queries until the user types 'exit'.

    # Fail early with an install hint when a dependency is missing.
    try:
        import pymilvus
        import json
        from sentence_transformers import SentenceTransformer
        from huggingface_hub import snapshot_download
        import requests # Make sure requests is imported here
    except ImportError as e:
        print(f"Missing required library: {e}. Please install using:")
        print("pip install pymilvus sentence-transformers huggingface-hub requests")
        exit()

    # Connect to Milvus
    if not connect_to_milvus():
        exit()

    # Get Milvus Collections (using HNSW for RAG, you can choose another)
    print("\n--- Retrieving Milvus Collections ---")
    hnsw_collection = get_milvus_collection("hnsw")

    if not hnsw_collection:
        print("Required Milvus HNSW collection not available or loaded. Exiting.")
        exit()

    # Get Embedding Model
    model = get_embedding_model()
    if not model:
        print("Failed to load embedding model. Exiting.")
        exit()

    # Interactive RAG Loop
    print("\n--- Starting Interactive RAG Pipeline ---")
    print("Enter 'exit' to quit.")

    while True:
        # Strip so " exit " still quits and whitespace-only input is caught.
        user_query = input("\nEnter your query: ").strip()
        if user_query.lower() == 'exit':
            break
        if not user_query:
            # A blank line would otherwise cost a vector search and an LLM call.
            print("Please enter a non-empty query.")
            continue

        # Retrieve relevant chunks using the HNSW index
        print(f"\nRetrieving relevant chunks for query: '{user_query}'")
        retrieved_chunks = retrieve_relevant_chunks(user_query, hnsw_collection, top_k=5) # Get top 5 chunks

        # Extract only the page_content for the LLM context
        context_for_llm = [chunk['page_content'] for chunk in retrieved_chunks]

        # Generate LLM response
        print("\nGenerating LLM response...")
        llm_answer = generate_llm_response(user_query, context_for_llm)

        print("\n--- RAG Response ---")
        print(f"Query: {user_query}")
        print("\nRetrieved Context:")
        if context_for_llm:
            for number, chunk in enumerate(context_for_llm, start=1):
                print(f"  Chunk {number}: {chunk.strip()}")
        else:
            print("  No relevant context retrieved.")
        print(f"\nAnswer: {llm_answer}")
        print("--------------------")

    print("\nStep 5 (RAG LLM Generation) completed. Exiting RAG pipeline.")


Attempting to connect to Milvus at localhost:19530...
Milvus connection successful!

--- Retrieving Milvus Collections ---
Loading collection 'document_chunks_hnsw' into memory for search...
Collection 'document_chunks_hnsw' loaded.
Attempting to load model from or download to: /Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L6-v2
Model files found in '/Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L6-v2'. Loading from local directory.
Embedding model loaded successfully from local path.

--- Starting Interactive RAG Pipeline ---
Enter 'exit' to quit.

Retrieving relevant chunks for query: 'What is you assesment about Roads ?'

Embedding query: 'What is you assesment about Roads ?'...
Searching collection 'document_chunks_hnsw' with top_k=5...
Retrieved 5 relevant chunks from 'document_chunks_hnsw'.

Generating LLM response...

Sending request to Gemini LLM...

--- RAG Response ---
Query: What is you a

# STEP 6 -- RETRIEVAL EVALUATION AND RE-RANKING

In [None]:
import os
import json
import time # Import time for measuring performance
from pymilvus import connections, utility, Collection
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
from huggingface_hub import snapshot_download
import requests # For making API calls to Gemini
import numpy as np # For numerical operations in MMR


# --- Configuration for Milvus Connection ---
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"
# Prefix only: get_milvus_collection() appends "_<suffix>" ("_hnsw", "_ivf", "_flat").
COLLECTION_NAME = "document_chunks"

# --- Embedding Model Configuration ---
# NOTE(review): the STEP 5 cell queries the same "document_chunks_*"
# collections with "all-MiniLM-L6-v2". Query vectors are only comparable to
# stored vectors produced by the same model — confirm which model was used at
# ingestion and align this constant with it.
EMBEDDING_MODEL_NAME = "all-MiniLM-L12-v2"
# Not referenced by the functions in this cell; presumably the vector size
# used when the collections were created — verify against the ingestion step.
VECTOR_DIMENSION = 384

# Process-wide cache for the loaded model; populated lazily by get_embedding_model().
embedding_model = None
# Empty string when the variable is unset; generate_llm_response() then sends no key.
# NOTE(review): unlike the STEP 5 cell, this cell does not call load_dotenv(),
# so the variable must already be present in the process environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

def connect_to_milvus():
    """Open the "default" Milvus connection and confirm the server answers.

    Returns:
        True when connecting and listing collections both succeed,
        False when either step raises (the error is printed, not raised).
    """
    is_connected = False
    try:
        print(f"Attempting to connect to Milvus at {MILVUS_HOST}:{MILVUS_PORT}...")
        connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
        # Round trip to the server to verify the connection actually works.
        utility.list_collections()
        print("Milvus connection successful!")
        is_connected = True
    except Exception as exc:
        print(f"Milvus connection failed: {exc}")
        print("Please ensure your Milvus Docker container is running (run 'docker ps').")
    return is_connected

def _embedding_files_present(model_dir):
    """Return True when both config.json and pytorch_model.bin exist in ``model_dir``."""
    return all(
        os.path.exists(os.path.join(model_dir, file_name))
        for file_name in ("config.json", "pytorch_model.bin")
    )


def get_embedding_model():
    """Return the shared SentenceTransformer, downloading and loading it on first use.

    The model lives in ``./downloaded_embedding_model/<model name>`` under the
    current working directory. If the required files are missing they are
    fetched with ``snapshot_download`` and checked again; the model is then
    loaded strictly from that directory (``local_files_only=True``).

    Returns:
        The cached model, or None when the download fails, the files are
        still incomplete afterwards, or loading raises. Failures are printed.
    """
    global embedding_model
    if embedding_model is not None:
        return embedding_model

    local_model_dir = os.path.join(os.getcwd(), "downloaded_embedding_model", EMBEDDING_MODEL_NAME.replace("/", "--"))
    os.makedirs(local_model_dir, exist_ok=True)
    print(f"Attempting to load model from or download to: {local_model_dir}")

    if not _embedding_files_present(local_model_dir):
        print(f"Model files not found or incomplete in '{local_model_dir}'. Initiating robust download...")
        try:
            # Use snapshot_download to get all model files
            snapshot_download(
                repo_id=f"sentence-transformers/{EMBEDDING_MODEL_NAME}",
                local_dir=local_model_dir,
                local_dir_use_symlinks=False, # Copy files directly, don't use symlinks
                resume_download=True # Allow resuming interrupted downloads
            )
        except Exception as download_e:
            print(f"Error during model download to '{local_model_dir}': {download_e}")
            print("Please check your internet connection, disk space, and permissions for the download directory.")
            return None
        print(f"Model successfully downloaded to: {local_model_dir}")

        # A download that "succeeds" can still leave the directory incomplete.
        if not _embedding_files_present(local_model_dir):
            print(f"WARNING: Download completed, but essential files (config.json or pytorch_model.bin) are still missing in '{local_model_dir}'.")
            print("This might indicate a problem with the downloaded archive or disk permissions.")
            return None

    # From here on the required files are known to be on disk.
    try:
        print(f"Loading embedding model {EMBEDDING_MODEL_NAME} from local path '{local_model_dir}'...")
        embedding_model = SentenceTransformer(local_model_dir, model_kwargs={'device': 'cpu'}, local_files_only=True)
        print("Embedding model loaded successfully from local path.")
    except Exception as load_e:
        print(f"Error loading embedding model {EMBEDDING_MODEL_NAME} from local path '{local_model_dir}': {load_e}")
        print("This could be due to corrupted files or a compatibility issue.")
        print(f"Please try deleting the model cache folder '{local_model_dir}' and re-running the script to force a fresh download.")
        embedding_model = None

    return embedding_model

def get_milvus_collection(collection_suffix: str) -> Collection:
    """Look up ``<COLLECTION_NAME>_<collection_suffix>`` and load it for search.

    Args:
        collection_suffix: Part appended to ``COLLECTION_NAME`` after an
            underscore (the script passes "hnsw", "ivf" and "flat").

    Returns:
        The loaded collection, or None when it does not exist or loading
        raises (the reason is printed).
    """
    target_name = f"{COLLECTION_NAME}_{collection_suffix}"

    if utility.has_collection(target_name):
        handle = Collection(target_name)
        try:
            print(f"Loading collection '{handle.name}' into memory for search...")
            handle.load()
            print(f"Collection '{handle.name}' loaded.")
            return handle
        except Exception as exc:
            print(f"Error loading collection '{handle.name}': {exc}")
            return None

    print(f"Error: Collection '{target_name}' does not exist.")
    print("Please run milvus_full_setup_ingestion.py first to create and populate collections.")
    return None

def retrieve_relevant_chunks(query_text: str, collection: Collection, top_k: int = 3) -> "tuple[List[Dict[str, Any]], float]":
    """Embed ``query_text``, search ``collection`` and time the search call.

    Args:
        query_text: Natural-language query to embed.
        collection: Loaded Milvus collection to search; a falsy value
            short-circuits to an empty result.
        top_k: Maximum number of hits to request.

    Returns:
        ``(chunks, seconds)``. ``chunks`` is a list of dicts with
        "page_content", "embedding" (requested so MMR can re-rank) and
        "distance", in the order Milvus returned them. ``seconds`` covers the
        ``collection.search`` call only, not the query embedding.
        ``([], 0.0)`` is returned when the collection or embedding model is
        unavailable, or when the search raises (the error is printed).
    """
    if not collection:
        print("Collection 'unknown' not available for search.")
        return [], 0.0

    model = get_embedding_model()
    if not model:
        print("Embedding model not loaded. Cannot embed query.")
        return [], 0.0

    query_embedding = model.encode(query_text).tolist()

    # Index-specific tuning, chosen by the first key found in the collection
    # name. A name matching none of them previously left "param" unset, which
    # made every search fail; it now falls back to empty tuning params.
    tuning_by_index = {"hnsw": {"ef": 64}, "ivf": {"nprobe": 10}, "flat": {}}
    tuning = next(
        (params for key, params in tuning_by_index.items() if key in collection.name),
        {},
    )

    search_params = {
        "data": [query_embedding],
        "anns_field": "embedding",
        "param": {"metric_type": "COSINE", "params": tuning},
        "limit": top_k,
        "output_fields": ["page_content", "embedding"], # Request embeddings for MMR
        "expr": "pk > 0",
    }

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    search_start_time = time.perf_counter()
    try:
        results = collection.search(**search_params)
        search_time = time.perf_counter() - search_start_time

        # One query vector was sent, so only the first result set is relevant.
        retrieved_chunks = [
            {
                "page_content": hit.entity.get("page_content"),
                "embedding": hit.entity.get("embedding"), # Get embedding for MMR
                "distance": hit.distance,
            }
            for hit in results[0]
        ]

        print(f"Retrieved {len(retrieved_chunks)} relevant chunks from '{collection.name}' in {search_time:.4f} seconds.")
        return retrieved_chunks, search_time
    except Exception as e:
        print(f"Error during search in '{collection.name}': {e}")
        return [], 0.0

def calculate_mmr(query_embedding: np.ndarray, documents: List[Dict[str, Any]], lambda_mult: float = 0.5, k: int = 5) -> List[Dict[str, Any]]:
    """Re-rank ``documents`` with Maximal Marginal Relevance (MMR).

    Each step picks the remaining document that maximises
    ``lambda_mult * sim(doc, query) - (1 - lambda_mult) * max sim(doc, selected)``,
    where ``sim`` is cosine similarity.

    Relevance is computed from ``query_embedding`` directly rather than from
    each document's stored "distance". With Milvus's COSINE metric that value
    is a similarity (larger means closer), so the former ``1 - distance``
    ranked the least relevant documents first.

    Args:
        query_embedding: 1-D query vector with the same dimension as the
            document embeddings.
        documents: Dicts carrying an "embedding" vector; all other keys are
            passed through untouched.
        lambda_mult: Trade-off in [0, 1]; 1.0 is pure relevance, 0.0 is pure
            diversity.
        k: Maximum number of documents to return.

    Returns:
        Up to ``k`` of the input dicts in MMR order. If any document lacks an
        embedding, scores cannot be computed and the first ``k`` documents are
        returned in their incoming order.

    Raises:
        ValueError: If the query and document vector dimensions differ.
    """
    if not documents or k <= 0:
        return []

    raw_vectors = [doc.get('embedding') for doc in documents]
    if any(vector is None for vector in raw_vectors):
        return list(documents[:k])

    def _unit_rows(matrix: np.ndarray) -> np.ndarray:
        # Scale each row to unit length so dot products are cosine similarities.
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # leave all-zero rows as zeros instead of dividing by 0
        return matrix / norms

    doc_vectors = _unit_rows(np.asarray(raw_vectors, dtype=np.float64))
    query_vector = _unit_rows(np.asarray(query_embedding, dtype=np.float64).reshape(1, -1))[0]

    relevance_scores = doc_vectors @ query_vector
    document_similarity = doc_vectors @ doc_vectors.T

    selected_indices = []
    remaining_indices = list(range(len(documents)))

    while remaining_indices and len(selected_indices) < k:
        if selected_indices:
            # Redundancy: similarity to the closest already-selected document.
            redundancy = document_similarity[np.ix_(remaining_indices, selected_indices)].max(axis=1)
        else:
            redundancy = np.zeros(len(remaining_indices))

        mmr_scores = lambda_mult * relevance_scores[remaining_indices] - (1 - lambda_mult) * redundancy
        # argmax returns the first maximum, so ties keep the incoming order.
        best_index = remaining_indices.pop(int(np.argmax(mmr_scores)))
        selected_indices.append(best_index)

    return [documents[i] for i in selected_indices]


def generate_llm_response(query: str, context_chunks: List[str], timeout: float = 60.0) -> str:
    """Ask the Gemini model to answer ``query`` using only ``context_chunks``.

    Args:
        query: The user's question.
        context_chunks: Retrieved passages, numbered "Chunk 1..N" in the
            prompt. When empty, a placeholder context is sent instead.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The model's answer text, or a fixed fallback message when the request
        fails or the response has no candidate text. Never raises.
    """
    if not context_chunks:
        print("No context chunks provided for LLM. Generating response without context.")
        context_str = "No relevant context available."
    else:
        context_str = "\n".join(
            f"Chunk {number}: {chunk}" for number, chunk in enumerate(context_chunks, start=1)
        )

    prompt = f"""
    You are a helpful assistant specialized in answering questions based on provided information.
    Answer the user's query truthfully and concisely, using ONLY the context provided below.
    If the answer cannot be found in the context, state that you don't have enough information.

    User Query: {query}

    Context:
    {context_str}

    Answer:
    """

    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}

    # The key travels in a header rather than the URL: request errors include
    # the URL in their message, and those messages are printed below.
    headers = {'Content-Type': 'application/json'}
    if GEMINI_API_KEY:
        headers['x-goog-api-key'] = GEMINI_API_KEY

    print("\nSending request to Gemini LLM...")
    try:
        response = requests.post(GEMINI_API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()

        candidates = result.get("candidates") or [{}]
        parts = (candidates[0].get("content") or {}).get("parts")
        if parts:
            return parts[0]["text"]

        print(f"Unexpected API response format: {result}")
        return "Could not generate a response from the LLM due to an unexpected API format."
    except requests.exceptions.RequestException as e:
        print(f"Error during Gemini API call: {e}")
        return "An error occurred while communicating with the LLM API."
    except Exception as e:
        print(f"An unexpected error occurred during LLM response generation: {e}")
        return "An unexpected error occurred during LLM response generation."

if __name__ == "__main__":
    # Interactive evaluation loop: run each query against the HNSW, IVF_FLAT
    # and FLAT collections, compare timings and top hits, re-rank the HNSW
    # hits with MMR, ask the LLM, and save the result as JSON for the DOCX step.

    # Fail early with an install hint when a dependency is missing.
    try:
        import pymilvus
        import json
        from sentence_transformers import SentenceTransformer
        from huggingface_hub import snapshot_download
        import requests
        import numpy as np
    except ImportError as e:
        print(f"Missing required library: {e}. Please install using:")
        print("pip install pymilvus sentence-transformers huggingface-hub requests numpy")
        exit()

    # Connect to Milvus
    if not connect_to_milvus():
        exit()

    # Get Milvus Collections
    print("\n--- Retrieving Milvus Collections ---")
    hnsw_collection = get_milvus_collection("hnsw")
    ivf_collection = get_milvus_collection("ivf")
    flat_collection = get_milvus_collection("flat")

    if not all([hnsw_collection, ivf_collection, flat_collection]):
        print("Not all required Milvus collections are available or loaded. Exiting.")
        exit()

    # Get Embedding Model
    model = get_embedding_model()
    if not model:
        print("Failed to load embedding model. Exiting.")
        exit()

    # Display label -> collection, in the order results are searched and reported.
    collections_by_index = {
        "HNSW": hnsw_collection,
        "IVF_FLAT": ivf_collection,
        "FLAT": flat_collection,
    }

    # Interactive RAG Loop with Evaluation, Re-ranking, and JSON Output for DOCX
    print("\n--- Starting Interactive RAG Pipeline (with Evaluation, Re-ranking & JSON Output) ---")
    print("Enter 'exit' to quit.")

    while True:
        # Strip so " exit " still quits and whitespace-only input is caught.
        user_query = input("\nEnter your query: ").strip()
        if user_query.lower() == 'exit':
            break
        if not user_query:
            # A blank line would otherwise cost three searches and an LLM call.
            print("Please enter a non-empty query.")
            continue

        # --- Perform retrieval for each index type ---
        print(f"\nPerforming retrieval for query: '{user_query}'")
        all_retrieval_results = {}
        for index_type, index_collection in collections_by_index.items():
            # Fetch more than the 5 displayed so re-ranking has candidates to choose from.
            chunks, elapsed = retrieve_relevant_chunks(user_query, index_collection, top_k=20)
            all_retrieval_results[index_type] = {"chunks": chunks, "time": elapsed}

        print("\n--- Retrieval Performance Comparison ---")
        for index_type, data in all_retrieval_results.items():
            print(f"  {index_type} Retrieval Time: {data['time']:.4f} seconds")
            if data["chunks"]:
                top_5_distances_str = ", ".join([f'{c["distance"]:.4f}' for c in data['chunks'][:5]])
                print(f"    Top 5 Similarity (Distance): [{top_5_distances_str}]")
            else:
                print("    No chunks retrieved.")

        # --- Display detailed search results for each index type ---
        print("\n--- Detailed Search Results ---")
        for index_type, data in all_retrieval_results.items():
            print(f"\n--- Search Results ({index_type} Index) ---")
            if data["chunks"]:
                for i, chunk in enumerate(data["chunks"][:5]):
                    print(f"{index_type} Result {i+1} (Distance: {chunk['distance']:.4f}):")
                    print(f"  {chunk['page_content'].strip()}")
            else:
                print(f"No results found from {index_type} index.")

        # --- Re-ranking and LLM Generation (using HNSW results for simplicity) ---
        print("\n--- Applying Re-ranking and Generating LLM Response ---")
        hnsw_retrieved_chunks = all_retrieval_results["HNSW"]["chunks"]
        query_embedding_np = model.encode(user_query).astype(np.float32) if model else np.array([])

        if hnsw_retrieved_chunks and query_embedding_np.size > 0:
            re_ranked_chunks = calculate_mmr(
                query_embedding_np,
                hnsw_retrieved_chunks,
                lambda_mult=0.7,
                k=5
            )
            print(f"Re-ranked {len(re_ranked_chunks)} chunks using MMR.")
            context_for_llm = [chunk['page_content'] for chunk in re_ranked_chunks]
        else:
            re_ranked_chunks = []
            context_for_llm = []
            print("No chunks retrieved from HNSW for re-ranking or query embedding failed.")

        # Generate LLM response
        print("Generating LLM response...")
        llm_answer = generate_llm_response(user_query, context_for_llm)

        print("\n--- Final RAG Response ---")
        print(f"Query: {user_query}")
        print("\nRetrieved & Re-ranked Context (sent to LLM):")
        if re_ranked_chunks:
            for i, chunk in enumerate(re_ranked_chunks):
                print(f"  Rank {i+1} (Distance: {chunk['distance']:.4f}): {chunk['page_content'].strip()}")
        else:
            print("  No relevant context retrieved or re-ranked.")
        print(f"\nAnswer: {llm_answer}")
        print("--------------------")

        # --- Save RAG output to JSON for DOCX generation ---
        output_data = {
            "query": user_query,
            "retrieved_context": context_for_llm,
            "llm_answer": llm_answer
        }
        # Second-resolution timestamp keeps one file per answered query.
        json_output_filename = f"rag_output_{int(time.time())}.json"
        try:
            with open(json_output_filename, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
                json.dump(output_data, f, indent=2, ensure_ascii=False)
            print(f"\nRAG output saved to JSON for DOCX generation: {json_output_filename}")
        except Exception as e:
            print(f"Error saving RAG output to JSON: {e}")


    print("\nStep 6 (RAG LLM Generation with JSON Output) completed. Now, run the DOCX generation script.")


Attempting to connect to Milvus at localhost:19530...
Milvus connection successful!

--- Retrieving Milvus Collections ---
Loading collection 'document_chunks_hnsw' into memory for search...
Collection 'document_chunks_hnsw' loaded.
Loading collection 'document_chunks_ivf' into memory for search...
Collection 'document_chunks_ivf' loaded.
Loading collection 'document_chunks_flat' into memory for search...
Collection 'document_chunks_flat' loaded.
Attempting to load model from or download to: /Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L12-v2
Loading embedding model all-MiniLM-L12-v2 from local path '/Users/nitin/AgenticAI_Assignments/RAG_Assignment_1/downloaded_embedding_model/all-MiniLM-L12-v2'...
Embedding model loaded successfully from local path.

--- Starting Interactive RAG Pipeline (with Evaluation, Re-ranking & JSON Output) ---
Enter 'exit' to quit.

Performing retrieval for query: 'Your analysis on the roads for time period 2004-20

# STEP 7 -- DOCX REPORT GENERATION

In [3]:
import os
import json
from docx import Document
from docx.shared import Inches
from typing import List

def generate_docx_report(query: str, retrieved_context: List[str], llm_answer: str, output_filename: str = "rag_report.docx") -> bool:
    """
    Generates a DOCX report containing the user query, retrieved context, and LLM answer.

    The document has a top-level title followed by three sections:
    the query, the context chunks (one paragraph per chunk, numbered
    from 1), and the LLM answer.

    Args:
        query: Text written under the "1. User Query" heading.
        retrieved_context: Context chunks to list. A bare string is treated
            as a single chunk; an empty or None value produces a placeholder
            paragraph instead of a chunk list.
        llm_answer: Text written under the "3. LLM Generated Answer" heading.
        output_filename: Path the DOCX file is saved to.

    Returns:
        True if the document was saved, False if saving failed. A save
        failure is printed rather than raised, so callers that need to know
        must check the return value.
    """
    # A bare string would otherwise be enumerated character by character,
    # producing one "Chunk N" paragraph per character.
    if isinstance(retrieved_context, str):
        retrieved_context = [retrieved_context]

    doc = Document()
    doc.add_heading('RAG System Report', level=1)

    doc.add_heading('1. User Query', level=2)
    doc.add_paragraph(query)

    doc.add_heading('2. Retrieved and Re-ranked Context', level=2)
    if retrieved_context:
        for i, chunk in enumerate(retrieved_context, start=1):
            # str() keeps a non-string chunk (e.g. a number loaded from JSON)
            # from raising AttributeError on .strip().
            doc.add_paragraph(f"Chunk {i}: {str(chunk).strip()}")
    else:
        doc.add_paragraph("No relevant context was retrieved or re-ranked for this query.")

    doc.add_heading('3. LLM Generated Answer', level=2)
    doc.add_paragraph(llm_answer)

    try:
        doc.save(output_filename)
    except Exception as e:
        # Best-effort: report the failure and signal it through the return
        # value instead of propagating.
        print(f"Error saving DOCX report: {e}")
        return False

    print(f"\nDOCX report saved to: {output_filename}")
    return True


if __name__ == "__main__":
    # Input file written by the RAG pipeline cell, which names its output
    # rag_output_<unix timestamp>.json.
    json_file_path = "rag_output_1749222990.json"

    # No ImportError guard is needed here: python-docx, json and os are
    # imported unconditionally at the top of this cell, so a missing library
    # has already failed before execution reaches this point.

    if not os.path.exists(json_file_path):
        print(f"Error: JSON input file '{json_file_path}' not found.")
        print("Please ensure the path is correct and the JSON file was generated by the RAG pipeline.")
        raise FileNotFoundError(f"JSON input file '{json_file_path}' not found.")

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            output_data = json.load(f)
        print(f"Successfully loaded data from: {json_file_path}")

        # Fall back to placeholders when a key is absent from the JSON.
        query_from_json = output_data.get("query", "N/A")
        context_from_json = output_data.get("retrieved_context", [])
        answer_from_json = output_data.get("llm_answer", "No answer generated.")

        # Reuse the JSON file's timestamp so the report name pairs with its input:
        # strip "rag_output_" and ".json" from the base name.
        base_filename = os.path.basename(json_file_path)
        timestamp_part = base_filename.replace("rag_output_", "").replace(".json", "")
        docx_output_filename = f"RAG_Report_{timestamp_part}.docx"

        generate_docx_report(query_from_json, context_from_json, answer_from_json, docx_output_filename)
        print("\nDOCX Generation completed.")

    except Exception as e:
        # Top-level boundary: report which file was involved, then re-raise so
        # the failure stays visible with its traceback.
        print(f"Error loading JSON data from '{json_file_path}' or generating DOCX: {e}")
        raise



Successfully loaded data from: rag_output_1749222990.json

DOCX report saved to: RAG_Report_1749222990.docx

DOCX Generation completed.
