In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
import os
import time
from typing import List, Dict

# --- Configuration ---
FAISS_INDEX_PATH = "faiss_cvpr_index"  
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LOCAL_OLLAMA_MODEL = "cvpr-gemma-3-4b"
TOP_K = 5  # Number of relevant chunks
MAX_TOKENS = 512  # Limit response length

# --- Load FAISS Vector Store and Retriever ---
def load_vector_store(index_path: str) -> FAISS:
    """Load the FAISS vector store with error handling."""
    if not os.path.exists(index_path):
        raise FileNotFoundError(f"FAISS index not found at {index_path}. Check the path.")
    try:
        embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
        vector_store = FAISS.load_local(
            index_path,
            embeddings=embeddings,
            allow_dangerous_deserialization=True
        )
        print(f"FAISS index loaded with {vector_store.index.ntotal} vectors.")
        return vector_store
    except Exception as e:
        raise RuntimeError(f"Failed to load FAISS index: {e}")

# --- Connect to Ollama Model ---
def connect_ollama(model_name: str) -> Ollama:
    """Connect to the local Ollama model with error handling."""
    try:
        llm = Ollama(model=model_name, temperature=0.7)
        print(f"Connected to local Ollama model: '{model_name}'.")
        return llm
    except Exception as e:
        raise RuntimeError(f"Failed to connect to Ollama: {e}. Ensure Ollama is running.")

# --- Define Prompt Template ---
prompt_template_str = """<s>[INST] You are an assistant for question-answering tasks on CVPR research papers.
Use the following pieces of retrieved context to answer the question concisely.
If you don't know the answer or the context is insufficient, say 'I donâ€™t know.'
Context: {context}
Question: {question}
Answer: [/INST]"""
prompt = PromptTemplate.from_template(prompt_template_str)

# --- RAG Pipeline ---
def rag_pipeline(query: str, vector_store: FAISS, llm: Ollama) -> Dict:
    """Execute the RAG pipeline with retrieval and generation."""
    start_time = time.time()
    
    # Retrieve relevant documents
    retrieved_docs = vector_store.similarity_search(query, k=TOP_K)
    context_string = "\n\n---\n\n".join([doc.page_content for doc in retrieved_docs])
    
    # Format and generate response
    formatted_prompt = prompt.format(context=context_string, question=query)
    try:
        response = llm.invoke(formatted_prompt)
    except Exception as e:
        raise RuntimeError(f"Generation failed: {e}")
    
    # Calculate metrics
    latency = time.time() - start_time
    metrics = {
        "latency": latency,
        "retrieved_count": len(retrieved_docs),
    }
    
    return {
        "query": query,
        "response": response,
        "context": context_string,
        "metrics": metrics,
        "sources": retrieved_docs
    }

# --- Interactive Q&A Loop ---
def main():
    print("\n--- CVPR Research Assistant Ready ---")
    print("Ask a question about the papers, or type 'exit' to quit.")
    
    # Initialize components
    try:
        vector_store = load_vector_store(FAISS_INDEX_PATH)
        llm = connect_ollama(LOCAL_OLLAMA_MODEL)
    except Exception as e:
        print(f"Initialization failed: {e}")
        return
    
    while True:
        user_query = input("\nYour Question: ").strip()
        if user_query.lower() == 'exit':
            print("Exiting...")
            break
        
        try:
            result = rag_pipeline(user_query, vector_store, llm)
            print("\n### Answer:")
            print(result["response"])
            print("\n### Metrics:")
            print(f"Latency: {result['metrics']['latency']:.2f} seconds")
            print(f"Retrieved Chunks: {result['metrics']['retrieved_count']}")
            print("\n### Sources:")
            for i, doc in enumerate(result["sources"]):
                print(f"Source {i+1} (from paper: {doc.metadata.get('title', 'N/A')}):")
                print(f"> \"{doc.page_content[:250]}...\"")
        except Exception as e:
            print(f"Error processing query: {e}")

if __name__ == "__main__":
    main()


--- CVPR Research Assistant Ready ---
Ask a question about the papers, or type 'exit' to quit.


  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
  from .autonotebook import tqdm as notebook_tqdm


FAISS index loaded with 2310 vectors.
Connected to local Ollama model: 'cvpr-gemma-3-4b'.


  llm = Ollama(model=model_name, temperature=0.7)



### Answer:
] Transformer-based sisr models, hat swinir dat continue deliver strong reconstruction results capturing long-range dependencies. many teams utilized pre-trained transformer models fine-tuned hybrid at- tention s>

### Metrics:
Latency: 33.69 seconds
Retrieved Chunks: 5

### Sources:
Source 1 (from paper: NTIRE 2025 Challenge on Image Super-Resolution ($\times$4): Methods and Results):
> "teams surpass last years best psnr score db, ten teams obtain results db, highlighting clear improvement reconstruction accuracy. track perception quality. snucv team ranks first highest perceptual score two teams achieve score seven teams exceed in-..."
Source 2 (from paper: End-to-End RGB-IR Joint Image Compression With Channel-wise Cross-modality Entropy Model):
> "ieee transactions intelligent transportation systems, vol. no. pp. liu, lin, cao, hu, wei, zhang, lin, guo, swin transformer hierarchical vision transformer using shifted windows, proceedings ieeecvf international conference