# Change the Hugging Face cache directory

In [1]:
import os
# Configure the Hugging Face environment in one step, before the RAG components
# are imported and instantiated further down.
# NOTE(review): "D:/huggingface" is a machine-specific Windows path and it
# overrides any HF_HOME already set in the environment — confirm before sharing.
os.environ.update({
    "HF_HOME": "D:/huggingface",
    "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
})


# Imports

In [2]:
import sys
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Resolve the directory one level above the current working directory and
# register it on the import path so the `src` package imported below resolves.
# NOTE(review): assumes the kernel's working directory is a sub-folder of the
# project root — confirm how the notebook is launched.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))

# Guarded so that re-running this cell never adds the same entry twice.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from src.rag.retriever import Retriever
from src.rag.prompt import PromptBuilder
from src.rag.generator import Generator

# Initialize RAG components

In [4]:
# Model identifier handed to the generator (presumably a Hugging Face Hub id —
# confirm against src.rag.generator).
GENERATOR_MODEL_NAME = "google/flan-t5-base"

# Build the three pipeline stages once; the evaluation loop reuses them for
# every question. Retriever and PromptBuilder are created with their defaults.
retriever = Retriever()
prompt_builder = PromptBuilder()
generator = Generator(model_name=GENERATOR_MODEL_NAME)

Device set to use cpu


# Define evaluation questions

In [5]:
# Evaluation questions, fed one at a time to the retrieve -> prompt -> generate
# loop further down; each one targets a different financial product.
questions = [
    "How do customers feel about Buy Now Pay Later?",
    "What are common complaints related to Credit Cards?",
    "How do customers describe Personal Loan issues?",
    "What problems are reported about Savings Accounts?",
    "Are there complaints related to Money Transfers?",
    # Add further questions here as needed (up to 10 in total)
]

# Run the RAG pipeline on each question and collect the results

In [15]:
def evaluate_question(question, top_k=5, max_sources=2):
    """Run one question through the RAG pipeline and return its report row.

    Uses the notebook-level ``retriever``, ``prompt_builder`` and ``generator``
    objects, which must already exist when this function is called.

    Parameters
    ----------
    question : str
        The evaluation question to answer.
    top_k : int, default 5
        Number of chunks requested from the retriever; all of them are passed
        to the prompt builder as context.
    max_sources : int, default 2
        How many of the retrieved chunks (taken from the front of the list)
        are cited in the "Retrieved Sources" column.

    Returns
    -------
    dict
        One row of the evaluation table. The score and comment fields are left
        as empty strings so they can be filled in manually afterwards.
    """
    # Each retrieved chunk is indexed below by 'text', 'product' and 'complaint_id'.
    chunks = retriever.retrieve(question, top_k=top_k)

    # The prompt builder receives only the raw complaint texts, not the metadata.
    context_texts = [chunk['text'] for chunk in chunks]
    prompt = prompt_builder.build_prompt(context_texts, question)
    answer = generator.generate_answer(prompt)

    # Cite only the leading chunks to keep the table cell short; an empty
    # retrieval simply yields an empty string here.
    sources = "; ".join(
        f"{chunk['product']} complaint #{chunk['complaint_id']}"
        for chunk in chunks[:max_sources]
    )

    return {
        "Question": question,
        "Generated Answer": answer,
        "Retrieved Sources": sources,
        "Quality Score (1-5)": "",  # Manual scoring later
        "Comments/Analysis": "",
    }


# Rebuilt from scratch on every run, so re-executing this cell never
# accumulates duplicate rows.
results = [evaluate_question(q) for q in questions]


# Convert to DataFrame for easier export/formatting

In [16]:
# One row per evaluated question; column order follows the key order of the
# result dictionaries.
df_results = pd.DataFrame(data=results)

# Save as markdown

In [17]:
# Render the results as a pipe-delimited Markdown table (the format pandas
# uses by default, spelled out here), without the DataFrame's row index.
md_table = df_results.to_markdown(index=False, tablefmt="pipe")

In [19]:
output_path = "../reports/evaluation_table.md"  # Your desired file path

# open(..., "w") does not create missing parent folders, so make sure the
# target directory exists; otherwise a fresh checkout without ../reports
# fails with FileNotFoundError. The guard skips a bare filename, whose
# directory part is the empty string.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Explicit UTF-8 so non-ASCII characters in the generated answers are written
# the same way regardless of the platform's default encoding.
with open(output_path, "w", encoding="utf-8") as f:
    f.write(md_table)

print(f"Evaluation complete. Results saved to {output_path}")


Evaluation complete. Results saved to ../reports/evaluation_table.md


In [20]:
# Echo the Markdown table inline so it can be checked without opening the
# saved report file.
print(md_table)

| Question                                            | Generated Answer                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | Retrieved Sources                                                  | Quality Score (1-5)   | Comments/Analysis   |
|:----------------------------------------------------|:---------------------------