In [28]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so edits to local source files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [29]:
# NOTE(review): the autoreload extension is already loaded by the %load_ext
# call in the previous cell, so this reload looks redundant on a fresh
# Restart & Run All — confirm and consider dropping the cell.
%reload_ext autoreload

In [30]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from faiss import IndexFlatL2
import pickle
from pathlib import Path
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
import torch
import os
import sys
from huggingface_hub import login

In [31]:
# Make the modules under ../src importable from this notebook. The path is
# resolved against the kernel's current working directory.
SRC_DIR = os.path.abspath('../src/')
# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [32]:
from loggers import get_logger
from rag_helper import rag_pipeline

In [33]:
# Notebook-wide logger, built by the get_logger helper imported from loggers.
logger = get_logger(__name__)

In [34]:
# Directories used by this notebook, expressed relative to the kernel's
# working directory (one level above it).
PROJECT_ROOT = Path('..')
VECTOR_STORE_DIR = PROJECT_ROOT / 'vector_store'  # index and metadata pickles are read from here
DATA_DIR = PROJECT_ROOT / 'data'                  # the evaluation table is written here

#### Load vector store and metadata

In [35]:
# Read the persisted index and the chunk/metadata store back from disk.
# Any failure is logged and re-raised so the notebook stops at this cell.
#
# NOTE: pickle.load can execute arbitrary code; only point this at files
# produced by this project's own indexing step.
# NOTE(review): the index file is unpickled rather than read through a
# FAISS-specific reader — presumably it was written with pickle.dump; confirm
# against the code that built the vector store.
index_path = VECTOR_STORE_DIR / 'faiss_index.bin'
metadata_path = VECTOR_STORE_DIR / 'metadata.pkl'
try:
    with open(index_path, 'rb') as index_file:
        index = pickle.load(index_file)
    with open(metadata_path, 'rb') as metadata_file:
        store_data = pickle.load(metadata_file)
    # The metadata pickle is indexed by key: 'chunks' and 'metadata'.
    chunks = store_data['chunks']
    metadata = store_data['metadata']
    logger.info("Vector store and metadata loaded successfully.")
except FileNotFoundError as missing_file:
    logger.error(f"Vector store files not found: {missing_file}")
    raise
except Exception as load_error:
    logger.error(f"Error loading vector store or metadata: {load_error}")
    raise

2025-07-15 17:08:49,055 - INFO - Vector store and metadata loaded successfully.


#### Initialize Embedding Model

In [36]:
# Load the sentence-embedding model and smoke-test it with a single encode
# call, so a broken install or failed download surfaces in this cell rather
# than later inside the RAG pipeline. Failures are logged and re-raised.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
try:
    logger.info(f"Loading embedding model '{EMBEDDING_MODEL_NAME}'...")
    # Calling the class either returns a model object or raises, so a
    # separate "is None" check after construction can never trigger.
    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
    # Encode one sentence to prove the model is usable and to log its output shape.
    test_embedding = embedding_model.encode(["test sentence"])
    logger.info(f"Embedding model loaded successfully. Test embedding shape: {test_embedding.shape}")
except Exception as e:
    logger.error(f"Failed to load or test embedding model '{EMBEDDING_MODEL_NAME}': {e}")
    logger.error("Ensure 'sentence-transformers' is installed and the model is accessible.")
    raise

2025-07-15 17:08:52,344 - INFO - Loading embedding model 'sentence-transformers/all-MiniLM-L6-v2'...
2025-07-15 17:08:56,484 - INFO - Embedding model loaded successfully. Test embedding shape: (1, 384)


#### Initialize the LLM

In [37]:
# Initialize the LLM: a Hugging Face text2text-generation pipeline, smoke-tested
# with one prompt and then wrapped in LangChain's HuggingFacePipeline.
# The model name lives in one constant so the log messages always name the
# model that is actually loaded.
LLM_MODEL_NAME = "google/flan-t5-base"
try:
    llm_pipeline = pipeline(
        "text2text-generation",
        model=LLM_MODEL_NAME,
        device=0 if torch.cuda.is_available() else -1,  # first GPU when available, otherwise CPU
        max_length=200,
        # Sampling is enabled, so generated answers can differ between runs.
        do_sample=True,
        temperature=0.7
    )
    # Generate once so a broken model fails here; only the first 50 characters are logged.
    test_output = llm_pipeline("What are common issues with credit card billing disputes?")[0]['generated_text']
    logger.info(f"LLM ({LLM_MODEL_NAME}) loaded and tested successfully. Test output: {test_output[:50]}...")
except Exception as e:
    logger.error(f"Failed to load {LLM_MODEL_NAME}: {e}")
    logger.error("Ensure internet connection and try clearing cache: rm -rf ~/.cache/huggingface/hub")
    raise

# LangChain-compatible wrapper around the pipeline; this is the object handed to rag_pipeline.
llm = HuggingFacePipeline(pipeline=llm_pipeline)


Device set to use cpu


2025-07-15 17:09:02,904 - INFO - LLM (flan-t5-small) loaded and tested successfully. Test output: credit card fraud...


In [38]:
# Prompt used for answer generation. It has two placeholders: {context} and
# {question}. The instructions restrict the model to the supplied context and
# ask it to say so when the context is insufficient.
RAG_PROMPT_TEXT = """
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints based solely on the provided context. If the context doesn't contain enough information to answer the question, state clearly that you don't have sufficient information. Provide a concise and accurate response.

Context: {context}

Question: {question}

Answer:
"""

prompt_template = PromptTemplate(
    template=RAG_PROMPT_TEXT,
    input_variables=["context", "question"],
)



In [39]:
# Qualitative evaluation: the fixed set of natural-language questions that is
# run through the RAG pipeline. The topics named in the questions are credit
# cards, Buy Now Pay Later, money transfers, savings accounts and personal
# loans; the last question asks for a comparison between two of them.
evaluation_questions = [
    "What are common issues with credit card billing disputes?",
    "Why do customers complain about Buy Now, Pay Later services?",
    "Are there any complaints about unauthorized transactions in money transfers?",
    "What problems do people face with savings account fees?",
    "How do personal loan complaints differ from credit card complaints?"
]


In [40]:
def _truncate(text, limit):
    """Return ``text`` cut to ``limit`` characters, appending "..." only when it was actually cut."""
    return text[:limit] + "..." if len(text) > limit else text


# Run every evaluation question through the RAG pipeline and collect one
# record per question. A failing question is logged and recorded with
# quality_score 1 instead of aborting the loop, so the table always has one
# row per question.
#
# NOTE(review): in the recorded run every question failed inside rag_pipeline
# with "'NoneType' object has no attribute 'encode'" even though the embedding
# model cell succeeded — check how rag_helper obtains its embedding model
# (argument order or a module-level default).
evaluation_results = []
for question in evaluation_questions:
    try:
        result = rag_pipeline(question, llm, embedding_model, index, chunks, metadata, prompt_template)
        # Placeholder score and comment, to be revised by hand after reading the answer.
        quality_score = 3
        comment = "Review answer and sources for accuracy and relevance."
        # Summarise the first two retrieved chunks; the text is shortened to
        # 100 characters and only marked with "..." when it really was cut.
        sources = [
            f"Chunk ID: {chunk['metadata']['chunk_id']}, Product: {chunk['metadata']['product']}, "
            f"Text: {_truncate(chunk['text'], 100)}"
            for chunk in result['retrieved_chunks'][:2]
        ]
        evaluation_results.append({
            'question': question,
            'answer': _truncate(result['answer'], 200),
            'sources': sources,
            'quality_score': quality_score,
            'comment': comment
        })
    except Exception as e:
        logger.error(f"Error evaluating question '{question}': {e}")
        evaluation_results.append({
            'question': question,
            'answer': "Error generating answer.",
            'sources': [],
            'quality_score': 1,
            'comment': f"Failed due to: {str(e)}"
        })


2025-07-15 17:09:24,449 - ERROR - Error in retrieving chunks for query 'What are common issues with credit card billing disputes?': 'NoneType' object has no attribute 'encode'
2025-07-15 17:09:24,449 - ERROR - Error in RAG pipeline for query 'What are common issues with credit card billing disputes?': 'NoneType' object has no attribute 'encode'
2025-07-15 17:09:24,449 - ERROR - Error evaluating question 'What are common issues with credit card billing disputes?': 'NoneType' object has no attribute 'encode'
2025-07-15 17:09:24,449 - ERROR - Error in retrieving chunks for query 'Why do customers complain about Buy Now, Pay Later services?': 'NoneType' object has no attribute 'encode'
2025-07-15 17:09:24,449 - ERROR - Error in RAG pipeline for query 'Why do customers complain about Buy Now, Pay Later services?': 'NoneType' object has no attribute 'encode'
2025-07-15 17:09:24,454 - ERROR - Error evaluating question 'Why do customers complain about Buy Now, Pay Later services?': 'NoneType' 

In [17]:
# Save evaluation results as a Markdown table under DATA_DIR.
# DataFrame.to_markdown relies on the optional `tabulate` package; any failure
# is logged and re-raised.
try:
    evaluation_df = pd.DataFrame(evaluation_results)
    output_path = DATA_DIR / 'evaluation_table.md'
    # Create the target directory when it is missing, so a fresh checkout
    # without the data directory does not fail at the write below.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    evaluation_df.to_markdown(output_path, index=False)
    logger.info(f"Evaluation table saved to: {output_path}")
except Exception as e:
    logger.error(f"Error saving evaluation table: {e}")
    raise


2025-07-15 16:47:04,049 - INFO - Evaluation table saved to: ..\data\evaluation_table.md


In [18]:
# Show the evaluation table inline as Markdown text, under a heading line.
print("\nSample Evaluation Results:")
rendered_table = evaluation_df.to_markdown(index=False)
print(rendered_table)



Sample Evaluation Results:
| question                                                                     | answer                   | sources   |   quality_score | comment                                                    |
|:-----------------------------------------------------------------------------|:-------------------------|:----------|----------------:|:-----------------------------------------------------------|
| What are common issues with credit card billing disputes?                    | Error generating answer. | []        |               1 | Failed due to: 'NoneType' object has no attribute 'encode' |
| Why do customers complain about Buy Now, Pay Later services?                 | Error generating answer. | []        |               1 | Failed due to: 'NoneType' object has no attribute 'encode' |
| Are there any complaints about unauthorized transactions in money transfers? | Error generating answer. | []        |               1 | Failed due to: 'NoneType' object has n