# Task 3: Building the RAG Core Logic and Evaluation


In [25]:
# import dependencies 
import pandas as pd
from pathlib import Path
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from langchain.docstore.document import Document
import pickle


## loading of vector store

In [26]:
# Set up paths
VECTOR_STORE_PATH = Path('F:/Intelligent_Complaint_Analysis/data')
VECTOR_STORE_FILE = VECTOR_STORE_PATH / 'faiss_index'
EMBEDDING_MODEL_FILE = VECTOR_STORE_PATH / 'embedding_model.pkl'


In [27]:

# Load embedding model
print("Loading embedding model...")
with open(EMBEDDING_MODEL_FILE, 'rb') as f:
    embedding_model = pickle.load(f)


Loading embedding model...


In [None]:

# Load vector store
print("Loading vector store...")
vector_store = FAISS.load_local(VECTOR_STORE_FILE, embeddings=embedding_model, allow_dangerous_deserialization=True)

Loading vector store...


## Initialize LLM

In [None]:
# Initialize LLM (using distilgpt2 for lightweight demo; replace with stronger model if available)
print("Loading language model...")
llm = pipeline('text-generation', model='distilgpt2', max_new_tokens=150, truncation=True)

Loading language model...


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


In [23]:
# Add the parent directory to the system path
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

## Prompt template engineering 

In [None]:
# Prompt template
PROMPT_TEMPLATE = """You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints based only on the provided context. If the context doesn't contain enough information to answer the question, state that clearly and do not make assumptions. Provide a concise and accurate answer.

Context:
{context}

Question:
{question}

Answer:
"""

## Evaluation Question 

In [9]:
# Evaluation questions
eval_questions = [
    "What are common issues with credit card complaints?",
    "How do consumers describe problems with Buy Now, Pay Later services?",
    "Are there any complaints about unauthorized transactions in money transfers?",
    "What fees are mentioned in savings account complaints?",
    "What are typical delays reported in personal loan processing?"
]

# Run Evaluation 

In [None]:
from src.rag_peplines import rag_pipeline
# Run evaluation
evaluation_results = []
for question in eval_questions:
    result = rag_pipeline(question)
    # For simplicity, assume a manual quality score (1-5) based on relevance and coherence
    # In practice, you'd evaluate based on ground truth or human judgment
    evaluation_results.append({
        'question': question,
        'answer': result['answer'],
        'retrieved_sources': [
            {
                'complaint_id': doc.metadata['complaint_id'],
                'product': doc.metadata['product'],
                'text': doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
            } for doc in result['retrieved_docs'][:2]  # Show top 2 sources
        ],
        'quality_score': 3,  # Placeholder; adjust based on manual review
        'comments': "Placeholder: Evaluate coherence and relevance manually."
    })

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [20]:
# Save evaluation results as a CSV for reporting
eval_df = pd.DataFrame(evaluation_results)
#eval_df.to_csv('F:/Intelligent_Complaint_Analysis/data/evaluation_results.csv', index=False)
print("Evaluation results saved to evaluation_results.csv")

Evaluation results saved to evaluation_results.csv


# OUTPUT 

In [21]:
display(eval_df)

Unnamed: 0,question,answer,retrieved_sources,quality_score,comments
0,What are common issues with credit card compla...,1. The issue is not resolved by their credit c...,[{'complaint_id': 'f90ada6e-6d82-4805-ad95-076...,3,Placeholder: Evaluate coherence and relevance ...
1,How do consumers describe problems with Buy No...,I don't know. I don't know. I don't know. I do...,[{'complaint_id': '82ecdb40-7a0f-4df3-83b0-1e0...,3,Placeholder: Evaluate coherence and relevance ...
2,Are there any complaints about unauthorized tr...,Yes i have been asked so far but they have not...,[{'complaint_id': '3e821f9a-112a-499f-8ac5-0e4...,3,Placeholder: Evaluate coherence and relevance ...
3,What fees are mentioned in savings account com...,What are the fees for account holders?\nAnswer...,[{'complaint_id': '49abdcb5-5719-4d62-a162-19d...,3,Placeholder: Evaluate coherence and relevance ...
4,What are typical delays reported in personal l...,What is typical delay reported in personal loa...,[{'complaint_id': '36ae68ca-7661-4603-8fd0-553...,3,Placeholder: Evaluate coherence and relevance ...


In [24]:
display(eval_df)

Unnamed: 0,question,answer,retrieved_sources,quality_score,comments
0,What are common issues with credit card compla...,1. The issue is not resolved by their credit c...,[{'complaint_id': 'f90ada6e-6d82-4805-ad95-076...,3,Placeholder: Evaluate coherence and relevance ...
1,How do consumers describe problems with Buy No...,I don't know. I don't know. I don't know. I do...,[{'complaint_id': '82ecdb40-7a0f-4df3-83b0-1e0...,3,Placeholder: Evaluate coherence and relevance ...
2,Are there any complaints about unauthorized tr...,Yes i have been asked so far but they have not...,[{'complaint_id': '3e821f9a-112a-499f-8ac5-0e4...,3,Placeholder: Evaluate coherence and relevance ...
3,What fees are mentioned in savings account com...,What are the fees for account holders?\nAnswer...,[{'complaint_id': '49abdcb5-5719-4d62-a162-19d...,3,Placeholder: Evaluate coherence and relevance ...
4,What are typical delays reported in personal l...,What is typical delay reported in personal loa...,[{'complaint_id': '36ae68ca-7661-4603-8fd0-553...,3,Placeholder: Evaluate coherence and relevance ...
