In [None]:
# Cell 1: Initial setup - Mount Drive, Install Libraries, Force Restart
# *** IMPORTANT: Run this as your FIRST cell after "Disconnect and delete runtime" and reconnecting ***

from google.colab import drive
drive.mount('/content/drive')

print("Uninstalling existing versions to ensure a clean slate...")
# Uninstall all related packages comprehensively.
!pip uninstall -y torch torchvision torchaudio numpy transformers accelerate safetensors tokenizers sentencepiece packaging langchain langchain_community chromadb h5py sentence_transformers

print("\nInstalling latest versions of core libraries, allowing pip to resolve for NumPy 2.x...")

# Install PyTorch for Colab's CUDA version.
# For recent Colab, cu121 (CUDA 12.1) is common.
!pip install -qU torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Install latest versions of other libraries.
# The -U flag ensures upgrade, and -q keeps output quiet.
# Let pip resolve dependencies to the latest compatible versions.
!pip install -qU transformers accelerate sentence-transformers langchain langchain_community chromadb h5py packaging

# --- Verification of installed versions (now safe to import after installation) ---
import torch
import numpy as np
try:
    import transformers
    import accelerate
    import sentence_transformers
    import langchain
    import packaging
except ImportError as e:
    print(f"Error importing modules for verification (this might happen before restart): {e}")

print(f"\nPyTorch CUDA available: {torch.cuda.is_available()}")
print(f"PyTorch CUDA version: {torch.version.cuda}")
print(f"NVIDIA GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A'}")
print(f"Installed torch version: {torch.__version__}")
print(f"Installed numpy version: {np.__version__}")
if 'transformers' in locals():
    print(f"Installed transformers version: {transformers.__version__}")
if 'accelerate' in locals():
    print(f"Installed accelerate version: {accelerate.__version__}")
if 'sentence_transformers' in locals():
    print(f"Installed sentence-transformers version: {sentence_transformers.__version__}")
if 'langchain' in locals():
    print(f"Installed langchain version: {langchain.__version__}")
if 'packaging' in locals():
    print(f"Installed packaging version: {packaging.__version__}")
# --- End Verification ---

print("\nInitial installations complete. Forcing runtime restart to apply all changes.")
import os
os._exit(0) # This will crash the runtime.

Mounted at /content/drive
Uninstalling existing versions to ensure a clean slate...
Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: transformers 4.53.0
Uninstalling transformers-4.53.0:
  Successfully uninstalled transformers-4.53.0
Found existing installation: accelerate 1.8.1
Uninstalling accelerate-1.8.1:
  Successfully uninstalled accelerate-1.8.1
Found existing installation: safetensors 0.5.3
Uninstalling safetensors-0.5.3:
  Successfully uninstalled safetensors-0.5.3
Found existing installation

In [2]:
# Cell 2: Main RAG Code (run this *after* Cell 1 has crashed and you've restarted the session)

# Re-mount Google Drive (essential after any restart/reset)
from google.colab import drive
drive.mount('/content/drive')

# Now, import the libraries (they should now be correctly installed and available)
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline
# CORRECTED IMPORT: Use AutoModelForSeq2SeqLM for T5 models
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
import torch
import pandas as pd
import warnings
import numpy as np

warnings.filterwarnings('ignore') # Suppress warnings

# --- Verification of imported versions ---
try:
    import accelerate
    import transformers
    import sentence_transformers
    import langchain
    import packaging
    print(f"\nChecking imported transformers version: {transformers.__version__}")
    print(f"Checking imported accelerate version: {accelerate.__version__}")
    print(f"Checking imported torch version: {torch.__version__}")
    print(f"Checking imported numpy version: {np.__version__}")
    print(f"Checking imported langchain version: {langchain.__version__}")
    print(f"Checking imported packaging version: {packaging.__version__}")

except ImportError as e:
    print(f"Critical Error: Failed to import necessary libraries after restart: {e}")
    print("Please ensure Cell 1 ran successfully and the runtime was restarted.")
    exit()

# --- Configuration ---
DATA_DIR = '/content/drive/MyDrive/'
VECTOR_STORE_DIR = '/content/drive/MyDrive/vector_store'
CHROMA_PERSIST_DIR = os.path.join(VECTOR_STORE_DIR, 'chroma_db_credi_trust')
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_NAME = "google/flan-t5-small"

# --- 1. Retriever Implementation ---

print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
print("Embedding model loaded.")

# Load the persisted ChromaDB vector store
print(f"Loading ChromaDB from: {CHROMA_PERSIST_DIR}...")
try:
    vector_store = Chroma(
        persist_directory=CHROMA_PERSIST_DIR,
        embedding_function=embedding_model,
        collection_name="credi_trust_complaints"
    )
    print("ChromaDB vector store loaded successfully.")
except Exception as e:
    print(f"Error loading ChromaDB: {e}")
    print("Please ensure the persist_directory and collection_name are correct and the database was saved properly in Task 2.")
    vector_store = None

def retrieve_chunks(question: str, vector_store: Chroma, k: int = 5):
    if vector_store is None:
        print("Error: Vector store not loaded. Cannot retrieve chunks.")
        return []
    print(f"\nRetrieving top {k} chunks for query: '{question}'...")
    # CORRECTED TYPO: .similarity_search instead of .similarity.search
    retrieved_docs = vector_store.similarity_search(question, k=k)
    print(f"Retrieved {len(retrieved_docs)} chunks.")
    return retrieved_docs

# --- 2. Prompt Engineering ---

prompt_template_str = """
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints.
Use the following retrieved complaint excerpts to formulate your answer.
If the context doesn't contain the answer, state that you don't have enough information.
Be concise and directly answer the question based *only* on the provided context.

Context:
{context}

Question: {question}

Answer:
"""
PROMPT = PromptTemplate(template=prompt_template_str, input_variables=["context", "question"])
print("\nPrompt template defined.")

# --- 3. Generator Implementation ---

print(f"\nLoading LLM: {LLM_MODEL_NAME}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)

    if "t5" in LLM_MODEL_NAME.lower():
        # CORRECTED MODEL CLASS: Use AutoModelForSeq2SeqLM for T5
        model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL_NAME, torch_dtype=torch.float32, device_map="auto")
        pipe = pipeline(
            "text2text-generation", # T5 uses text2text-generation
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
        )
    else:
        # Keep AutoModelForCausalLM for other model types if you change LLM_MODEL_NAME later
        model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL_NAME,
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            device_map="auto"
        )
        pipe = pipeline(
            "text-generation", # Causal models use text-generation
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
        )

    llm = HuggingFacePipeline(pipeline=pipe)
    print(f"LLM '{LLM_MODEL_NAME}' loaded successfully.")

except Exception as e:
    print(f"Error loading LLM: {e}")
    print("Please check the LLM_MODEL_NAME and ensure you have sufficient resources (GPU/RAM) if using a large model.")
    llm = None

def generate_answer(question: str, retrieved_docs: list, llm: HuggingFacePipeline):
    if llm is None:
        return "Error: Language model not loaded. Cannot generate answer."

    context_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
    formatted_prompt = PROMPT.format(context=context_text, question=question)

    print("\nSending formatted prompt to LLM...")
    response = llm.invoke(formatted_prompt)

    if isinstance(response, list) and len(response) > 0 and 'generated_text' in response[0]:
        generated_text = response[0]['generated_text']
        # Remove the prompt itself from the generated text if the model echoes it
        if formatted_prompt in generated_text:
            generated_text = generated_text[generated_text.find(formatted_prompt) + len(formatted_prompt):].strip()
        # Remove "Answer:" prefix if present
        if generated_text.startswith("Answer:"):
            generated_text = generated_text[len("Answer:"):].strip()
        return generated_text
    elif isinstance(response, str):
        return response.strip()
    else:
        print(f"Unexpected LLM response format: {response}")
        return "Could not generate a coherent answer."


# --- 4. Qualitative Evaluation ---

print("\n--- Starting Qualitative Evaluation ---")

evaluation_questions = [
    "What are common complaints related to credit cards?",
    "Tell me about issues with personal loans.",
    "Are there any complaints about money transfers being delayed?",
    "What problems do consumers face with Buy Now, Pay Later services?",
    "Describe typical complaints about savings accounts.",
    "A customer reported a fraudulent charge on their card. What product is this related to?",
    "I sent money to a wrong account. What product category would this fall under?",
    "What is a common issue with interest rates on personal loans?",
    "Are there any complaints about account closures for savings accounts?",
    "What are the main concerns regarding BNPL late fees?"
]

evaluation_results = []

for i, question in enumerate(evaluation_questions):
    print(f"\n--- Evaluating Question {i+1}: {question} ---")
    retrieved_docs = retrieve_chunks(question, vector_store, k=5)

    sources_info = []
    for doc in retrieved_docs:
        sources_info.append(
            f"Source (Complaint ID: {doc.metadata.get('complaint_id', 'N/A')}, "
            f"Product: {doc.metadata.get('product', 'N/A')}):\n"
            f"{doc.page_content[:150]}..."
        )

    generated_answer = generate_answer(question, retrieved_docs, llm)

    evaluation_results.append({
        "Question": question,
        "Generated Answer": generated_answer,
        "Retrieved Sources": sources_info[:2],
        "Quality Score": " (Manual 1-5)",
        "Comments/Analysis": " (Manual analysis)"
    })

    print(f"\nGenerated Answer: {generated_answer}")
    print(f"\nTop Retrieved Sources (for analysis):")
    for src in sources_info[:2]:
        print(src)
    print("-" * 50)

print("\n--- Evaluation Table (Copy this to your report and fill in scores/comments) ---")
print("| Question | Generated Answer | Retrieved Sources (top 2) | Quality Score (1-5) | Comments/Analysis |")
print("|----------|------------------|---------------------------|---------------------|-------------------|")
for result in evaluation_results:
    formatted_sources = "<br>".join(result['Retrieved Sources'])
    answer_for_md = result['Generated Answer'].replace('|', '\|')
    sources_for_md = formatted_sources.replace('|', '\|')

    print(f"| {result['Question']} | {answer_for_md} | {sources_for_md} | {result['Quality Score']} | {result['Comments/Analysis']} |")

print("\nTask 3: RAG Core Logic and Evaluation completed. Please manually fill the evaluation table in your report.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Checking imported transformers version: 4.53.1
Checking imported accelerate version: 1.8.1
Checking imported torch version: 2.5.1+cu121
Checking imported numpy version: 2.1.2
Checking imported langchain version: 0.3.26
Checking imported packaging version: 24.2
Loading embedding model: sentence-transformers/all-MiniLM-L6-v2...
Embedding model loaded.
Loading ChromaDB from: /content/drive/MyDrive/vector_store/chroma_db_credi_trust...
ChromaDB vector store loaded successfully.

Prompt template defined.

Loading LLM: google/flan-t5-small...


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu


LLM 'google/flan-t5-small' loaded successfully.

--- Starting Qualitative Evaluation ---

--- Evaluating Question 1: What are common complaints related to credit cards? ---

Retrieving top 5 chunks for query: 'What are common complaints related to credit cards?'...
Retrieved 0 chunks.

Sending formatted prompt to LLM...

Generated Answer: Credit cards are unsecured.

Top Retrieved Sources (for analysis):
--------------------------------------------------

--- Evaluating Question 2: Tell me about issues with personal loans. ---

Retrieving top 5 chunks for query: 'Tell me about issues with personal loans.'...
Retrieved 0 chunks.

Sending formatted prompt to LLM...

Generated Answer: What does CrediTrust do?

Top Retrieved Sources (for analysis):
--------------------------------------------------

--- Evaluating Question 3: Are there any complaints about money transfers being delayed? ---

Retrieving top 5 chunks for query: 'Are there any complaints about money transfers being delayed?'.