In [8]:
!pip install sentence_transformers
!pip install faiss-cpu

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [11]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import evaluate
import numpy as np
import torch
from tqdm import tqdm
import pandas as pd
from openai import OpenAI
from sentence_transformers import SentenceTransformer
import faiss
import os
from typing import List, Dict, Tuple
import json

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = "sk-****"  # Replace with your actual API key
client = OpenAI()

# Load your fine-tuned model
finetuned_model_path = "./flan-t5-empathy-final"
finetuned_tokenizer = AutoTokenizer.from_pretrained(finetuned_model_path)
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model_path)

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
finetuned_model = finetuned_model.to(device)

# Load your knowledge base data - this would be your empathy dataset or any related corpus
# Here I'm assuming we'll use the training data as our knowledge base
data_files = {
    "train": "empathy_new/empathy_train_new.csv",
    "test": "empathy_new/empathy_test_new.csv"
}
dataset = load_dataset("csv", data_files=data_files)

# Class for RAG implementation
class RAGSystem:
    def __init__(self, knowledge_base: List[Dict], embedding_model: str = "all-MiniLM-L6-v2"):
        """
        Initialize the RAG system with a knowledge base and embedding model.
        
        Args:
            knowledge_base: List of dictionaries with input_text and target_text
            embedding_model: Name of the SentenceTransformer model to use
        """
        self.knowledge_base = knowledge_base
        self.embedding_model = SentenceTransformer(embedding_model)
        
        # Create FAISS index for fast similarity search
        self._create_index()
        
    def _create_index(self):
        """Create a FAISS index from the knowledge base"""
        print("Creating FAISS index for RAG...")
        # Extract texts for embedding
        texts = [item["input_text"] for item in self.knowledge_base]
        
        # Create embeddings
        embeddings = self.embedding_model.encode(texts, show_progress_bar=True)
        
        # Normalize embeddings for cosine similarity
        normalized_embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
        
        # Build FAISS index
        self.dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(self.dimension)  # Inner product for cosine similarity
        self.index.add(normalized_embeddings.astype(np.float32))
        
    def retrieve(self, query: str, top_k: int = 3) -> List[Dict]:
        """
        Retrieve the top_k most relevant documents for the query.
        
        Args:
            query: The user query
            top_k: Number of documents to retrieve
            
        Returns:
            List of retrieved documents with scores
        """
        # Embed the query
        query_embedding = self.embedding_model.encode([query])[0]
        
        # Normalize for cosine similarity
        query_embedding = query_embedding / np.linalg.norm(query_embedding)
        
        # Search the index
        scores, indices = self.index.search(
            np.array([query_embedding]).astype(np.float32), 
            k=top_k
        )
        
        # Return the retrieved documents with scores
        retrieved_docs = []
        for score, idx in zip(scores[0], indices[0]):
            retrieved_docs.append({
                "score": float(score),
                "input_text": self.knowledge_base[idx]["input_text"],
                "target_text": self.knowledge_base[idx]["target_text"]
            })
            
        return retrieved_docs

# Function to get Flan-T5 response
def get_flan_t5_response(model, tokenizer, query, device):
    """Generate response from Flan-T5 model"""
    inputs = tokenizer(query, return_tensors="pt", truncation=True, max_length=512).to(device)
    
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=128,
            num_beams=4,
            early_stopping=True
        )
    
    prediction = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return prediction

# Function to get GPT-4o response
def get_gpt4o_response(prompt):
    """Get response from GPT-4o using the OpenAI API"""
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1000
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error calling GPT-4o API: {e}")
        return f"Error: {str(e)}"

# Function to create prompt for GPT-4o using both Flan-T5 and RAG outputs
def create_gpt4o_prompt(query, flan_t5_response, rag_documents):
    """Create a prompt for GPT-4o using both Flan-T5 and RAG results"""
    
    prompt = f"""
You are assisting with generating an empathetic response to the following query:

QUERY: {query}

I have two sources of information to help craft the response:

1. OUTPUT FROM FINE-TUNED EMPATHY MODEL:
{flan_t5_response}

2. SIMILAR EXAMPLES FROM DATABASE:
"""
    
    for i, doc in enumerate(rag_documents, 1):
        prompt += f"""
Example {i} (Similarity Score: {doc['score']:.2f}):
- Situation: {doc['input_text']}
- Empathetic Response: {doc['target_text']}
"""
    
    prompt += """
Please synthesize these sources to create a highly empathetic response that addresses the query directly.
Your response should:
1. Show understanding of the emotions and situation
2. Validate their feelings
3. Offer appropriate support or perspective
4. Be natural and conversational in tone

EMPATHETIC RESPONSE:
"""
    
    return prompt

# Prepare the knowledge base for RAG
knowledge_base = [
    {
        "input_text": "At my sister's wedding, I wanted to say how proud I was of her.",
        "target_text": "You looked absolutely stunning today. I’m so proud of you—it means the world to see you this happy."
    },
    {
        "input_text": "When I'm in a loud cafeteria and need to ask for help finding a seat.",
        "target_text": "Excuse me, can you help me find a spot to sit? It’s a bit too loud for me to navigate easily."
    },
    {
        "input_text": "During therapy, I want to talk about how I felt last weekend when no one included me.",
        "target_text": "I felt left out last weekend when nobody reached out. It hurt more than I expected."
    },
    {
        "input_text": "I'm excited to share my science fair project with my classmates.",
        "target_text": "Hey everyone! I’ve been working hard on this—can’t wait to show you my volcano experiment!"
    },
    {
        "input_text": "When someone misunderstands my speech device output and thinks I’m being rude.",
        "target_text": "I’m sorry if that came off the wrong way—sometimes my device doesn’t get my tone right, but I promise I meant it kindly."
    },
    {
        "input_text": "In a casual conversation when I want to joke with a friend about our shared obsession with pizza.",
        "target_text": "If loving pizza was a crime, we’d be serving life sentences together!"
    },
{
        "input_text": "I want to tell my friend I missed them at school today.",
        "target_text": "School wasn’t the same without you today. I missed your jokes and your energy!"
    },
    {
        "input_text": "I'm nervous about presenting in front of the class.",
        "target_text": "I’m a little nervous, but I’ve prepared a lot. I’m ready to give it my best shot!"
    },
    {
        "input_text": "When someone new joins our group and I want to welcome them.",
        "target_text": "Hey! I’m really glad you’re here. Can’t wait to get to know you better!"
    },
    {
        "input_text": "When I'm in pain but can't express it quickly.",
        "target_text": "I’m hurting right now. Please give me a moment and help me feel more comfortable."
    },
    {
        "input_text": "I want to cheer someone up after they failed a test.",
        "target_text": "One bad test doesn’t define you. You’ve got this, and I believe in you!"
    },
    {
        "input_text": "During dinner, I want to tell my family how my day went.",
        "target_text": "Today was a mix of fun and stress, but I learned something new and made it through!"
    },
    {
        "input_text": "When my device is glitching and I need extra time.",
        "target_text": "Sorry, my speech device is acting up. Can you give me a second to fix it?"
    },
    {
        "input_text": "At the doctor's office, when they ask how I've been feeling.",
        "target_text": "I’ve been feeling a bit off lately. Some days are harder than others."
    },
    {
        "input_text": "I want to express joy after winning a game.",
        "target_text": "Yes! That was awesome. I feel like a champion right now!"
    },
    {
        "input_text": "When a friend shares something personal and I want to respond with empathy.",
        "target_text": "Thank you for sharing that. I’m here for you no matter what."
    },
    {
        "input_text": "When I'm overstimulated in a busy environment and need a break.",
        "target_text": "It’s too much for me right now. I need a quiet space to recharge."
    },
    {
        "input_text": "I want to tell someone that I like them in a playful way.",
        "target_text": "So… I kind of think you’re really cool. Just putting that out there!"
    },
    {
        "input_text": "When someone says something kind to me and I want to respond meaningfully.",
        "target_text": "That really means a lot to me. Thank you for being so thoughtful."
    },
    {
        "input_text": "I want to explain why I use a speech device.",
        "target_text": "I use this device to help me express myself. It’s my voice, just delivered differently."
    },
    {
        "input_text": "When someone interrupts me before I finish my sentence.",
        "target_text": "Please wait a second—I’m not done talking yet. I want to share my full thought."
    },
    {
        "input_text": "When I feel left out of a group activity.",
        "target_text": "I noticed I wasn’t included, and it made me feel invisible. I want to be part of things too."
    },
    {
        "input_text": "When I want to start a conversation at lunch.",
        "target_text": "What’s the best thing you’ve eaten this week? Let’s trade snack stories!"
    },
    {
        "input_text": "I want to talk about my favorite hobby—drawing.",
        "target_text": "I love drawing! It helps me show how I feel when words don’t quite fit."
    },
    {
        "input_text": "When I need to say 'no' without sounding upset.",
        "target_text": "No, thank you. That doesn’t work for me right now, but I appreciate you asking."
    },
    {
        "input_text": "I want to express gratitude to my teacher for supporting me.",
        "target_text": "Thank you for always encouraging me. You help me grow more than you know."
    },
{
        "input_text": "When I want to say I'm tired without sounding negative.",
        "target_text": "I’m feeling low on energy right now. I think I just need a little rest."
    },
    {
        "input_text": "When I want to join a group conversation but don’t know how to start.",
        "target_text": "Hey, can I join in? I’d love to be part of what you’re talking about."
    },
    {
        "input_text": "When someone makes fun of how I talk or use my device.",
        "target_text": "This is how I communicate. It deserves just as much respect as any voice."
    },
    {
        "input_text": "When I’m happy and want to share the moment with someone.",
        "target_text": "This just made my whole day! I wish I could bottle this feeling forever."
    },
    {
        "input_text": "When I want to ask someone for help opening something.",
        "target_text": "Can you help me open this, please? It’s being a bit stubborn today."
    },
    {
        "input_text": "When I want to say I'm okay, even if I don’t look it.",
        "target_text": "I know I might seem off, but I’m actually okay right now—just processing quietly."
    },
    {
        "input_text": "When I want to share how music helps me regulate emotions.",
        "target_text": "Music helps me feel calmer when things get overwhelming. It’s like a reset button."
    },
    {
        "input_text": "When I want to clarify something I said earlier.",
        "target_text": "I think what I said might’ve been confusing—let me explain it a little better."
    },
    {
        "input_text": "When I need time to think before responding.",
        "target_text": "Give me a second—I’m still thinking it through. I’ll respond in a moment."
    },
    {
        "input_text": "When someone is being overly helpful and I want space.",
        "target_text": "I appreciate you helping, but I’d like to try this myself first."
    },
    {
        "input_text": "When I want to tell a joke to lighten the mood.",
        "target_text": "Why don’t skeletons fight each other? They don’t have the guts!"
    },
    {
        "input_text": "When I want to say I’m proud of myself after a hard task.",
        "target_text": "That was tough, but I did it. I’m proud of myself for sticking with it!"
    },
    {
        "input_text": "When I want to say 'I love you' in a meaningful way.",
        "target_text": "You make me feel safe and seen—I love you with my whole heart."
    },
    {
        "input_text": "When I need to leave early from an event because I'm overwhelmed.",
        "target_text": "This has been fun, but I need to step away now. I’m starting to feel overwhelmed."
    },
    {
        "input_text": "When I want to request a break during a task.",
        "target_text": "Can we take a quick break? I’ll be able to focus better after that."
    },
    {
        "input_text": "When someone is talking too fast and I can’t follow.",
        "target_text": "Could you please slow down a bit? I want to make sure I understand you."
    },
    {
        "input_text": "When I want to express interest in someone’s story.",
        "target_text": "That sounds really interesting! Tell me more—I’m all ears."
    },
    {
        "input_text": "When I want to say I don’t know the answer but want to try.",
        "target_text": "I’m not sure right now, but I want to give it a try!"
    },
    {
        "input_text": "When I want to explain that I need visual support.",
        "target_text": "It helps me a lot when things are shown visually—can you write or draw it out?"
    },
    {
        "input_text": "When someone praises me and I want to receive it gracefully.",
        "target_text": "Thank you! I’ve been working really hard on that—I’m glad it shows."
    }
]

for item in dataset["train"]:
    knowledge_base.append({
        "input_text": str(item["input_text"]) if item["input_text"] is not None else "",
        "target_text": str(item["target_text"]) if item["target_text"] is not None else ""
    })

# Initialize the RAG system
rag_system = RAGSystem(knowledge_base)

# Main function to process a query through the entire pipeline
def process_query(query):
    """Process a query through Flan-T5, RAG, and GPT-4o"""
    results = {}
    
    # Step 1: Get response from fine-tuned Flan-T5 model
    print("Getting response from Flan-T5...")
    flan_t5_response = get_flan_t5_response(finetuned_model, finetuned_tokenizer, query, device)
    results["flan_t5_response"] = flan_t5_response
    
    # Step 2: Get relevant documents from RAG
    print("Retrieving relevant documents from RAG...")
    rag_documents = rag_system.retrieve(query, top_k=3)
    results["rag_documents"] = rag_documents
    
    # Step 3: Create prompt for GPT-4o
    print("Creating prompt for GPT-4o...")
    gpt4o_prompt = create_gpt4o_prompt(query, flan_t5_response, rag_documents)
    results["gpt4o_prompt"] = gpt4o_prompt
    
    # Step 4: Get response from GPT-4o
    print("Getting response from GPT-4o...")
    gpt4o_response = get_gpt4o_response(gpt4o_prompt)
    results["gpt4o_response"] = gpt4o_response
    
    return results

# Function to evaluate on test set
def evaluate_pipeline(test_dataset, num_samples=None):
    """Evaluate the pipeline on the test dataset"""
    # Get test queries
    test_queries = [str(x) if x is not None else "" for x in test_dataset["test"]["input_text"]]
    reference_responses = [str(x) if x is not None else "" for x in test_dataset["test"]["target_text"]]
    
    # Limit number of samples if specified
    if num_samples:
        test_queries = test_queries[:num_samples]
        reference_responses = reference_responses[:num_samples]
    
    results = []
    for i, query in enumerate(tqdm(test_queries, desc="Evaluating pipeline")):
        print(f"\nProcessing test query {i+1}/{len(test_queries)}")
        result = process_query(query)
        
        # Add reference response
        result["reference_response"] = reference_responses[i]
        result["query"] = query
        
        results.append(result)
        
        # Save results incrementally
        with open(f"pipeline_results_{i+1}.json", "w") as f:
            json.dump(result, f, indent=2)
    
    # Save all results
    with open("pipeline_results_all.json", "w") as f:
        json.dump(results, f, indent=2)
    
    return results

# Example usage
if __name__ == "__main__":
    print("\n===== Example Single Query Processing =====")
    # Process a single example
    example_query = "I just found out my dog has cancer and I don't know how I'm going to cope with this news."
    results = process_query(example_query)
    
    print("\n----- Flan-T5 Response -----")
    print(results["flan_t5_response"])
    
    print("\n----- RAG Retrieved Documents -----")
    for i, doc in enumerate(results["rag_documents"], 1):
        print(f"Document {i} (Score: {doc['score']:.2f}):")
        print(f"Input: {doc['input_text']}")
        print(f"Target: {doc['target_text']}")
        print()
    
    print("\n----- GPT-4o Response -----")
    print(results["gpt4o_response"])
    
    # Uncomment to evaluate on test set (be mindful of API costs!)
    # print("\n===== Evaluating on Test Set =====")
    # evaluate_pipeline(dataset, num_samples=5)  # Limit to 5 samples to avoid high API costs

Creating FAISS index for RAG...


Batches: 100%|█████████████████████████████████████████████████████████████████████| 2317/2317 [00:20<00:00, 110.44it/s]



===== Example Single Query Processing =====
Getting response from Flan-T5...
Retrieving relevant documents from RAG...
Creating prompt for GPT-4o...
Getting response from GPT-4o...
Error calling GPT-4o API: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

----- Flan-T5 Response -----
I'm sorry to hear that. I'm sorry to hear that. I'm sorry to hear that.

----- RAG Retrieved Documents -----
Document 1 (Score: 0.77):
Input: ASSISTANT: I found out my dog has cancer today
Target: Oh no! Is it serious?

Document 2 (Score: 0.74):
Input: ASSISTANT: My dog has cancer.
Target: I am sorry to hear that.  Unfortunately a lot of dog breeds have a high risk of cancer.  Is it treatable?

Document 3 (Score: 0.67):
Input: ASSISTANT: My dog ha

In [25]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import evaluate
import numpy as np
import torch
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
import faiss
import random
import os

# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Load your test dataset
data_files = {"test": "empathy_new/empathy_test_new.csv"}
test_dataset = load_dataset("csv", data_files=data_files)

# Take only 30% of the test dataset
test_data = test_dataset["test"]
dataset_size = len(test_data)
subset_size = int(dataset_size * 0.3)  # 30% of the dataset
indices = np.random.choice(dataset_size, subset_size, replace=False)

# Create a subset of the test dataset
subset_input_texts = [str(test_data["input_text"][i]) if test_data["input_text"][i] is not None else "" for i in indices]
subset_reference_texts = [str(test_data["target_text"][i]) if test_data["target_text"][i] is not None else "" for i in indices]

print(f"Using {subset_size} samples out of {dataset_size} total samples (30%)")

# Load metrics
bleu_metric = evaluate.load("bleu")
meteor_metric = evaluate.load("meteor")
rouge_metric = evaluate.load("rouge")

# Load your fine-tuned model
finetuned_model_path = "./flan-t5-empathy-final"
finetuned_tokenizer = AutoTokenizer.from_pretrained(finetuned_model_path)
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model_path)

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
finetuned_model = finetuned_model.to(device)

# Load sentence transformer for embeddings
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
sentence_model = sentence_model.to(device)

# Define the provided knowledge base
knowledge_base = [
    {
        "input_text": "At my sister's wedding, I wanted to say how proud I was of her.",
        "target_text": "You looked absolutely stunning today. I'm so proud of you—it means the world to see you this happy."
    },
    {
        "input_text": "When I'm in a loud cafeteria and need to ask for help finding a seat.",
        "target_text": "Excuse me, can you help me find a spot to sit? It's a bit too loud for me to navigate easily."
    },
    {
        "input_text": "During therapy, I want to talk about how I felt last weekend when no one included me.",
        "target_text": "I felt left out last weekend when nobody reached out. It hurt more than I expected."
    },
    {
        "input_text": "I'm excited to share my science fair project with my classmates.",
        "target_text": "Hey everyone! I've been working hard on this—can't wait to show you my volcano experiment!"
    },
    {
        "input_text": "When someone misunderstands my speech device output and thinks I'm being rude.",
        "target_text": "I'm sorry if that came off the wrong way—sometimes my device doesn't get my tone right, but I promise I meant it kindly."
    },
    {
        "input_text": "In a casual conversation when I want to joke with a friend about our shared obsession with pizza.",
        "target_text": "If loving pizza was a crime, we'd be serving life sentences together!"
    },
    {
        "input_text": "I want to tell my friend I missed them at school today.",
        "target_text": "School wasn't the same without you today. I missed your jokes and your energy!"
    },
    {
        "input_text": "I'm nervous about presenting in front of the class.",
        "target_text": "I'm a little nervous, but I've prepared a lot. I'm ready to give it my best shot!"
    },
    {
        "input_text": "When someone new joins our group and I want to welcome them.",
        "target_text": "Hey! I'm really glad you're here. Can't wait to get to know you better!"
    },
    {
        "input_text": "When I'm in pain but can't express it quickly.",
        "target_text": "I'm hurting right now. Please give me a moment and help me feel more comfortable."
    },
  {
        "input_text": "At my sister's wedding, I wanted to say how proud I was of her.",
        "target_text": "You looked absolutely stunning today. I'm so proud of you—it means the world to see you this happy."
    },
    {
        "input_text": "When I'm in a loud cafeteria and need to ask for help finding a seat.",
        "target_text": "Excuse me, can you help me find a spot to sit? It's a bit too loud for me to navigate easily."
    },
    {
        "input_text": "During therapy, I want to talk about how I felt last weekend when no one included me.",
        "target_text": "I felt left out last weekend when nobody reached out. It hurt more than I expected."
    },
    {
        "input_text": "I'm excited to share my science fair project with my classmates.",
        "target_text": "Hey everyone! I've been working hard on this—can't wait to show you my volcano experiment!"
    },
    {
        "input_text": "When someone misunderstands my speech device output and thinks I'm being rude.",
        "target_text": "I'm sorry if that came off the wrong way—sometimes my device doesn't get my tone right, but I promise I meant it kindly."
    },
    {
        "input_text": "In a casual conversation when I want to joke with a friend about our shared obsession with pizza.",
        "target_text": "If loving pizza was a crime, we'd be serving life sentences together!"
    }, {
        "input_text": "I want to tell my friend I missed them at school today.",
        "target_text": "School wasn't the same without you today. I missed your jokes and your energy!"
    },
    {
        "input_text": "I'm nervous about presenting in front of the class.",
        "target_text": "I'm a little nervous, but I've prepared a lot. I'm ready to give it my best shot!"
    },
    {
        "input_text": "When someone new joins our group and I want to welcome them.",
        "target_text": "Hey! I'm really glad you're here. Can't wait to get to know you better!"
    },
    {
        "input_text": "When I'm in pain but can't express it quickly.",
        "target_text": "I'm hurting right now. Please give me a moment and help me feel more comfortable."
    },
    {
        "input_text": "I want to cheer someone up after they failed a test.",
        "target_text": "One bad test doesn't define you. You've got this, and I believe in you!"
    },
    {
        "input_text": "During dinner, I want to tell my family how my day went.",
        "target_text": "Today was a mix of fun and stress, but I learned something new and made it through!"
    },
    {
        "input_text": "When my device is glitching and I need extra time.",
        "target_text": "Sorry, my speech device is acting up. Can you give me a second to fix it?"
    },
    {
        "input_text": "At the doctor's office, when they ask how I've been feeling.",
        "target_text": "I've been feeling a bit off lately. Some days are harder than others."
    },
    {
        "input_text": "I want to express joy after winning a game.",
        "target_text": "Yes! That was awesome. I feel like a champion right now!"
    },
    {
        "input_text": "When a friend shares something personal and I want to respond with empathy.",
        "target_text": "Thank you for sharing that. I'm here for you no matter what."
    },
    {
        "input_text": "When I'm overstimulated in a busy environment and need a break.",
        "target_text": "It's too much for me right now. I need a quiet space to recharge."
    },
    {
        "input_text": "I want to tell someone that I like them in a playful way.",
        "target_text": "So… I kind of think you're really cool. Just putting that out there!"
    },
    {
        "input_text": "When someone says something kind to me and I want to respond meaningfully.",
        "target_text": "That really means a lot to me. Thank you for being so thoughtful."
    },
    {
        "input_text": "I want to explain why I use a speech device.",
        "target_text": "I use this device to help me express myself. It's my voice, just delivered differently."
    },
    {
        "input_text": "When someone interrupts me before I finish my sentence.",
        "target_text": "Please wait a second—I'm not done talking yet. I want to share my full thought."
    },
    {
        "input_text": "When I feel left out of a group activity.",
        "target_text": "I noticed I wasn't included, and it made me feel invisible. I want to be part of things too."
    },{
        "input_text": "When I want to start a conversation at lunch.",
        "target_text": "What's the best thing you've eaten this week? Let's trade snack stories!"
    },
    {
        "input_text": "I want to talk about my favorite hobby—drawing.",
        "target_text": "I love drawing! It helps me show how I feel when words don't quite fit."
    },
    {
        "input_text": "When I need to say 'no' without sounding upset.",
        "target_text": "No, thank you. That doesn't work for me right now, but I appreciate you asking."
    },
    {
        "input_text": "I want to express gratitude to my teacher for supporting me.",
        "target_text": "Thank you for always encouraging me. You help me grow more than you know."
    },
    {
        "input_text": "When I want to say I'm tired without sounding negative.",
        "target_text": "I'm feeling low on energy right now. I think I just need a little rest."
    },
    {
        "input_text": "When I want to join a group conversation but don't know how to start.",
        "target_text": "Hey, can I join in? I'd love to be part of what you're talking about."
    },
    {
        "input_text": "When someone makes fun of how I talk or use my device.",
        "target_text": "This is how I communicate. It deserves just as much respect as any voice."
    },
    {
        "input_text": "When I'm happy and want to share the moment with someone.",
        "target_text": "This just made my whole day! I wish I could bottle this feeling forever."
    },
    {
        "input_text": "When I want to ask someone for help opening something.",
        "target_text": "Can you help me open this, please? It's being a bit stubborn today."
    },
    {
        "input_text": "When I want to say I'm okay, even if I don't look it.",
        "target_text": "I know I might seem off, but I'm actually okay right now—just processing quietly."
    },
    {
        "input_text": "When I want to share how music helps me regulate emotions.",
        "target_text": "Music helps me feel calmer when things get overwhelming. It's like a reset button."
    },
    {
        "input_text": "When I want to clarify something I said earlier.",
        "target_text": "I think what I said might've been confusing—let me explain it a little better."
    }
]

# Function to build the retrieval index from the knowledge base
def build_retrieval_index_from_kb(kb):
    """Build a FAISS index for retrieval from the knowledge base"""
    texts = [item["input_text"] for item in kb]
    responses = [item["target_text"] for item in kb]
    
    print("Creating embeddings for knowledge base...")
    embeddings = sentence_model.encode(texts, show_progress_bar=True, convert_to_tensor=True)
    embeddings = embeddings.cpu().numpy()
    
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    
    return index, texts, responses

# Define retrieval function
def retrieve_similar_examples(query, index, texts, responses, k=3):
    """Retrieve k most similar examples from the index"""
    query_embedding = sentence_model.encode([query], convert_to_tensor=True).cpu().numpy()
    distances, indices = index.search(query_embedding, k)
    
    retrieved_texts = [texts[idx] for idx in indices[0]]
    retrieved_responses = [responses[idx] for idx in indices[0]]
    
    return retrieved_texts, retrieved_responses

# Generate predictions with RAG function
def generate_predictions_with_rag(model, tokenizer, texts, index, kb_texts, kb_responses, device):
    """Generate predictions with RAG"""
    predictions = []
    model.eval()
    
    for text in tqdm(texts):
        # Retrieve similar examples
        retrieved_texts, retrieved_responses = retrieve_similar_examples(text, index, kb_texts, kb_responses)
        
        # Create an augmented prompt
        augmented_prompt = text + "\n\nSimilar situations and responses:\n"
        for i, (rt, rr) in enumerate(zip(retrieved_texts, retrieved_responses)):
            augmented_prompt += f"Situation {i+1}: {rt}\nResponse {i+1}: {rr}\n\n"
        
        augmented_prompt += "Generate an empathetic response to the original situation:"
        
        inputs = tokenizer(augmented_prompt, return_tensors="pt", truncation=True, max_length=768).to(device)
        
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=128,
                num_beams=4,
                early_stopping=True
            )
        
        prediction = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        predictions.append(prediction)
    
    return predictions

# Utility function for diversity metrics
def calculate_diversity_metrics(predictions):
    """Calculate lexical diversity metrics"""
    vocab_size = set()
    total_tokens = 0
    
    for pred in predictions:
        tokens = pred.split()
        vocab_size.update(tokens)
        total_tokens += len(tokens)
    
    # TTR (Type-Token Ratio)
    ttr = len(vocab_size) / total_tokens if total_tokens > 0 else 0
    
    return {
        "vocabulary_size": len(vocab_size),
        "type_token_ratio": ttr
    }

# Function to load existing predictions
def load_existing_predictions(file_path, indices=None):
    """Load predictions from a file based on indices"""
    if not os.path.exists(file_path):
        print(f"Warning: File {file_path} not found. Returning empty list.")
        return []
    
    with open(file_path, 'r') as f:
        all_predictions = [line.strip() for line in f if line.strip()]
    
    if indices is not None and len(all_predictions) >= max(indices) + 1:
        return [all_predictions[i] for i in indices]
    else:
        print(f"Warning: Indices out of range or file format not as expected. Using all predictions.")
        return all_predictions[:len(indices)] if indices is not None else all_predictions

# Build retrieval index from the knowledge base
print("Building retrieval index from knowledge base...")
index, kb_texts, kb_responses = build_retrieval_index_from_kb(knowledge_base)

# Generate predictions with the fine-tuned model enhanced with RAG
print("Generating predictions from fine-tuned model with RAG...")
rag_predictions = generate_predictions_with_rag(finetuned_model, finetuned_tokenizer, subset_input_texts, 
                                              index, kb_texts, kb_responses, device)

# Format references for BLEU
references_for_bleu = [[ref] for ref in subset_reference_texts]

# Try to load existing fine-tuned model predictions
# Update the file path to where your fine-tuned predictions are stored
finetuned_predictions_path = "finetuned_predictions.txt"
if os.path.exists(finetuned_predictions_path):
    print(f"Loading existing fine-tuned predictions from {finetuned_predictions_path}")
    finetuned_predictions = load_existing_predictions(finetuned_predictions_path, indices)
    
    # Check if we have enough predictions
    if len(finetuned_predictions) < len(subset_input_texts):
        print(f"Warning: Only found {len(finetuned_predictions)} predictions, but need {len(subset_input_texts)}.")
        print("Make sure your fine-tuned predictions file has one prediction per line.")
        
    # Calculate metrics for fine-tuned model without RAG (if available)
    if len(finetuned_predictions) == len(subset_input_texts):
        finetuned_bleu = bleu_metric.compute(predictions=finetuned_predictions, references=references_for_bleu)
        finetuned_meteor = meteor_metric.compute(predictions=finetuned_predictions, references=subset_reference_texts)
        finetuned_rouge = rouge_metric.compute(predictions=finetuned_predictions, references=subset_reference_texts)
        finetuned_diversity = calculate_diversity_metrics(finetuned_predictions)
        
        print("\nFine-tuned Model Metrics (without RAG):")
        print(f"BLEU Score: {finetuned_bleu['bleu']:.4f}")
        print(f"METEOR Score: {finetuned_meteor['meteor']:.4f}")
        print(f"ROUGE-1 F1: {finetuned_rouge['rouge1'].mid.fmeasure:.4f}")
        print(f"Vocabulary Size: {finetuned_diversity['vocabulary_size']}")
        print(f"Type-Token Ratio: {finetuned_diversity['type_token_ratio']:.4f}")
else:
    print(f"Note: No existing fine-tuned predictions found at {finetuned_predictions_path}")
    print("Will only calculate metrics for RAG-enhanced model")

# Calculate metrics for fine-tuned model with RAG
rag_bleu = bleu_metric.compute(predictions=rag_predictions, references=references_for_bleu)
rag_meteor = meteor_metric.compute(predictions=rag_predictions, references=subset_reference_texts)
rag_rouge = rouge_metric.compute(predictions=rag_predictions, references=subset_reference_texts)
rag_diversity = calculate_diversity_metrics(rag_predictions)

# Print RAG model metrics
print("\nFine-tuned Model with RAG Metrics:")
print(f"BLEU Score: {rag_bleu['bleu']:.4f}")
print(f"METEOR Score: {rag_meteor['meteor']:.4f}")
print(f"ROUGE-1 F1: {rag_rouge['rouge1'].mid.fmeasure:.4f}")
print(f"Vocabulary Size: {rag_diversity['vocabulary_size']}")
print(f"Type-Token Ratio: {rag_diversity['type_token_ratio']:.4f}")

# Save RAG predictions
with open("rag_predictions_30pct_subset.txt", "w") as f:
    for pred in rag_predictions:
        f.write(f"{pred}\n")

# Save comparison samples if fine-tuned predictions are available
if os.path.exists(finetuned_predictions_path) and len(finetuned_predictions) == len(subset_input_texts):
    with open("rag_comparison_30pct_subset.txt", "w") as f:
        f.write(f"EVALUATION ON 30% SUBSET ({subset_size} SAMPLES)\n\n")
        for i in range(len(subset_input_texts)):
            f.write(f"Input: {subset_input_texts[i]}\n")
            f.write(f"Reference: {subset_reference_texts[i]}\n")
            f.write(f"Fine-tuned: {finetuned_predictions[i]}\n")
            f.write(f"RAG: {rag_predictions[i]}\n")
            f.write("="*50 + "\n")
        
    print(f"\nComparison results saved to 'rag_comparison_30pct_subset.txt'")
else:
    with open("rag_samples_30pct_subset.txt", "w") as f:
        f.write(f"RAG EVALUATION ON 30% SUBSET ({subset_size} SAMPLES)\n\n")
        for i in range(len(subset_input_texts)):
            f.write(f"Input: {subset_input_texts[i]}\n")
            f.write(f"Reference: {subset_reference_texts[i]}\n")
            f.write(f"RAG: {rag_predictions[i]}\n")
            f.write("="*50 + "\n")
        
    print(f"\nRAG samples saved to 'rag_samples_30pct_subset.txt'")

print("\nDone!")

Using 3250 samples out of 10834 total samples (30%)


[nltk_data] Downloading package wordnet to /home/nikhil/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/nikhil/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/nikhil/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Building retrieval index from knowledge base...
Creating embeddings for knowledge base...


Batches: 100%|████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 91.80it/s]


Generating predictions from fine-tuned model with RAG...


100%|███████████████████████████████████████████████████████████████████████████████| 3250/3250 [14:57<00:00,  3.62it/s]


Note: No existing fine-tuned predictions found at finetuned_predictions.txt
Will only calculate metrics for RAG-enhanced model

Fine-tuned Model with RAG Metrics:
BLEU Score: 0.0079
METEOR Score: 0.0788


AttributeError: 'numpy.float64' object has no attribute 'mid'

In [27]:
print(f"ROUGE-1: {rag_rouge['rouge1']:.4f}")
print(f"ROUGE-2: {rag_rouge['rouge2']:.4f}")
print(f"ROUGE-L: {rag_rouge['rougeL']:.4f}")

ROUGE-1: 0.1224
ROUGE-2: 0.0130
ROUGE-L: 0.1141
