This system addresses user queries by considering both the semantic meaning and emotional tone of the input.  When a user expresses a complaint or issue, such as "I'm really angry about this issue!", the system first analyzes the query in two key ways:

1. **Semantic Understanding:**  An embedding is generated to capture the core meaning of the complaint or issue. This embedding represents the user's topic of concern in a vector space, allowing for comparisons with other similar concepts.

2. **Emotion Detection:** The system analyzes the query's emotional content, specifically identifying strong emotions like anger or frustration.  This detection process assigns emotion scores, quantifying the intensity of the expressed feelings.

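Next, the system searches a document collection. Each document is stored as its semantic embedding concatenated with its emotion scores, and the query is encoded the same way, so every document is compared against the query on *both* the semantic and emotional criteria at once. This ensures that returned documents are not only relevant to the topic of the query but also reflect the user's emotional state.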

Finally, the system ranks the documents by a combined similarity score (the cosine similarity between the concatenated query vector and each concatenated document vector) that reflects both semantic and emotional relevance, and returns the top *k* matches. This approach aims to provide the most helpful and empathetic responses to users by considering the full context of their query.


In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Tuple
import torch
import numpy as np
from dataclasses import dataclass

@dataclass
class EmotionScore:
    """One emotion label together with the score the classifier gave it."""

    # Label name as reported by the emotion classifier (e.g. 'anger').
    emotion: str
    # Score the classifier assigned to that label.
    score: float

class EmotionAwareRAG:
    """Retrieve documents by combined semantic and emotional similarity.

    Each document is represented by its sentence embedding, optionally
    concatenated with the per-label scores of an emotion classifier.
    Queries are encoded the same way and documents are ranked by cosine
    similarity against the query vector.
    """

    def __init__(self, emotion_model_name: str = "j-hartmann/emotion-english-distilroberta-base",
                 embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """
        Load the emotion classifier and the embedding model.

        Args:
            emotion_model_name: Model id passed to the text-classification pipeline
            embedding_model_name: Model id passed to SentenceTransformer
        """
        # Initialize emotion detection model
        # NOTE(review): recent transformers releases deprecate `return_all_scores`
        # in favour of `top_k=None`, which may also reorder the returned labels by
        # score -- confirm against the installed version before switching.
        self.emotion_classifier = pipeline("text-classification",
                                           model=emotion_model_name,
                                           return_all_scores=True)

        # Initialize embedding model for document retrieval
        self.embedding_model = SentenceTransformer(embedding_model_name)

        # Initialize document store
        self.documents: List[Dict] = []
        self.document_embeddings = None
        # Whether the stored document vectors include emotion features;
        # retrieve() must build the query vector the same way.
        self._emotion_aware = True

    def detect_emotion(self, text: str) -> List["EmotionScore"]:
        """Detect emotions in the input text.

        Args:
            text: Text to classify

        Returns:
            One EmotionScore per label returned by the classifier
        """
        # For a single string the pipeline wraps its result in an outer list.
        emotions = self.emotion_classifier(text)[0]
        return [EmotionScore(e['label'], e['score']) for e in emotions]

    def _emotion_vector(self, text: str) -> np.ndarray:
        """Return the emotion scores of *text* as a vector in a fixed label order."""
        # Sort by label name so position i always means the same emotion for
        # documents and queries, regardless of the order the classifier returns.
        ordered = sorted(self.detect_emotion(text), key=lambda e: e.emotion)
        return np.array([e.score for e in ordered], dtype=float)

    def add_documents(self, documents: List[Dict], emotion_aware: bool = True):
        """
        Replace the document store and compute the document vectors.

        Args:
            documents: List of dictionaries containing 'text' and optional 'metadata'
            emotion_aware: Whether to include emotion information in document embeddings
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # the documents from their embeddings.
        self.documents = list(documents)
        self._emotion_aware = emotion_aware

        if not self.documents:
            # Nothing to encode; retrieve() treats this as an empty store.
            self.document_embeddings = None
            return

        # Get base embeddings for all documents
        base_embeddings = np.asarray(
            self.embedding_model.encode([doc['text'] for doc in self.documents])
        )

        if emotion_aware:
            # Combine base embeddings with one row of emotion scores per document
            emotion_features = np.array(
                [self._emotion_vector(doc['text']) for doc in self.documents]
            )
            self.document_embeddings = np.concatenate([base_embeddings, emotion_features], axis=1)
        else:
            self.document_embeddings = base_embeddings

    def retrieve(self, query: str, k: int = 3) -> List[Tuple[Dict, float]]:
        """
        Retrieve relevant documents considering both content and emotional similarity.

        Args:
            query: User query
            k: Number of documents to retrieve

        Returns:
            List of (document, similarity_score) tuples, best match first.
            Empty when no documents have been added or k is not positive.
        """
        # Guard first: `[-0:]` would otherwise select every document, and
        # an empty store has no embedding matrix to compare against.
        if k <= 0 or self.document_embeddings is None or not self.documents:
            return []

        # Build the query vector the same way the document vectors were built
        query_vector = np.asarray(self.embedding_model.encode([query])[0])
        if self._emotion_aware:
            query_vector = np.concatenate([query_vector, self._emotion_vector(query)])

        # Cosine similarity; a zero norm gives a zero dot product, so dividing
        # by 1 there yields 0 instead of NaN.
        norms = np.linalg.norm(self.document_embeddings, axis=1) * np.linalg.norm(query_vector)
        norms = np.where(norms == 0, 1.0, norms)
        similarities = np.dot(self.document_embeddings, query_vector) / norms

        # Get top-k documents, highest similarity first
        top_indices = np.argsort(similarities)[-k:][::-1]

        return [(self.documents[idx], float(similarities[idx])) for idx in top_indices]

# Example usage
if __name__ == "__main__":
    # Build the retriever (loads the emotion classifier and the embedding model)
    rag = EmotionAwareRAG()

    # A tiny corpus: one upbeat, one negative and one neutral document
    documents = [
        {
            "text": "I'm really excited about this new feature! It's going to be amazing!",
            "metadata": {"type": "positive"},
        },
        {
            "text": "Unfortunately, we encountered a critical error in the system.",
            "metadata": {"type": "negative"},
        },
        {
            "text": "Here are the step-by-step instructions to complete the task.",
            "metadata": {"type": "neutral"},
        },
    ]

    # Index the corpus
    rag.add_documents(documents)

    # Queries chosen to carry different emotional tones
    queries = [
        "I'm frustrated with this error message",
        "Can you help me solve this problem?",
        "This is awesome! Show me more!",
    ]

    # For every query, show the detected emotions and the ranked documents
    for query in queries:
        print(f"\nQuery: {query}")
        print("Detected emotions:", rag.detect_emotion(query))

        results = rag.retrieve(query)
        print("Retrieved documents:")
        for doc, score in results:
            print(f"- Score: {score:.3f}, Text: {doc['text']}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Query: I'm frustrated with this error message
Detected emotions: [EmotionScore(emotion='anger', score=0.9645368456840515), EmotionScore(emotion='disgust', score=0.002973639639094472), EmotionScore(emotion='fear', score=0.002221904695034027), EmotionScore(emotion='joy', score=0.0005954282241873443), EmotionScore(emotion='neutral', score=0.006374949589371681), EmotionScore(emotion='sadness', score=0.020156873390078545), EmotionScore(emotion='surprise', score=0.003140360116958618)]
Retrieved documents:
- Score: 0.314, Text: Unfortunately, we encountered a critical error in the system.
- Score: 0.091, Text: Here are the step-by-step instructions to complete the task.
- Score: -0.041, Text: I'm really excited about this new feature! It's going to be amazing!

Query: Can you help me solve this problem?
Detected emotions: [EmotionScore(emotion='anger', score=0.10606434941291809), EmotionScore(emotion='disgust', score=0.04907247796654701), EmotionScore(emotion='fear', score=0.1843852251768112