# EmoSense: A Hybrid CNN-RAG Framework for Mental Health Analysis

## The CNN-based Facial Emotion Recognition Module

In [None]:
import cv2  
import numpy as np  
from keras.models import load_model  
import json  
from datetime import datetime  
import random

Load the trained model and video file.

In [None]:
# Path of the video file to analyze
video_path = 'test_video.mp4'

# Load the existing (already trained) Keras model
model = load_model('model_keras.h5')

# Open the video file for frame-by-frame reading
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the file is missing or unreadable, so
# fail fast here instead of letting the detection loop end immediately and
# write an empty result file.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

Obtain the properties of the video and perform the remaining preliminary setup.

In [None]:
# --- Video metadata ---------------------------------------------------------
# Frame rate and total frame count as reported by the capture object
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)

# --- Face detector ----------------------------------------------------------
# Frontal-face Haar cascade taken from OpenCV's bundled data directory
cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_file)

# --- Result storage ---------------------------------------------------------
# Detected emotions accumulate here; the output file name carries the time
# at which this cell was executed.
run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
json_filename = f'emotion_results_{run_stamp}.json'
emotion_results = []

# --- Loop state -------------------------------------------------------------
# A result is recorded once every `save_interval` processed frames
save_interval = 10
counter = 0

# Toggled with the space bar during playback
paused = False

Perform the emotion detection process and save the data to a JSON file.

In [None]:
# Emotion class names, indexed by the class id predicted by the model.
# NOTE(review): assumes this order matches the label order used in training — confirm.
names = ['anger','contempt','disgust','fear','happy','sadness','surprise','neutral']

try:
    while True:
        if not paused:
            ret, frame = cap.read()
            if not ret:
                print("Video ended")
                break

            # Get current frame number
            current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))

            # Calculate video progress. The frame count can be reported as 0
            # when the backend cannot determine it, so guard the division.
            progress = (current_frame / frame_count) * 100 if frame_count > 0 else 0.0

            # Turn captured frame to a grayscale image
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect human faces
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

            for (x, y, w, h) in faces:
                # Draw a rectangle around the detected face
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

                # Extract the face area and resize it to the 48x48 input size
                face = gray[y:y+h, x:x+w]
                face = cv2.resize(face, (48, 48))

                # Preprocess: scale to [0, 1], replicate the gray channel three
                # times and add a leading batch axis -> shape (1, 48, 48, 3)
                face = face / 255.0
                face = np.stack((face,)*3, axis=-1)
                face = np.expand_dims(face, axis=0)

                # Predict the emotion; verbose=0 suppresses the per-call
                # progress bar that would otherwise be printed for every face
                emotion_prediction = model.predict(face, verbose=0)
                emotion_label = int(np.argmax(emotion_prediction))
                current_emotion = names[emotion_label]

                # Convert disgust to neutral with 90% probability
                if current_emotion == 'disgust' and random.random() <= 0.9:
                    current_emotion = 'neutral'

                # Display the predicted emotion above the face; clamp the text
                # baseline so the label stays visible for faces at the top edge
                cv2.putText(frame, f'Emotion: {current_emotion}', (x, max(y-10, 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

                # Record results every N frames (one entry per detected face)
                if counter % save_interval == 0:
                    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    frame_data = {
                        "timestamp": timestamp,
                        "frame_number": current_frame,
                        "emotion": current_emotion
                    }
                    emotion_results.append(frame_data)

            # Display progress
            cv2.putText(frame, f'Progress: {progress:.1f}%', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Display
            cv2.imshow('Emotion Recognition', frame)

            # Update counter
            counter += 1

        # Key controls (also polled while paused so playback can be resumed)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):  # Press 'q' to quit
            break
        elif key == ord(' '):  # Press 'space' to pause/resume
            paused = not paused
        elif key == ord('s'):  # Press 's' to save current frame
            frame_filename = f'frame_{current_frame}.jpg'
            cv2.imwrite(frame_filename, frame)
            print(f"Saved frame to {frame_filename}")
finally:
    # Release everything, even if the loop was interrupted or raised
    cap.release()
    cv2.destroyAllWindows()

    # Save all results collected so far to the JSON file
    with open(json_filename, 'w', encoding='utf-8') as f:
        json.dump(emotion_results, f, ensure_ascii=False, indent=4)

    print(f"Results saved to {json_filename}")

## The RAG-based Mental Health Report Generation Module

This module uses retrieval-augmented generation (RAG) to improve the quality of the LLM's output by grounding it in mental health literature.

In [None]:
import os
import json
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

Set the API key for the Gemini model.

In [None]:
# Read the Gemini API key from the GOOGLE_API_KEY environment variable so the
# secret does not have to be stored in the notebook; falls back to an empty
# string (the previous placeholder value) when the variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

The following code initializes the RAG system with the provided PDF documents and sets up the RAG pipeline for generating responses based on the emotional patterns detected from the video analysis.

Load and extract mental health literature from PDF files in a given directory.

In [None]:
# Directory that holds the mental health literature as PDF files
directory = "docs"

# Extracted plain text, one string per successfully read PDF
docs = []

# os.listdir returns entries in arbitrary order; sort for a reproducible corpus
for filename in sorted(os.listdir(directory)):
    # Match the extension case-insensitively so e.g. "Paper.PDF" is not skipped
    if not filename.lower().endswith('.pdf'):
        continue

    filepath = os.path.join(directory, filename)
    try:
        reader = PdfReader(filepath)
        # extract_text() may yield nothing for a page; treat that as "".
        # Pages are joined with a newline so the last word of one page does
        # not fuse with the first word of the next.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Best effort: report the unreadable file and keep loading the others
        print(f"Error reading {filename}: {e}")
        continue

    # A PDF without extractable text would only add an empty document
    if not text.strip():
        print(f"Skipping {filename}: no extractable text")
        continue

    docs.append(text)

Process the documents.

In [None]:
# CharacterTextSplitter only cuts at its `separator`, which defaults to "\n\n".
# NOTE(review): text extracted from PDFs is presumably delimited mostly by
# single newlines, in which case the default would leave chunks far larger
# than chunk_size — splitting on "\n" lets the 1000-character limit apply.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100)

# Wrap each extracted text in a Document and split it into overlapping chunks
split_docs = text_splitter.split_documents([Document(page_content=doc) for doc in docs])

Initialize the embedding model.

In [None]:
# Embedding model used to vectorize the literature chunks and the queries
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/text-embedding-004",
)

Create a FAISS vector store from the documents and their embeddings.

In [None]:
# Embed every chunk and index the vectors in an in-memory FAISS store
vector_store = FAISS.from_documents(
    documents=split_docs,
    embedding=embeddings,
)

Initialize the LLM for the RAG pipeline.

In [None]:
# Chat model that the RAG chain uses to write the final report
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-1.5-flash",
    temperature=0.6,
)

Establish the RAG pipeline.

In [None]:
# Prompt for the final mental health report. It has two placeholders:
#   {context}  - presumably filled by the retrieval chain with the retrieved
#                literature chunks
#   {question} - presumably filled with the query passed to the chain (the
#                emotion summary produced further below)
prompt_template = """
Instructions:
You are an expert mental health support AI assistant tasked with analyzing emotional patterns and providing personalized guidance. Your role is to:
1. Analyze the user's emotional history from facial recognition data
2. Cross-reference this information with mental health resources
3. Generate a comprehensive yet sensitive support response

Context Documents:
{context}

Emotional Pattern Summary:
{question}

Please provide a response that includes:
1. Pattern Analysis:
   - Identify predominant emotions and significant emotional shifts
   - Note any recurring patterns or triggers
   - Highlight unusual or concerning emotional states

2. Professional Insights:
   - Draw relevant connections to mental health literature from the provided documents
   - Explain potential implications of observed emotional patterns
   - Identify any patterns that may warrant professional attention

3. Personalized Recommendations:
   - Suggest evidence-based coping strategies
   - Provide actionable steps for emotional regulation
   - Recommend relevant self-care practices

Important Guidelines:
- Maintain a professional yet empathetic tone
- Focus on objective observations rather than diagnoses
- You can cite specific sources from provided documents when making recommendations if you think they are relevant
- Try to make the response short and concise while covering all relevant aspects
- Focus on the most significant patterns identified in the summary
- Provide practical, personalized recommendations
- Some transient emotional states may be a missed detection if they disappear quickly and contradict the overall pattern

Note: If the retrieved documents don't directly address the observed patterns, provide general evidence-based guidance while clearly indicating this limitation.
    """

# Turn the raw template string into a chat prompt object for the chain
prompt = ChatPromptTemplate.from_template(prompt_template)

# Retrieval-augmented QA chain: retrieves literature chunks for the query and
# "stuffs" them all into the {context} slot of the prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # The vector-store retriever takes the number of results from the "k"
    # search kwarg; the previous "top_k" key was not recognized, so the
    # default number of documents was retrieved instead of 5.
    retriever=vector_store.as_retriever(search_kwargs={"k": 5}),
    chain_type_kwargs={"prompt": prompt},
)

Load the detected emotion history.

In [None]:
# Base name (without the ".json" extension) of the emotion history file
# written by the facial emotion recognition module; fill in before running.
target_file = ''

# The result file is written as UTF-8, so read it back with the same encoding
# instead of relying on the platform default.
with open(f'{target_file}.json', 'r', encoding='utf-8') as file:
    query = json.load(file)

Perform initial emotion summarization by sending the emotion detection history directly to a lightweight LLM.

In [None]:
# Ensure query is a string
if isinstance(query, list):
    # Flatten the list of detection records into one line per record. Records
    # without an 'emotion' key are skipped; a missing 'timestamp' is rendered
    # as 'unknown' rather than raising KeyError.
    query = ' '.join(
        f"[timestamp: {item.get('timestamp', 'unknown')}, emotion: {item['emotion']}].\n"
        for item in query
        if 'emotion' in item
    )

# Kept under its own name so the ChatPromptTemplate bound to `prompt` for the
# RAG chain is not overwritten by this plain string.
summary_prompt = f'''

Please read the following emotion detection history and provide a summary that includes:

1. Primary Emotional States:
   - Most frequent emotions
   - Duration and intensity patterns
   - Significant emotional transitions

2. Temporal Patterns:
   - Daily/weekly cycles if present
   - Triggering patterns or sequences
   - Unusual or outlier emotional states

3. Key Statistics:
   - Distribution of different emotions
   - Average duration of emotional states
   - Frequency of emotional changes

Note that you should avoid making diagnoses or providing specific advice. If some emtions are only transient and disappear quickly, you can simply note that they may be missed detections.

Raw Emotion Detection Data:
{query}
'''

# Lightweight model used only for this first-pass summary. It gets its own
# name so the `llm` used by the RAG chain is not silently replaced if the
# chain cell is executed again afterwards.
summary_llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-8b", temperature=0.7, google_api_key=GOOGLE_API_KEY)

initial_emotion_summarization = summary_llm.invoke(summary_prompt).content
print(initial_emotion_summarization)

Retrieve the documents that match the initial summary so that they can be inspected (e.g. printed) if needed.

In [None]:
# Query the vector store directly (default retriever settings) with the
# emotion summary, so the matching literature chunks can be inspected.
inspection_retriever = vector_store.as_retriever()
retrieved_docs = inspection_retriever.invoke(initial_emotion_summarization)

Obtain the final mental health report based on the initial emotion summarization result.

In [None]:
# Run the RAG chain on the emotion summary; the generated report is stored
# under the 'result' key of the returned mapping.
answer = qa_chain.invoke(initial_emotion_summarization)
final_report = answer['result']
print(final_report)