# **Transcription with Conversation-Level Sentiment Annotations**
Below, we use the Hume and GPT APIs to generate conversation-level sentiment annotations for a Zoom conversation.

> We design a two-part pipeline to visualize Zoom meetings with conversation-level sentiment annotations. We first introduce novel metrics to capture conversation-level sentiments along three axes: comprehension, consensus, and cordiality. To obtain these metrics, we first identify each speaker's individual expressed sentiments during each of their responses. To determine speaker sentiment, we segment Zoom recordings by speaker and feed the video data, audio file (including information on voice prosody), and transcript (text content) of each segment to an off-the-shelf model that outputs a quantitative measure of the extent to which the speaker expresses 48 emotions. Afterward, for each segment, we combine the speaker's top 5 emotions with weights, uniformly sampled facial expressions, and spoken words in an instruction-tuned prompt to a multimodal large language model in order to determine conversation-level metrics.



# Initialization

In [1]:
# Install libraries
!pip install hume
!pip install hume[stream]
!pip install openai
!pip install python-dotenv
!pip install pydub
!pip install ffmpeg
!pip install moviepy
!pip install webvtt-py
!pip install opencv-python


^C


In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import requests
import base64
from pydub import AudioSegment
from hume import HumeBatchClient
import json
import pandas as pd
import matplotlib.pyplot as plt
# from google.colab import userdata
import webvtt
# from google.colab import userdata
import cv2
from moviepy.editor import VideoFileClip
import subprocess

## Merge three Modalities

## 1. Merge language predictions into sentences, and average the scores for each emotion

In [6]:
import os
import json

def process_json_file(filepath, output_directory):
    """Average the language-model emotion scores of one Hume result file.

    Reads the ``*_lang.json`` file at ``filepath``, takes the predictions of
    the first group of the first result, averages the score of every emotion
    name across those predictions, and writes ``{"language": {name: mean}}``
    into ``output_directory`` as ``*_lang_processed.json``.

    Nothing is read or written when the output file already exists.

    Args:
        filepath: Path of the raw language result file.
        output_directory: Directory receiving the processed file; this
            function does not create it.
    """
    source_name = os.path.basename(filepath)
    target_name = source_name.replace('_lang.json', '_lang_processed.json')
    output_path = os.path.join(output_directory, target_name)

    # An existing output means this clip was handled in an earlier run.
    if os.path.exists(output_path):
        return

    with open(filepath, 'r') as handle:
        payload = json.load(handle)

    print("processing:" , filepath)

    # Only the first result and its first prediction group are used.
    first_result = payload[0]["results"]["predictions"][0]
    first_group = first_result["models"]["language"]["grouped_predictions"][0]

    # Running sum and number of occurrences for each emotion name.
    totals = {}
    occurrences = {}
    for segment in first_group["predictions"]:
        for entry in segment["emotions"]:
            label = entry["name"]
            totals[label] = totals.get(label, 0) + entry["score"]
            occurrences[label] = occurrences.get(label, 0) + 1

    averages = {}
    for label, total in totals.items():
        averages[label] = total / occurrences[label]

    # The processed file keeps nothing but the averaged scores.
    with open(output_path, 'w') as outfile:
        json.dump({"language": averages}, outfile, indent=4)
    print(f"Processed and saved: {output_path}")

# Raw Hume results are read from the first folder; averaged files go to the second.
input_directory = "./dataset/outputs/hume"
output_directory = "./dataset/outputs/hume_processed"

# The destination must exist before process_json_file writes into it.
os.makedirs(output_directory, exist_ok=True)

# Full paths of every raw language result (files ending in '_lang.json').
json_files = [
    os.path.join(input_directory, name)
    for name in os.listdir(input_directory)
    if name.endswith('_lang.json')
]

# Average each file; files that already have an output are skipped by the callee.
for file_path in json_files:
    process_json_file(file_path, output_directory)

print("All files processed or skipped if already existing.")


All files processed or skipped if already existing.


## 2. Merge facial expression predictions into sentences, and average the scores for each emotion

In [8]:
import os
import json

def process_face_json_file(filepath, output_directory):
    """Average the face-model emotion scores of one Hume result file.

    Reads the ``*_face.json`` file at ``filepath`` and averages the score of
    every emotion name over all frame predictions of all face groups in all
    results, then writes ``{"face": {name: mean}}`` into ``output_directory``
    as ``*_face_processed.json``. A file without any face prediction yields
    ``{"face": {}}``.

    Nothing is read or written when the output file already exists.

    Args:
        filepath: Path of the raw face result file.
        output_directory: Directory receiving the processed file; this
            function does not create it.
    """
    # Determine output file path
    output_path = os.path.join(
        output_directory,
        os.path.basename(filepath).replace('_face.json', '_face_processed.json'),
    )

    # Skip processing if the file already exists
    if os.path.exists(output_path):
        return

    # Read JSON file
    with open(filepath, 'r') as file:
        data = json.load(file)

    # Navigate to the face predictions in JSON structure
    predictions = data[0]["results"]["predictions"]

    # Running sum and number of occurrences for each emotion name.
    emotion_scores = {}
    count_emotions = {}
    for pred in predictions:
        for group in pred["models"]["face"]["grouped_predictions"]:
            # Walk every frame of the group: reading only the first frame
            # would drop the rest of the clip from the average and would
            # fail on a group whose prediction list is empty.
            for frame in group["predictions"]:
                for emotion in frame["emotions"]:
                    name = emotion["name"]
                    emotion_scores[name] = emotion_scores.get(name, 0) + emotion["score"]
                    count_emotions[name] = count_emotions.get(name, 0) + 1

    # Averaging the scores
    average_emotion_scores = {
        name: total / count_emotions[name]
        for name, total in emotion_scores.items()
    }

    # Simplify the output format to just average emotion scores for face
    processed_data = {
        "face": average_emotion_scores
    }

    # Save processed data to a new file
    with open(output_path, 'w') as outfile:
        json.dump(processed_data, outfile, indent=4)
    print(f"Processed and saved: {output_path}")

# Raw Hume results are read from the first folder; averaged files go to the second.
input_directory = "./dataset/outputs/hume"
output_directory = "./dataset/outputs/hume_processed"

# The destination must exist before process_face_json_file writes into it.
os.makedirs(output_directory, exist_ok=True)

# Full paths of every raw face result (files ending in '_face.json').
json_files = [
    os.path.join(input_directory, name)
    for name in os.listdir(input_directory)
    if name.endswith('_face.json')
]

# Average each file; files that already have an output are skipped by the callee.
for file_path in json_files:
    process_face_json_file(file_path, output_directory)

print("All face files processed or skipped if already existing.")


All face files processed or skipped if already existing.


## 3. Merge prosody predictions into sentences, and average the scores for each emotion

In [9]:
import os
import json

def process_prosody_json_file(filepath, output_directory):
    """Average the prosody-model emotion scores of one Hume result file.

    Reads the ``*_prosody.json`` file at ``filepath`` and writes
    ``{"prosody": {name: score}}`` into ``output_directory`` as
    ``*_prosody_processed.json``. When the file has results and the first
    result has prediction groups, each score is the mean over the predictions
    of the first group; otherwise every emotion in the built-in list is
    written with a score of 0.

    Nothing is read or written when the output file already exists.

    Args:
        filepath: Path of the raw prosody result file.
        output_directory: Directory receiving the processed file; this
            function does not create it.
    """
    # Emotion names written with a zero score when a file has no predictions.
    full_emotions_list = [
        "Admiration", "Adoration", "Aesthetic Appreciation", "Amusement", "Anger", "Annoyance",
        "Anxiety", "Awe", "Awkwardness", "Boredom", "Calmness", "Concentration", "Confusion",
        "Contemplation", "Contempt", "Contentment", "Craving", "Desire", "Determination",
        "Disappointment", "Disapproval", "Disgust", "Distress", "Doubt", "Ecstasy", "Embarrassment",
        "Empathic Pain", "Enthusiasm", "Entrancement", "Envy", "Excitement", "Fear", "Gratitude",
        "Guilt", "Horror", "Interest", "Joy", "Love", "Nostalgia", "Pain", "Pride", "Realization",
        "Relief", "Romance", "Sadness", "Sarcasm", "Satisfaction", "Shame", "Surprise (negative)",
        "Surprise (positive)", "Sympathy", "Tiredness", "Triumph"
    ]

    processed_name = os.path.basename(filepath).replace('_prosody.json', '_prosody_processed.json')
    output_path = os.path.join(output_directory, processed_name)

    # An existing output means this clip was handled in an earlier run.
    if os.path.exists(output_path):
        return

    with open(filepath, 'r') as handle:
        data = json.load(handle)

    results = data[0]["results"]["predictions"]
    if results and results[0]["models"]["prosody"]["grouped_predictions"]:
        first_group = results[0]["models"]["prosody"]["grouped_predictions"][0]

        # Running sum and number of occurrences for each emotion name.
        totals = {}
        occurrences = {}
        for segment in first_group["predictions"]:
            for entry in segment["emotions"]:
                label = entry["name"]
                totals[label] = totals.get(label, 0) + entry["score"]
                occurrences[label] = occurrences.get(label, 0) + 1

        emotion_scores = {}
        for label, total in totals.items():
            emotion_scores[label] = total / occurrences.get(label, 1)
    else:
        # No usable predictions: emit the full emotion list with zero scores.
        emotion_scores = dict.fromkeys(full_emotions_list, 0)

    with open(output_path, 'w') as outfile:
        json.dump({"prosody": emotion_scores}, outfile, indent=4)

    print(f"Processed and saved: {output_path}")

# Example usage: average every raw prosody result found in the Hume output folder.
input_directory = "./dataset/outputs/hume"
output_directory = "./dataset/outputs/hume_processed"

# The destination must exist before process_prosody_json_file writes into it.
os.makedirs(output_directory, exist_ok=True)

# Full paths of every raw prosody result (files ending in '_prosody.json').
json_files = [
    os.path.join(input_directory, name)
    for name in os.listdir(input_directory)
    if name.endswith('_prosody.json')
]

for file_path in json_files:
    process_prosody_json_file(file_path, output_directory)

print("All prosody files processed or skipped if already existing.")


Processed and saved: ./dataset/outputs/hume_processed\0_00-00-00.000_00-00-05.672_Chandler_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-05.881_00-00-07.383_The-Interviewer_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-07.383_00-00-10.330_Chandler_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-10.761_00-00-13.513_The-Interviewer_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-18.393_00-00-24.858_Chandler_surprise_positive_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-25.067_00-00-28.278_The-Interviewer_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-32.741_00-00-35.827_Chandler_neutral_neutral_prosody_processed.json
Processed and saved: ./dataset/outputs/hume_processed\0_00-00-32.74

## Now we have simplified modalities in folder hume_processed
## Then we merge into one file with the information in the file names

In [6]:
import os
import json

def merge_json_files(input_directory, output_directory):
    """Merge the per-modality processed files of each clip into one file.

    Every file in ``input_directory`` whose name contains ``_processed.json``
    contributes a base name (the file name without its trailing
    ``_<modality>_processed.json``). For each base name that has all three of
    ``_face_processed.json``, ``_prosody_processed.json`` and
    ``_lang_processed.json``, a ``<base>_merged.json`` file is written to
    ``output_directory`` with the layout
    ``{"predicted": {"face": ..., "prosody": ..., "lang": ...}}``.

    Base names missing a modality, or whose merged file already exists, are
    skipped with a message.

    Args:
        input_directory: Directory holding the ``*_processed.json`` files.
        output_directory: Directory receiving the merged files; created when
            missing.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Top-level keys under which each modality file may store its scores.
    # The language file is written as {"language": ...}, so looking it up
    # under "lang" alone would always produce an empty section.
    modality_keys = {
        "face": ("face",),
        "prosody": ("prosody",),
        "lang": ("lang", "language"),
    }

    # Collect filenames without extension and modality suffix; sorted so the
    # processing order and the log output are the same on every run.
    base_files = sorted({
        filename.rsplit("_", 2)[0]
        for filename in os.listdir(input_directory)
        if "_processed.json" in filename
    })

    for base_filename in base_files:
        modality_paths = {
            modality: os.path.join(input_directory, f"{base_filename}_{modality}_processed.json")
            for modality in modality_keys
        }

        # Check if all necessary files are present
        if not all(os.path.exists(path) for path in modality_paths.values()):
            print(f"Skipping {base_filename} - not all modalities are available.")
            continue

        # Check if the merged file already exists
        merged_filename = f"{base_filename}_merged.json"
        merged_path = os.path.join(output_directory, merged_filename)
        if os.path.exists(merged_path):
            print(f"Skipping {base_filename} - merged file already exists.")
            continue

        # Initialize the merged data dictionary
        merged_data = {"predicted": {"face": {}, "prosody": {}, "lang": {}}}

        # Merge the data from each modality file
        for modality, modality_path in modality_paths.items():
            with open(modality_path, "r") as file:
                modality_data = json.load(file)
            # Use the first key that is present; leave {} when none is.
            for key in modality_keys[modality]:
                if key in modality_data:
                    merged_data["predicted"][modality] = modality_data[key]
                    break

        # Save the merged data to a new JSON file in the output directory
        with open(merged_path, "w") as file:
            json.dump(merged_data, file, indent=4)

        print(f"Merged {base_filename} into {merged_filename}")

# Usage: read the per-modality files and write one merged file per clip.
input_directory = './dataset/outputs/hume_processed'
output_directory = './dataset/outputs/hume_processed_merged'
merge_json_files(
    input_directory=input_directory,
    output_directory=output_directory,
)


Skipping 65_00-00-21.981_00-00-28.194_Joanna_neutral_neutral - not all modalities are available.
Skipping 199_00-00-03.646_00-00-05.568_Rachel_joy_positive - not all modalities are available.
Skipping 59_00-00-08.801_00-00-09.353_Doug_joy_positive - not all modalities are available.
Skipping 116_00-00-03.963_00-00-09.884_Ross_neutral_neutral - not all modalities are available.
Skipping 493_00-00-25.484_00-00-28.486_Monica_neutral_neutral - not all modalities are available.
Skipping 5_00-01-01.270_00-01-02.645_Rachel_anger_negative - not all modalities are available.
Skipping 389_00-00-07.091_00-00-08.925_Joey_neutral_neutral - not all modalities are available.
Skipping 954_00-00-46.088_00-00-47.672_Ross_surprise_negative - not all modalities are available.
Skipping 226_00-00-03.211_00-00-05.338_Danny_joy_positive - not all modalities are available.
Skipping 302_00-00-04.421_00-00-05.922_Ross_sadness_negative - not all modalities are available.
Skipping 523_00-00-54.929_00-00-56.901_Mon

# Add ground truth to json files, read from the file name

In [7]:
import os
import json
from datetime import timedelta

def format_timedelta(td):
    """Render ``td`` as ``HH-MM-SS.000``.

    The duration is truncated to whole seconds, so the millisecond field of
    the result is always ``000``. Hours are not wrapped at 24.
    """
    whole_seconds = int(td.total_seconds())
    hours = whole_seconds // 3600
    minutes = (whole_seconds % 3600) // 60
    seconds = whole_seconds % 60
    return "-".join(f"{part:02d}" for part in (hours, minutes, seconds)) + ".000"

def process_text_files(input_directory, output_directory, predicted_directory):
    """Attach file-name metadata and transcript text to each clip's predictions.

    For every ``.txt`` file in ``input_directory`` the file name (without the
    extension) is split on ``_`` into exactly six fields: dialogue id, start
    offset, end offset, speaker, emotion and sentiment. These fields and the
    text of the file are stored under ``"metadata"``; the ``"predicted"``
    section comes from ``<name>_merged.json`` in ``predicted_directory``, or
    is three empty modality sections when that file is missing. The result is
    written to ``output_directory`` as ``<name>_merged.json``.

    Args:
        input_directory: Directory holding the transcript ``.txt`` files.
        output_directory: Directory receiving the combined files; created
            when missing.
        predicted_directory: Directory holding the merged prediction files.

    Raises:
        ValueError: If a ``.txt`` file name does not split into six fields.
    """
    os.makedirs(output_directory, exist_ok=True)

    for filename in os.listdir(input_directory):
        if not filename.endswith(".txt"):
            continue

        # The ground truth is encoded in the file name itself.
        stem = filename[:-4]
        dialogue_id, offset_start, offset_end, speaker, emotion, sentiment = stem.split("_")

        with open(os.path.join(input_directory, filename), "r") as text_file:
            text_content = text_file.read()

        # Prediction and output files share one name: the six fields joined
        # back with underscores, followed by the merged suffix.
        merged_name = f"{dialogue_id}_{offset_start}_{offset_end}_{speaker}_{emotion}_{sentiment}_merged.json"

        predicted_path = os.path.join(predicted_directory, merged_name)
        if not os.path.exists(predicted_path):
            # No predictions for this clip: keep the layout, leave it empty.
            predicted_data = {"predicted": {"face": {}, "prosody": {}, "lang": {}}}
        else:
            with open(predicted_path, "r") as predicted_file:
                predicted_data = json.load(predicted_file)

        metadata = {
            "dialogue_id": dialogue_id,
            "time_start": offset_start,
            "time_end": offset_end,
            "speaker": speaker,
            "emotion": emotion,
            "sentiment": sentiment,
            "text_content": text_content,
            "file_name": filename.replace(".txt", ""),
        }
        merged_data = {"metadata": metadata, "predicted": predicted_data["predicted"]}

        with open(os.path.join(output_directory, merged_name), "w") as out_file:
            json.dump(merged_data, out_file, indent=4)

        print(f"Processed {filename} into {merged_name}")

# Usage: combine the transcripts with the merged predictions of each clip.
input_directory = './dataset/processed_clips'
output_directory = './dataset/outputs/merged_all'
predicted_directory = './dataset/outputs/hume_processed_merged'
process_text_files(
    input_directory=input_directory,
    output_directory=output_directory,
    predicted_directory=predicted_directory,
)

Processed 0_00-00-00.000_00-00-02.793_Phoebe_sadness_negative.txt into 0_00-00-00.000_00-00-02.793_Phoebe_sadness_negative_merged.json
Processed 0_00-00-04.671_00-00-06.005_Monica_surprise_negative.txt into 0_00-00-04.671_00-00-06.005_Monica_surprise_negative_merged.json
Processed 100_00-00-00.000_00-00-02.544_Monica_neutral_neutral.txt into 100_00-00-00.000_00-00-02.544_Monica_neutral_neutral_merged.json
Processed 101_00-00-00.000_00-00-08.133_Rachel_sadness_negative.txt into 101_00-00-00.000_00-00-08.133_Rachel_sadness_negative_merged.json
Processed 102_00-00-00.000_00-00-01.711_Monica_neutral_neutral.txt into 102_00-00-00.000_00-00-01.711_Monica_neutral_neutral_merged.json
Processed 102_00-00-01.711_00-00-03.189_Rachel_neutral_neutral.txt into 102_00-00-01.711_00-00-03.189_Rachel_neutral_neutral_merged.json
Processed 103_00-00-00.000_00-00-02.043_Monica_neutral_neutral.txt into 103_00-00-00.000_00-00-02.043_Monica_neutral_neutral_merged.json
Processed 103_00-00-02.210_00-00-04.421_R

# The processed files will be saved into dataset/outputs/merged_all