In [1]:
%pwd

'/cephfs/volumes/hpc_data_usr/k24083007/2070c87e-fe07-4f03-a6c4-cae0de8ce617'

In [2]:
%cd cmu-mosei-experiments/CMU-MultimodalSDK-Tutorials/

/cephfs/volumes/hpc_data_usr/k24083007/2070c87e-fe07-4f03-a6c4-cae0de8ce617/cmu-mosei-experiments/CMU-MultimodalSDK-Tutorials


In [3]:
!nvidia-smi

Wed Jul 16 14:26:04 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.230.02             Driver Version: 535.230.02   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  | 00000000:00:07.0 Off |                    0 |
| N/A   31C    P0              41W / 300W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [135]:
import os
import sys
import numpy as np
from mmsdk import mmdatasdk as md
from constants.paths import SDK_PATH, DATA_PATH


import torch
import pickle
import requests
import re
from collections import defaultdict
from typing import Dict, List, Tuple, Any
import json

# Visualization and analysis
import matplotlib.pyplot as plt
import seaborn as sns

# For deep inspection of dataset structures
import inspect
from pprint import pprint


In [136]:
# Inspect the reference CARAT data file: report its size and its top-level keys.
pt_data_path = '../CARAT/data/train_valid_test.pt'
pt_file_found = os.path.exists(pt_data_path)
print(f"Data file exists: {pt_file_found}")

if pt_file_found:
    print(f"File size: {os.path.getsize(pt_data_path) / (1024*1024):.2f} MB")

    # Only map onto the GPU when one is actually available, so this inspection
    # also works on CPU-only machines instead of failing inside torch.load.
    load_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): torch.load unpickles the file's contents; only load files
    # that come from a trusted source.
    try:
        data = torch.load(pt_data_path, map_location=load_device)
        print(f"Data keys: {list(data.keys())}")
        if 'train' in data:
            print(f"Train data keys: {list(data['train'].keys())}")
    except Exception as e:
        print(f"Error loading data: {e}")

Data file exists: True
File size: 5274.60 MB
Data keys: ['settings', 'dict', 'train', 'valid', 'test']
Train data keys: ['src-text', 'src-visual', 'src-audio', 'tgt']


In [137]:
# Ensure SDK is in path (guarded so that re-running the cell does not append duplicates)
if SDK_PATH not in sys.path:
    sys.path.append(SDK_PATH)

# Always define data_files: the error handlers of later cells print it, so leaving
# it undefined when DATA_PATH is missing would turn their message into a NameError.
data_files = []

# Make sure DATA_PATH exists
if not os.path.exists(DATA_PATH):
    print(f"Error: DATA_PATH does not exist: {DATA_PATH}")
    print("Please modify DATA_PATH to point to your .csd files directory")
else:
    data_files = os.listdir(DATA_PATH)
    print("Available data files:")
    print('\n'.join(data_files))

Available data files:
CMU_MOSEI_COVAREP.csd
CMU_MOSEI_TimestampedWords.csd
CMU_MOSEI_OpenFace2.csd
CMU_MOSEI_VisualFacet42.csd
CMU_MOSEI_TimestampedWordVectors.csd
CMU_MOSEI_TimestampedPhones.csd
CMU_MOSEI_Labels.csd


In [138]:
# Names of the computational sequences used for each modality
text_field = 'CMU_MOSEI_TimestampedWordVectors'
visual_field = 'CMU_MOSEI_VisualFacet42'
acoustic_field = 'CMU_MOSEI_COVAREP'

# Sequences to load, in text / visual / acoustic order
features = [text_field, visual_field, acoustic_field]

# Recipe: sequence name -> path of its .csd file inside DATA_PATH
recipe = dict(
    (seq_name, os.path.join(DATA_PATH, f"{seq_name}.csd"))
    for seq_name in features
)

# Build the dataset; on failure report the error together with the files found earlier
try:
    dataset = md.mmdataset(recipe)
except Exception as e:
    print(f"Error loading dataset: {e}")
    print("Available files:", data_files)
else:
    print("Dataset loaded successfully!")

[92m[1m[2025-07-04 09:28:29.043] | Success | [0mComputational sequence read from file ./data/CMU_MOSEI_TimestampedWordVectors.csd ...
[94m[1m[2025-07-04 09:28:29.168] | Status  | [0mChecking the integrity of the <glove_vectors> computational sequence ...
[94m[1m[2025-07-04 09:28:29.168] | Status  | [0mChecking the format of the data in <glove_vectors> computational sequence ...


                                                                                  

[92m[1m[2025-07-04 09:28:30.176] | Success | [0m<glove_vectors> computational sequence data in correct format.
[94m[1m[2025-07-04 09:28:30.176] | Status  | [0mChecking the format of the metadata in <glove_vectors> computational sequence ...
[92m[1m[2025-07-04 09:28:30.177] | Success | [0mComputational sequence read from file ./data/CMU_MOSEI_VisualFacet42.csd ...
[94m[1m[2025-07-04 09:28:30.261] | Status  | [0mChecking the integrity of the <FACET 4.2> computational sequence ...
[94m[1m[2025-07-04 09:28:30.261] | Status  | [0mChecking the format of the data in <FACET 4.2> computational sequence ...


                                                                                  

[92m[1m[2025-07-04 09:28:31.224] | Success | [0m<FACET 4.2> computational sequence data in correct format.
[94m[1m[2025-07-04 09:28:31.224] | Status  | [0mChecking the format of the metadata in <FACET 4.2> computational sequence ...
[92m[1m[2025-07-04 09:28:31.225] | Success | [0mComputational sequence read from file ./data/CMU_MOSEI_COVAREP.csd ...
[94m[1m[2025-07-04 09:28:31.306] | Status  | [0mChecking the integrity of the <COVAREP> computational sequence ...
[94m[1m[2025-07-04 09:28:31.306] | Status  | [0mChecking the format of the data in <COVAREP> computational sequence ...


                                                                                  

[92m[1m[2025-07-04 09:28:32.263] | Success | [0m<COVAREP> computational sequence data in correct format.
[94m[1m[2025-07-04 09:28:32.263] | Status  | [0mChecking the format of the metadata in <COVAREP> computational sequence ...
[92m[1m[2025-07-04 09:28:32.263] | Success | [0mDataset initialized successfully ... 
Dataset loaded successfully!




In [139]:
# "Note that the SDK always expect collapse functions with two arguments: `intervals` and `features`." 
# "Even if you don't use intervals (as is in the case below) you still need to define your function"

# def preserve_features(intervals: np.array, features: np.array) -> np.array:
#     """
#     Preserves temporal information while respecting alignment bins
    
#     """
#     if features.shape[0] == 0:
#         return features
#     elif features.shape[0] == 1:
#         return features[0]  # Single feature - return as-is
#     else:
#         # Multiple features in bin - take the median preserving the peak of the distribution
#         # This keeps the middle frame as the representative feature
#         mid_idx = features.shape[0] // 2
#         return features[mid_idx]
    
# dataset.align(text_field, collapse_functions=[preserve_features])

In [140]:
# Register the label sequence in the already-loaded dataset
label_field = 'CMU_MOSEI_Labels'

# Recipe with a single entry: label sequence name -> its .csd file inside DATA_PATH
label_recipe = {label_field: os.path.join(DATA_PATH, f"{label_field}.csd")}

try:
    dataset.add_computational_sequences(label_recipe, destination=None)
except Exception as e:
    print(f"Error adding labels: {e}")
    print("Available files:", data_files)
else:
    # No alignment is performed here; this cell only adds the sequence.
    print("Labels added successfully!")


[92m[1m[2025-07-04 09:28:35.460] | Success | [0mComputational sequence read from file ./data/CMU_MOSEI_Labels.csd ...
[94m[1m[2025-07-04 09:28:35.527] | Status  | [0mChecking the integrity of the <All Labels> computational sequence ...
[94m[1m[2025-07-04 09:28:35.527] | Status  | [0mChecking the format of the data in <All Labels> computational sequence ...


                                                                                  

[92m[1m[2025-07-04 09:28:36.330] | Success | [0m<All Labels> computational sequence data in correct format.
[94m[1m[2025-07-04 09:28:36.330] | Status  | [0mChecking the format of the metadata in <All Labels> computational sequence ...
Labels added successfully!




In [141]:
# Align the dataset to the label sequence without passing any collapse
# functions (collapse_functions=None).
dataset.align(label_field, collapse_functions=None)

print("Alignment done")

[94m[1m[2025-07-04 09:28:36.385] | Status  | [0mUnify was called ...
[92m[1m[2025-07-04 09:28:37.067] | Success | [0mUnify completed ...
[94m[1m[2025-07-04 09:28:37.067] | Status  | [0mPre-alignment based on <CMU_MOSEI_Labels> computational sequence started ...
[94m[1m[2025-07-04 09:28:52.580] | Status  | [0mPre-alignment done for <CMU_MOSEI_TimestampedWordVectors> ...
[94m[1m[2025-07-04 09:29:09.581] | Status  | [0mPre-alignment done for <CMU_MOSEI_VisualFacet42> ...
[94m[1m[2025-07-04 09:30:20.809] | Status  | [0mPre-alignment done for <CMU_MOSEI_COVAREP> ...
[94m[1m[2025-07-04 09:30:21.731] | Status  | [0mAlignment starting ...


                                                                                                  

[92m[1m[2025-07-04 09:31:38.981] | Success | [0mAlignment to <CMU_MOSEI_Labels> complete.
[94m[1m[2025-07-04 09:31:38.981] | Status  | [0mReplacing dataset content with aligned computational sequences
[92m[1m[2025-07-04 09:31:39.092] | Success | [0mInitialized empty <CMU_MOSEI_TimestampedWordVectors> computational sequence.
[94m[1m[2025-07-04 09:31:39.098] | Status  | [0mChecking the format of the data in <CMU_MOSEI_TimestampedWordVectors> computational sequence ...


                                                                      

[92m[1m[2025-07-04 09:31:39.142] | Success | [0m<CMU_MOSEI_TimestampedWordVectors> computational sequence data in correct format.
[94m[1m[2025-07-04 09:31:39.144] | Status  | [0mChecking the format of the metadata in <CMU_MOSEI_TimestampedWordVectors> computational sequence ...
[92m[1m[2025-07-04 09:31:39.145] | Success | [0mInitialized empty <CMU_MOSEI_VisualFacet42> computational sequence.
[94m[1m[2025-07-04 09:31:39.145] | Status  | [0mChecking the format of the data in <CMU_MOSEI_VisualFacet42> computational sequence ...


                                                                      

[92m[1m[2025-07-04 09:31:39.184] | Success | [0m<CMU_MOSEI_VisualFacet42> computational sequence data in correct format.
[94m[1m[2025-07-04 09:31:39.188] | Status  | [0mChecking the format of the metadata in <CMU_MOSEI_VisualFacet42> computational sequence ...
[92m[1m[2025-07-04 09:31:39.191] | Success | [0mInitialized empty <CMU_MOSEI_COVAREP> computational sequence.
[94m[1m[2025-07-04 09:31:39.204] | Status  | [0mChecking the format of the data in <CMU_MOSEI_COVAREP> computational sequence ...


                                                                      

[92m[1m[2025-07-04 09:31:39.258] | Success | [0m<CMU_MOSEI_COVAREP> computational sequence data in correct format.
[94m[1m[2025-07-04 09:31:39.264] | Status  | [0mChecking the format of the metadata in <CMU_MOSEI_COVAREP> computational sequence ...
[92m[1m[2025-07-04 09:31:39.267] | Success | [0mInitialized empty <CMU_MOSEI_Labels> computational sequence.
[94m[1m[2025-07-04 09:31:39.267] | Status  | [0mChecking the format of the data in <CMU_MOSEI_Labels> computational sequence ...


                                                                      

[92m[1m[2025-07-04 09:31:39.321] | Success | [0m<CMU_MOSEI_Labels> computational sequence data in correct format.
[94m[1m[2025-07-04 09:31:39.321] | Status  | [0mChecking the format of the metadata in <CMU_MOSEI_Labels> computational sequence ...
Alignment done


In [142]:
# Number of segment keys held by each computational sequence
def _segment_count(field_name):
    """Return how many segment keys ``dataset`` stores under ``field_name``."""
    return len(list(dataset[field_name].keys()))


text_segments = _segment_count(text_field)
visual_segments = _segment_count(visual_field)
acoustic_segments = _segment_count(acoustic_field)
label_segments = _segment_count(label_field)

print(f"Text segments: {text_segments}")
print(f"Visual segments: {visual_segments}")
print(f"Acoustic segments: {acoustic_segments}")
print(f"Label segments: {label_segments}")

Text segments: 23248
Visual segments: 23240
Acoustic segments: 23248
Label segments: 23248


In [143]:
# Preview the feature shapes of the first 5 label segments in every modality
sample_segments = list(dataset[label_field].keys())[:5]
for segment in sample_segments:
    try:
        # Look the segment up in each sequence; a KeyError means a modality lacks it
        text_shape, visual_shape, acoustic_shape, label_shape = (
            dataset[field_name][segment]['features'].shape
            for field_name in (text_field, visual_field, acoustic_field, label_field)
        )
    except KeyError as e:
        print(f"Missing data for segment {segment}: {e}")
        continue

    print(f"\nSegment: {segment}")
    print(f"  Text: {text_shape}, Visual: {visual_shape}, Acoustic: {acoustic_shape}, Label: {label_shape}")


Segment: --qXJuDtHPw[0]
  Text: (22, 300), Visual: (215, 35), Acoustic: (714, 74), Label: (1, 7)

Segment: -3g5yACwYnA[0]
  Text: (55, 300), Visual: (535, 35), Acoustic: (1781, 74), Label: (1, 7)

Segment: -3g5yACwYnA[1]
  Text: (22, 300), Visual: (162, 35), Acoustic: (539, 74), Label: (1, 7)

Segment: -3g5yACwYnA[2]
  Text: (23, 300), Visual: (277, 35), Acoustic: (922, 74), Label: (1, 7)

Segment: -3g5yACwYnA[3]
  Text: (42, 300), Visual: (430, 35), Acoustic: (1432, 74), Label: (1, 7)


In [144]:
# One container per split; each maps a key to a list that is filled segment by segment
non_aligned_data = {
    split_name: defaultdict(list) for split_name in ('train', 'val', 'test')
}

# Segment keys have the form "<video id>[<index>]"; group 1 captures the video id
pattern = re.compile(r'(.*)\[.*\]')

# Standard CMU-MOSEI folds from mmdatasdk, stored as sets for membership tests
DATASET = md.cmu_mosei
_standard_folds = DATASET.standard_folds
train_split = set(_standard_folds.standard_train_fold)
val_split = set(_standard_folds.standard_valid_fold)
test_split = set(_standard_folds.standard_test_fold)

In [145]:
# Check the feature dimensions of all segments for all modalities

def review_features_dim(features, target_dim):
    """
    Force the last axis of a 1D or 2D feature array to ``target_dim`` entries.

    Args:
        features: numpy array. 1D arrays are treated as a single vector and 2D
            arrays as a sequence of vectors; the last axis is the one resized.
        target_dim: required size of the last axis.

    Returns:
        A ``(features, changed)`` tuple. ``changed`` is True when the array was
        truncated or zero-padded (float32 zeros) and False when it is returned
        untouched: already the right size, empty, or of an unsupported rank.
    """
    # Nothing to resize in an empty array
    if features.size == 0:
        return features, False

    # Only vectors and sequences of vectors are handled
    if len(features.shape) not in (1, 2):
        print(f"Warning: Unsupported feature shape {features.shape}. Returning as is.")
        return features, False

    width = features.shape[-1]
    if width == target_dim:
        return features, False

    if width > target_dim:
        # Too wide: keep only the leading target_dim entries of the last axis
        return features[..., :target_dim], True

    # Too narrow: append float32 zeros along the last axis
    filler_shape = features.shape[:-1] + (target_dim - width,)
    filler = np.zeros(filler_shape, dtype=np.float32)
    return np.hstack([features, filler]), True

    

# Clean the audio features and report whether any NaN / Inf values remain afterwards
def process_audio_features(audio_features):
    """
    Replace NaN and +/-Inf entries of the audio features with finite numbers.

    ``np.nan_to_num`` is called with its default arguments, so NaN becomes 0.0
    and +/-Inf become the largest / most negative finite value of the dtype.

    Returns:
        A ``(cleaned, has_nan, has_inf)`` tuple. Both flags are evaluated on the
        cleaned array, i.e. they describe what is left after the replacement,
        not what the input contained.
    """
    cleaned = np.nan_to_num(audio_features)
    has_nan = np.isnan(cleaned).any()
    has_inf = np.isinf(cleaned).any()
    return cleaned, has_nan, has_inf


In [146]:
def process_labels(label_features, target_dim=7):
    """
    Convert a CMU-MOSEI label vector into a 6-way multi-hot emotion vector.

    CMU-MOSEI original label format: [sentiment, happy, sad, anger, surprise, disgust, fear]
    CARAT needs only the 6 emotions, in a multi-hot encoding.

    Args:
        label_features: list or numpy array holding the labels of one segment.
        target_dim: size of the last axis that marks the original format
            (sentiment followed by the emotions).

    Returns:
        - float32 array of shape (6,), 1.0 where the emotion score is > 0, when
          the last axis has ``target_dim`` entries (the sentiment is dropped);
        - the input array unchanged when its last axis already has 6 entries;
        - None (after printing a warning) for any other shape.
    """
    # Lists are converted first and then processed exactly like arrays.
    # Previously a list input was converted and then fell through to an
    # implicit ``return None``.
    if isinstance(label_features, list):
        label_features = np.array(label_features)

    last_dim = label_features.shape[-1]
    if last_dim == target_dim:
        # Drop the leading sentiment score and binarise the six emotion scores
        emotion_labels = label_features.flatten()[1:7]
        return (emotion_labels > 0.0).astype(np.float32)

    if last_dim == 6:
        return label_features  # Already in the emotions-only layout

    print(f"Warning: Unexpected label shape {label_features.shape}. Expected 6 or 7 dimensions.")
    return None
    
    



In [147]:
# Counters describing what happened to each segment during the conversion
stats = {
    'processed_count': 0,
    'missing_data_count': 0,
    'empty_features_count': 0,
    'wrong_splits_count': 0,
    'dimension_issues_count': 0
}

all_segment_keys = list(dataset[label_field].keys())
total_segments = len(all_segment_keys)
print(f"Processing {total_segments} segments...")


for segment_key in all_segment_keys:
    try:
        # Extract the video id; it decides which split the segment belongs to
        vid_match = pattern.search(segment_key)
        if not vid_match:
            stats['wrong_splits_count'] += 1
            continue
        vid = vid_match.group(1)

        # Extract features for this segment (keeping original lengths).
        # A segment missing from any modality raises KeyError, handled below.
        text_features = dataset[text_field][segment_key]['features'].astype(np.float32)
        visual_features = dataset[visual_field][segment_key]['features'].astype(np.float32)
        acoustic_features = dataset[acoustic_field][segment_key]['features'].astype(np.float32)
        label_features = dataset[label_field][segment_key]['features'].astype(np.float32)

        # Empty features check
        if (text_features.size == 0 or visual_features.size == 0 or acoustic_features.size == 0 or label_features.size == 0):
            stats['empty_features_count'] += 1
            continue

        # Verify the feature dimension of each modality; any mismatch skips the segment
        text_features, text_dim_mismatch = review_features_dim(text_features, target_dim=300)
        acoustic_features, audio_dim_mismatch = review_features_dim(acoustic_features, target_dim=74)
        visual_features, visual_dim_mismatch = review_features_dim(visual_features, target_dim=35)
        label_features, label_dim_mismatch = review_features_dim(label_features, target_dim=7)

        if (text_dim_mismatch or audio_dim_mismatch or visual_dim_mismatch or label_dim_mismatch):
            print(f"  Dimension mismatch")
            stats['dimension_issues_count'] += 1
            continue

        # Clean NaN / Inf values. The cleaned array is rebound to acoustic_features so
        # that it is the one stored below; previously the cleaned copy was assigned to
        # a separate name and the uncleaned array was saved.
        acoustic_features, has_nan, has_inf = process_audio_features(acoustic_features)
        if has_nan or has_inf:
            print(f"  Warning: Audio features for segment {segment_key} contain NaN or Inf values")
            stats['empty_features_count'] += 1
            continue

        # Determine split
        if vid in train_split:
            split = 'train'
        elif vid in val_split:
            split = 'val'
        elif vid in test_split:
            split = 'test'
        else:
            # Video id is not part of any standard fold
            stats['wrong_splits_count'] += 1
            continue

        # Processing labels
        emotion_labels = process_labels(label_features, target_dim=7)
        if emotion_labels is None:
            stats['missing_data_count'] += 1
            continue

        stats['processed_count'] += 1

        # Store in CARAT format
        non_aligned_data[split]['src-text'].append(text_features)
        non_aligned_data[split]['src-audio'].append(acoustic_features)
        non_aligned_data[split]['src-visual'].append(visual_features)
        non_aligned_data[split]['tgt'].append(emotion_labels)

    except Exception as e:
        # Best effort: report the segment, count it as missing data and move on
        print(f"Segment {segment_key} is missing some data: {e}")
        stats['missing_data_count'] += 1
        continue

# Summary:
print("\n---------------------- Summary ----------------------")
print(f"Processed {stats['processed_count']} segments out of {total_segments} total segments.")
print(f"Missing data in {stats['missing_data_count']} segments.")
print(f"Empty features in {stats['empty_features_count']} segments.")
print(f"Wrong splits in {stats['wrong_splits_count']} segments.")
print(f"Dimension issues in {stats['dimension_issues_count']} segments.")

# print(stats)
    
        

Processing 23248 segments...
Segment HuIKyKkEL0Q[0] is missing some data: 'HuIKyKkEL0Q[0]'
Segment JGEEA_JVriE[1] is missing some data: 'JGEEA_JVriE[1]'
Segment JGEEA_JVriE[2] is missing some data: 'JGEEA_JVriE[2]'
Segment JGEEA_JVriE[3] is missing some data: 'JGEEA_JVriE[3]'
Segment aa0J1AXSseY[4] is missing some data: 'aa0J1AXSseY[4]'
Segment aa0J1AXSseY[5] is missing some data: 'aa0J1AXSseY[5]'
Segment aa0J1AXSseY[6] is missing some data: 'aa0J1AXSseY[6]'
Segment zsRTbbKlsEg[0] is missing some data: 'zsRTbbKlsEg[0]'

---------------------- Summary ----------------------
Processed 22852 segments out of 23248 total segments.
Missing data in 8 segments.
Empty features in 0 segments.
Wrong splits in 388 segments.
Dimension issues in 0 segments.


In [148]:
print("\n--------- Non-Aligned Data -------------")
# Number of segments stored in each split
print(f"Train segments: {len(non_aligned_data['train']['src-text'])}")
print(f"Validation segments: {len(non_aligned_data['val']['src-text'])}")
print(f"Test segments: {len(non_aligned_data['test']['src-text'])}")
print("-" * 40)

# Preview a few train segments to show the variable, original time-step counts.
# The headline is derived from n_preview so it always matches the number shown
# (it previously announced 5 segments while only 3 were printed).
n_preview = 3
print(f"First {n_preview} segments in train split:")
train_data = non_aligned_data['train']
for i, (text, audio, visual, label) in enumerate(zip(
        train_data['src-text'][:n_preview],
        train_data['src-audio'][:n_preview],
        train_data['src-visual'][:n_preview],
        train_data['tgt'][:n_preview])):
    print(f"\nSegment {i+1}:")
    print(f"  Text shape: {text.shape}")
    print(f"  Audio shape: {audio.shape}")
    print(f"  Visual shape: {visual.shape}")
    print(f"  Label shape: {label.shape}")

# Author's reference notes, kept as comments so the cell no longer echoes a
# stray string as its output:
#   label: (6) -> [happy, sad, anger, surprise, disgust, fear]
#       averaged from 3 annotators
#   unaligned:
#   text: (50, 300)
#   visual: (500, 35)
#   audio: (500, 74)


--------- Non-Aligned Data -------------
Train segments: 16322
Validation segments: 1871
Test segments: 4659
----------------------------------------
First 5 segments in train split:

Segment 1:
  Text shape: (55, 300)
  Audio shape: (1781, 74)
  Visual shape: (535, 35)
  Label shape: (6,)

Segment 2:
  Text shape: (22, 300)
  Audio shape: (539, 74)
  Visual shape: (162, 35)
  Label shape: (6,)

Segment 3:
  Text shape: (23, 300)
  Audio shape: (922, 74)
  Visual shape: (277, 35)
  Label shape: (6,)


'\nlabel: (6) -> [happy, sad, anger, surprise, disgust, fear] \n    averaged from 3 annotators\nunaligned:\ntext: (50, 300)\nvisual: (500, 35)\naudio: (500, 74)  \n'

In [149]:
# Save for CARAT
# NOTE(review): the reference file inspected earlier lists a 'valid' split key,
# while non_aligned_data uses 'val' — confirm which key the consumer expects.
output_path = '../CARAT/data/cmu_mosei_unaligned_ree.pt'
torch.save(non_aligned_data, output_path)

In [None]:
print("\n--------- Checking labels processing -------------")
# This first part shows the labels as stored in non_aligned_data, i.e. after
# thresholding; the heading previously called them raw values.
print("Checking processed (thresholded) label values:")

# Check a few segments from the created dataset
for i, label in enumerate(non_aligned_data['train']['tgt'][:3]):
    print(f"\nSegment {i+1}:")
    print(f"  Processed label: {label}")
    print(f"  Label sum: {label.sum()}")
    print(f"  Non-zero emotions: {np.count_nonzero(label)}")

# Also check some raw labels before processing.
# These are the first keys of the label sequence, which are not necessarily the
# same segments as the train examples printed above.
print("\nChecking raw labels from dataset:")
sample_segments = list(dataset[label_field].keys())[:3]
for i, segment in enumerate(sample_segments):
    try:
        raw_label = dataset[label_field][segment]['features'].flatten()
        print(f"\nRaw segment {i+1} ({segment}):")
        print(f"  Raw label: {raw_label}")
        print(f"  Sentiment: {raw_label[0]:.2f}")
        print(f"  Emotions: {raw_label[1:7]}")
        print(f"  Emotions > 0: {raw_label[1:7] > 0.0}")
        # The printed threshold now matches the comparison (1.5)
        print(f"  Emotions > 1.5: {raw_label[1:7] > 1.5}")
    except Exception as e:
        print(f"Error processing segment {segment}: {e}")


--------- Checking labels processing -------------
Checking raw label values before thresholding:

Segment 1:
  Processed label: [1. 1. 0. 0. 0. 1.]
  Label sum: 3.0
  Non-zero emotions: 3

Segment 2:
  Processed label: [0. 0. 0. 0. 0. 0.]
  Label sum: 0.0
  Non-zero emotions: 0

Segment 3:
  Processed label: [1. 1. 0. 0. 0. 1.]
  Label sum: 3.0
  Non-zero emotions: 3

Checking raw labels from dataset:

Raw segment 1 (--qXJuDtHPw[0]):
  Raw label: [1.        0.6666667 0.        0.        0.        0.        0.       ]
  Sentiment: 1.00
  Emotions: [0.6666667 0.        0.        0.        0.        0.       ]
  Emotions > 0: [ True False False False False False]
  Emotions > 1.5: [False False False False False False]

Raw segment 2 (-3g5yACwYnA[0]):
  Raw label: [1.        0.6666667 0.6666667 0.        0.        0.        0.6666667]
  Sentiment: 1.00
  Emotions: [0.6666667 0.6666667 0.        0.        0.        0.6666667]
  Emotions > 0: [ True  True False False False  True]
  Emotion