In [172]:
"""utility and helper functions / classes."""
import json
import logging
import os
import random
from typing import Tuple

import numpy as np
import torch
from sklearn.metrics import f1_score
from tqdm import tqdm
from transformers import AutoTokenizer

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)


def make_MELD_IEMOCAP():

    SEED = 42
    ratios = {"train": 0.9, "val": 0.1, "test": 0}

    assert sum(list(ratios.values())) == 1

    utterance_ordered = {}

    with open(f"./MELD/utterance-ordered.json", "r") as stream:
        utterance_ordered["MELD"] = json.load(stream)

    with open(f"./IEMOCAP/utterance-ordered.json", "r") as stream:
        utterance_ordered["IEMOCAP"] = json.load(stream)

    diaids_merged = []

    for DATASET in ["MELD", "IEMOCAP"]:
        for SPLIT in ["train", "val", "test"]:
            diaids = list(utterance_ordered[DATASET][SPLIT].keys())
            for diaid in diaids:
                diaids_merged.append(f"{DATASET}/{SPLIT}/{diaid}")

    random.seed(SEED)
    random.shuffle(diaids_merged)

    train_idx = int(len(diaids_merged) * ratios["train"])
    val_idx = int(len(diaids_merged) * (ratios["train"] + ratios["val"]))

    diaids_train = diaids_merged[:train_idx]
    diaids_val = diaids_merged[train_idx:val_idx]
    diaids_test = diaids_merged[val_idx:]

    assert len(diaids_merged) == (
        len(diaids_train) + len(diaids_val) + len(diaids_test)
    )

    diaids_merged = {"train": diaids_train, "val": diaids_val, "test": diaids_test}

    utterance_ordered_merged = {}

    for SPLIT in ["train", "val", "test"]:
        utterance_ordered_merged[SPLIT] = {}

        for diaid in tqdm(diaids_merged[SPLIT]):

            d_, s_, d__ = diaid.split("/")
            utterance_ordered_merged[SPLIT][diaid] = [
                f"{d_}/{s_}/{d__}/{uttid}" for uttid in utterance_ordered[d_][s_][d__]
            ]

    assert len(
        [
            val___
            for key, val in utterance_ordered.items()
            for key_, val_ in val.items()
            for key__, val__ in val_.items()
            for val___ in val__
        ]
    ) == len(
        [
            val__
            for key, val in utterance_ordered_merged.items()
            for key_, val_ in val.items()
            for val__ in val_
        ]
    )

    with open("./utterance-ordered-MELD_IEMOCAP.json", "w") as stream:
        json.dump(utterance_ordered_merged, stream, indent=4)


def get_num_classes(DATASET: str) -> int:
    """Get the number of classes to be classified by dataset."""
    if DATASET == "MELD":
        NUM_CLASSES = 7
    elif DATASET == "IEMOCAP":
        NUM_CLASSES = 6
    elif DATASET == "MELD_IEMOCAP":
        NUM_CLASSES = 7
    else:
        raise ValueError

    return 7


def compute_metrics(eval_predictions) -> dict:
    """Return f1_weighted, f1_micro, and f1_macro scores."""
    predictions, label_ids = eval_predictions
    preds = np.argmax(predictions, axis=1)

    f1_weighted = f1_score(label_ids, preds, average="weighted")
    f1_micro = f1_score(label_ids, preds, average="micro")
    f1_macro = f1_score(label_ids, preds, average="macro")

    return {"f1_weighted": f1_weighted, "f1_micro": f1_micro, "f1_macro": f1_macro}


def set_seed(seed: int) -> None:
    """Set random seed to a fixed value.

    Set everything to be deterministic
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


def get_emotion2id(DATASET: str) -> Tuple[dict, dict]:
    """Get a dict that converts string class to numbers."""
    emotions = [
        "neutral",
        "joy",
        #"excited",
        "surprise",
        "anger",
        "frustration",
        "sadness",
        "disgust",
        "fear",
    ]
        
    emotion2id = {
        "neutral": 0,
        "joy": 1,
        "happiness": 1,
        "excited": 1,
        "surprise": 2,
        "anger": 3,
        "frustration": 4,
        "sadness": 5,
        "disgust": 6,
        "fear": 7,
    }
    id2emotion = {idx: emotion for idx, emotion in enumerate(emotions)}

    return emotion2id, id2emotion


class ErcTextDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        DATASET="MELD",
        SPLIT="train",
        speaker_mode="upper",
        num_past_utterances=0,
        num_future_utterances=0,
        model_checkpoint="roberta-base",
        ROOT_DIR="./",
        ONLY_UPTO=False,
        SEED=0,
    ):
        """Initialize emotion recognition in conversation text modality dataset class."""

        self.DATASET = DATASET
        self.ROOT_DIR = ROOT_DIR
        self.SPLIT = SPLIT
        self.speaker_mode = speaker_mode
        self.num_past_utterances = num_past_utterances
        self.num_future_utterances = num_future_utterances
        self.model_checkpoint = model_checkpoint
        self.emotion2id, self.id2emotion = get_emotion2id(self.DATASET)
        self.ONLY_UPTO = ONLY_UPTO
        self.SEED = SEED

        self._load_emotions()
        self._load_utterance_ordered()
        self._string2tokens()

    def _load_emotions(self):
        """Load the supervised labels"""
        if self.DATASET in ["MELD", "IEMOCAP"]:
            with open(
                os.path.join(self.ROOT_DIR, self.DATASET, "emotions.json"), "r"
            ) as stream:
                self.emotions = json.load(stream)[self.SPLIT]

    def _load_utterance_ordered(self):
        """Load the ids of the utterances in order."""
        if self.DATASET in ["MELD", "IEMOCAP"]:
            path = os.path.join(self.ROOT_DIR, self.DATASET, "utterance-ordered.json")
        elif self.DATASET == "MELD_IEMOCAP":
            path = "./utterance-ordered-MELD_IEMOCAP.json"

        with open(path, "r") as stream:
            self.utterance_ordered = json.load(stream)[self.SPLIT]

    def __len__(self):
        return len(self.inputs_)

    def _load_utterance_speaker_emotion(self, uttid, speaker_mode) -> dict:
        """Load an speaker-name prepended utterance and emotion label"""

        if self.DATASET in ["MELD", "IEMOCAP"]:
            text_path = os.path.join(
                self.ROOT_DIR, self.DATASET, "raw-texts", self.SPLIT, uttid + ".json"
            )
        elif self.DATASET == "MELD_IEMOCAP":
            assert len(uttid.split("/")) == 4
            d_, s_, d__, u_ = uttid.split("/")
            text_path = os.path.join(self.ROOT_DIR, d_, "raw-texts", s_, u_ + ".json")

        with open(text_path, "r") as stream:
            text = json.load(stream)

        utterance = text["Utterance"].strip()
        emotion = text["Emotion"]

        if self.DATASET == "MELD":
            speaker = text["Speaker"]
        elif self.DATASET == "IEMOCAP":
            sessid = text["SessionID"]
            # https: // www.ssa.gov/oact/babynames/decades/century.html
            speaker = {
                "Ses01": {"Female": "Mary", "Male": "James"},
                "Ses02": {"Female": "Patricia", "Male": "John"},
                "Ses03": {"Female": "Jennifer", "Male": "Robert"},
                "Ses04": {"Female": "Linda", "Male": "Michael"},
                "Ses05": {"Female": "Elizabeth", "Male": "William"},
            }[sessid][text["Speaker"]]
        elif self.DATASET == "MELD_IEMOCAP":
            speaker = ""
        else:
            raise ValueError(f"{self.DATASET} not supported!!!!!!")

        if speaker_mode is not None and speaker_mode.lower() == "upper":
            utterance = speaker.upper() + ": " + utterance
        elif speaker_mode is not None and speaker_mode.lower() == "title":
            utterance = speaker.title() + ": " + utterance

        return {"Utterance": utterance, "Emotion": emotion}

    def _create_input(
        self, diaids, speaker_mode, num_past_utterances, num_future_utterances
    ):
        """Create an input which will be an input to RoBERTa."""

        args = {
            "diaids": diaids,
            "speaker_mode": speaker_mode,
            "num_past_utterances": num_past_utterances,
            "num_future_utterances": num_future_utterances,
        }

        logging.debug(f"arguments given: {args}")
        tokenizer = AutoTokenizer.from_pretrained(self.model_checkpoint, use_fast=True)
        max_model_input_size = 128
        num_truncated = 0

        inputs = []
        for diaid in tqdm(diaids):
            ues = [
                self._load_utterance_speaker_emotion(uttid, speaker_mode)
                for uttid in self.utterance_ordered[diaid]
            ]

            num_tokens = [len(tokenizer(ue["Utterance"])["input_ids"]) for ue in ues]

            for idx, ue in enumerate(ues):
                if ue["Emotion"] not in list(self.emotion2id.keys()):
                    continue

                label = self.emotion2id[ue["Emotion"]]

                indexes = [idx]
                indexes_past = [
                    i for i in range(idx - 1, idx - num_past_utterances - 1, -1)
                ]
                indexes_future = [
                    i for i in range(idx + 1, idx + num_future_utterances + 1, 1)
                ]

                offset = 0
                if len(indexes_past) < len(indexes_future):
                    for _ in range(len(indexes_future) - len(indexes_past)):
                        indexes_past.append(None)
                elif len(indexes_past) > len(indexes_future):
                    for _ in range(len(indexes_past) - len(indexes_future)):
                        indexes_future.append(None)

                for i, j in zip(indexes_past, indexes_future):
                    if i is not None and i >= 0:
                        indexes.insert(0, i)
                        offset += 1
                        if (
                            sum([num_tokens[idx_] for idx_ in indexes])
                            > max_model_input_size
                        ):
                            del indexes[0]
                            offset -= 1
                            num_truncated += 1
                            break
                    if j is not None and j < len(ues):
                        indexes.append(j)
                        if (
                            sum([num_tokens[idx_] for idx_ in indexes])
                            > max_model_input_size
                        ):
                            del indexes[-1]
                            num_truncated += 1
                            break

                utterances = [ues[idx_]["Utterance"] for idx_ in indexes]

                if num_past_utterances == 0 and num_future_utterances == 0:
                    assert len(utterances) == 1
                    final_utterance = utterances[0]

                elif num_past_utterances > 0 and num_future_utterances == 0:
                    if len(utterances) == 1:
                        final_utterance = utterances[-1]
                    else:
                        final_utterance = (
                            " [CTXS] " + " ".join(utterances[:-1]) + " [CTXE] " + utterances[-1]
                        )

                elif num_past_utterances == 0 and num_future_utterances > 0:
                    if len(utterances) == 1:
                        final_utterance = utterances[0] + " [CTX] "
                    else:
                        final_utterance = (
                            utterances[0] + " [CTX] " + " ".join(utterances[1:])
                        )

                elif num_past_utterances > 0 and num_future_utterances > 0:
                    if len(utterances) == 1:
                        final_utterance = utterances[0]
                    else:
                        final_utterance = (
                            " ".join(utterances[:offset])
                            + " [BFR] "
                            + utterances[offset]
                            + " [AFT] "
                            + " ".join(utterances[offset + 1 :])
                        )
                else:
                    raise ValueError

                #input_ids_attention_mask = tokenizer(final_utterance)
                #input_ids = input_ids_attention_mask["input_ids"]
                #attention_mask = input_ids_attention_mask["attention_mask"]

                input_ = {
                    "id": self.utterance_ordered[diaid][idx],
                    "utterance": final_utterance,
                    "label": self.id2emotion[label],
                    "num_tokens": num_tokens[idx]
                }

                inputs.append(input_)

        logging.info(f"number of truncated utterances: {num_truncated}")
        return inputs

    def _string2tokens(self):
        """Convert string to (BPE) tokens."""
        logging.info(f"converting utterances into tokens ...")

        diaids = sorted(list(self.utterance_ordered.keys()))

        set_seed(self.SEED)
        random.shuffle(diaids)

        if self.ONLY_UPTO:
            logging.info(f"Using only the first {self.ONLY_UPTO} dialogues ...")
            diaids = diaids[: self.ONLY_UPTO]

        logging.info(f"creating input utterance data ... ")
        self.inputs_ = self._create_input(
            diaids=diaids,
            speaker_mode=self.speaker_mode,
            num_past_utterances=self.num_past_utterances,
            num_future_utterances=self.num_future_utterances,
        )

    def __getitem__(self, index):

        return self.inputs_[index]

In [173]:
import pandas as pd

In [174]:
ds_meld_train = ErcTextDataset(DATASET="MELD", SPLIT="train", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:29.144 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:29.145 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|██████████████████████████████████████| 1038/1038 [00:01<00:00, 682.29it/s]
2023-06-09 16:23:32.071 INFO 567090319 - _create_input: number of truncated utterances: 14


In [175]:
ds_meld_val = ErcTextDataset(DATASET="MELD", SPLIT="val", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:36.135 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:36.136 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|████████████████████████████████████████| 114/114 [00:00<00:00, 713.14it/s]
2023-06-09 16:23:37.482 INFO 567090319 - _create_input: number of truncated utterances: 1


In [176]:
ds_meld_test = ErcTextDataset(DATASET="MELD", SPLIT="test", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:37.500 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:37.501 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|████████████████████████████████████████| 280/280 [00:00<00:00, 686.26it/s]
2023-06-09 16:23:39.221 INFO 567090319 - _create_input: number of truncated utterances: 0


In [177]:
ds_iemocap_train = ErcTextDataset(DATASET="IEMOCAP", SPLIT="train", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:39.239 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:39.240 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 88.07it/s]
2023-06-09 16:23:41.692 INFO 567090319 - _create_input: number of truncated utterances: 585


In [178]:
ds_iemocap_val = ErcTextDataset(DATASET="IEMOCAP", SPLIT="val", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:41.705 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:41.707 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 82.40it/s]
2023-06-09 16:23:43.243 INFO 567090319 - _create_input: number of truncated utterances: 167


In [179]:
ds_iemocap_test = ErcTextDataset(DATASET="IEMOCAP", SPLIT="test", speaker_mode=None, num_past_utterances=2, num_future_utterances=2)

2023-06-09 16:23:43.256 INFO 567090319 - _string2tokens: converting utterances into tokens ...
2023-06-09 16:23:43.257 INFO 567090319 - _string2tokens: creating input utterance data ... 
100%|███████████████████████████████████████████| 31/31 [00:00<00:00, 81.58it/s]
2023-06-09 16:23:44.989 INFO 567090319 - _create_input: number of truncated utterances: 285


In [180]:
keep_iemocap = ["joy", "neutral", "sadness", "anger"]

In [181]:
df_meld_train = pd.DataFrame(ds_meld_train.inputs_)
df_meld_train = df_meld_train.assign(path=df_meld_train.id.apply(lambda x: f"multimodal-datasets/MELD/raw-audios/train_resampled/{x}.wav"))

df_meld_val = pd.DataFrame(ds_meld_val.inputs_)
df_meld_val = df_meld_val.assign(path=df_meld_val.id.apply(lambda x: f"multimodal-datasets/MELD/raw-audios/val_resampled/{x}.wav"))

df_meld_test = pd.DataFrame(ds_meld_test.inputs_)
df_meld_test = df_meld_test.assign(path=df_meld_test.id.apply(lambda x: f"multimodal-datasets/MELD/raw-audios/test_resampled/{x}.wav"))

df_iemocap_train = pd.DataFrame(ds_iemocap_train.inputs_)
df_iemocap_train = df_iemocap_train[df_iemocap_train["label"].isin(keep_iemocap)]
df_iemocap_train = df_iemocap_train.assign(path=df_iemocap_train.id.apply(lambda x: f"multimodal-datasets/IEMOCAP/raw-audios/train/{x}.wav"))

df_iemocap_val = pd.DataFrame(ds_iemocap_val.inputs_)
df_iemocap_val = df_iemocap_val[df_iemocap_val["label"].isin(keep_iemocap)]
df_iemocap_val = df_iemocap_val.assign(path=df_iemocap_val.id.apply(lambda x: f"multimodal-datasets/IEMOCAP/raw-audios/val/{x}.wav"))

df_iemocap_test = pd.DataFrame(ds_iemocap_test.inputs_)
df_iemocap_test = df_iemocap_test[df_iemocap_test["label"].isin(keep_iemocap)]
df_iemocap_test = df_iemocap_test.assign(path=df_iemocap_test.id.apply(lambda x: f"multimodal-datasets/IEMOCAP/raw-audios/test/{x}.wav"))

In [182]:
df_train = pd.concat([df_iemocap_train, df_iemocap_val, df_meld_train, df_meld_val]).reset_index(drop=True)
df_val = df_train.sample(frac=0.100, random_state=42).reset_index(drop=True)
df_train = df_train[~df_train["id"].isin(df_val["id"].tolist())].reset_index(drop=True).drop(columns=["id"])
df_train

Unnamed: 0,utterance,label,num_tokens,path
0,The only one I know still love his parents. [B...,joy,15,multimodal-datasets/IEMOCAP/raw-audios/train/S...
1,The only one I know still love his parents. Ye...,neutral,13,multimodal-datasets/IEMOCAP/raw-audios/train/S...
2,Oh it's not bad thing it's good thing. You kno...,joy,9,multimodal-datasets/IEMOCAP/raw-audios/train/S...
3,"You know it's nice here, the air is sweet. You...",sadness,14,multimodal-datasets/IEMOCAP/raw-audios/train/S...
4,"You're not sorry you came? Not sorry, no. I c...",sadness,5,multimodal-datasets/IEMOCAP/raw-audios/train/S...
...,...,...,...,...
13725,That would be no. Come on. It doesn't taste ba...,neutral,15,multimodal-datasets/MELD/raw-audios/val_resamp...
13726,"Come on. It doesn't taste bad. Yeah, it's kind...",joy,9,multimodal-datasets/MELD/raw-audios/val_resamp...
13727,"Yeah, it's kinda sweet, sorta like, uh... Cant...",neutral,4,multimodal-datasets/MELD/raw-audios/val_resamp...
13728,Cantaloupe juice. Exactly. [BFR] You've tasted...,surprise,12,multimodal-datasets/MELD/raw-audios/val_resamp...


In [186]:
df_train["num_tokens"].quantile(q=0.95)

34.0

In [187]:
df_train["num_tokens"].quantile(q=0.99)

49.0

In [136]:
df_train[df_train["path"].apply(lambda x: True if "IEMOCAP" in x else False)]["label"].unique()

array(['joy', 'neutral', 'sadness', 'anger'], dtype=object)

In [137]:
df_val

Unnamed: 0,id,utterance,label,path
0,Ses04M_script02_1_M005,The flashlight; the silver one. There's only ...,neutral,multimodal-datasets/IEMOCAP/raw-audios/val/Ses...
1,Ses03F_impro05_F016,Did you lock- Did you lock it even? You probab...,anger,multimodal-datasets/IEMOCAP/raw-audios/train/S...
2,dia155_utt10,Oh you’re serious. Sure! Great! Well umm [BFR]...,neutral,multimodal-datasets/MELD/raw-audios/train_resa...
3,dia626_utt9,"Whoa! Are you okay? Whew! Stood up to fast, go...",neutral,multimodal-datasets/MELD/raw-audios/train_resa...
4,dia449_utt4,I’m so-so sorry about yesterday. I-I’m really ...,joy,multimodal-datasets/MELD/raw-audios/train_resa...
...,...,...,...,...
1534,dia255_utt7,I wasn't thinking. I was too busy fallin'... D...,joy,multimodal-datasets/MELD/raw-audios/train_resa...
1535,Ses03M_script02_1_M014,Are you cold? Do you want my jacket? We shoul...,joy,multimodal-datasets/IEMOCAP/raw-audios/train/S...
1536,dia546_utt4,Which one do you think she is? May I help you?...,neutral,multimodal-datasets/MELD/raw-audios/train_resa...
1537,dia262_utt0,"[BFR] Ok, you can do this. [AFT] It's just li...",neutral,multimodal-datasets/MELD/raw-audios/train_resa...


In [138]:
df_test = pd.concat([df_iemocap_test.drop(columns=["id"]), df_meld_test.drop(columns=["id"])]).reset_index(drop=True)
df_test

Unnamed: 0,utterance,label,path
0,"[BFR] Brian, I need help. [AFT] Babe, I don't...",sadness,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
1,"Brian, I need help. [BFR] Babe, I don't know w...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
2,"Babe, I don't know what to tell you. Don't gi...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
3,"I wish I had some answers for you, babe. I me...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
4,I went to school and I got my degree. And I g...,neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
...,...,...,...
3846,"Oh, it is. It isn't. [BFR] It is. [AFT] Isn't!",neutral,multimodal-datasets/MELD/raw-audios/test_resam...
3847,It isn't. It is. [BFR] Isn't! [AFT],anger,multimodal-datasets/MELD/raw-audios/test_resam...
3848,[BFR] Yeah baby! [AFT] I’m really glad you gu...,joy,multimodal-datasets/MELD/raw-audios/test_resam...
3849,Yeah baby! [BFR] I’m really glad you guys are ...,neutral,multimodal-datasets/MELD/raw-audios/test_resam...


In [139]:
import os

In [140]:
not_file = []
for i, row in df_train.iterrows():
    if not os.path.isfile(f"../{row['path']}"):
        not_file.append(row['path'])

In [141]:
not_file_test = []
for i, row in df_test.iterrows():
    if not os.path.isfile(f"../{row['path']}"):
        not_file_test.append(row['path'])

In [142]:
not_file

['multimodal-datasets/MELD/raw-audios/train_resampled/dia125_utt3.wav',
 'multimodal-datasets/MELD/raw-audios/val_resampled/dia110_utt7.wav']

In [143]:
not_file_test

[]

In [144]:
len(df_train)

13730

In [145]:
df_train = df_train[df_train["path"] != 'multimodal-datasets/MELD/raw-audios/train_resampled/dia125_utt3.wav']
df_train = df_train[df_train["path"] != 'multimodal-datasets/MELD/raw-audios/val_resampled/dia110_utt7.wav']

In [146]:
len(df_train)

13728

In [147]:
len(df_test)

3851

In [148]:
df_train

Unnamed: 0,utterance,label,path
0,The only one I know still love his parents. [B...,joy,multimodal-datasets/IEMOCAP/raw-audios/train/S...
1,The only one I know still love his parents. Ye...,neutral,multimodal-datasets/IEMOCAP/raw-audios/train/S...
2,Oh it's not bad thing it's good thing. You kno...,joy,multimodal-datasets/IEMOCAP/raw-audios/train/S...
3,"You know it's nice here, the air is sweet. You...",sadness,multimodal-datasets/IEMOCAP/raw-audios/train/S...
4,"You're not sorry you came? Not sorry, no. I c...",sadness,multimodal-datasets/IEMOCAP/raw-audios/train/S...
...,...,...,...
13725,That would be no. Come on. It doesn't taste ba...,neutral,multimodal-datasets/MELD/raw-audios/val_resamp...
13726,"Come on. It doesn't taste bad. Yeah, it's kind...",joy,multimodal-datasets/MELD/raw-audios/val_resamp...
13727,"Yeah, it's kinda sweet, sorta like, uh... Cant...",neutral,multimodal-datasets/MELD/raw-audios/val_resamp...
13728,Cantaloupe juice. Exactly. [BFR] You've tasted...,surprise,multimodal-datasets/MELD/raw-audios/val_resamp...


In [149]:
df_test

Unnamed: 0,utterance,label,path
0,"[BFR] Brian, I need help. [AFT] Babe, I don't...",sadness,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
1,"Brian, I need help. [BFR] Babe, I don't know w...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
2,"Babe, I don't know what to tell you. Don't gi...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
3,"I wish I had some answers for you, babe. I me...",neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
4,I went to school and I got my degree. And I g...,neutral,multimodal-datasets/IEMOCAP/raw-audios/test/Se...
...,...,...,...
3846,"Oh, it is. It isn't. [BFR] It is. [AFT] Isn't!",neutral,multimodal-datasets/MELD/raw-audios/test_resam...
3847,It isn't. It is. [BFR] Isn't! [AFT],anger,multimodal-datasets/MELD/raw-audios/test_resam...
3848,[BFR] Yeah baby! [AFT] I’m really glad you gu...,joy,multimodal-datasets/MELD/raw-audios/test_resam...
3849,Yeah baby! [BFR] I’m really glad you guys are ...,neutral,multimodal-datasets/MELD/raw-audios/test_resam...


In [150]:
df_train.reset_index(drop=True).to_csv("train_text_df.csv")

In [151]:
df_test.reset_index(drop=True).to_csv("test_text_df.csv")