In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns

from pathlib import Path

## Read data

In [2]:
# Root directory of the combined dataset; the helper functions in this
# notebook look for "train" and "test" sub-folders inside it.
parent_folder = "../data/all_datasets"

# Emotion labels. Each label is used as a sub-folder name under the train/test
# folders and as a column name in the per-emotion table.
emotions = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad"]

In [3]:
def get_all_samples_names(parent_folder, emotion_labels=None):
    """Collect the file names of every emotion folder in the train and test splits.

    Parameters
    ----------
    parent_folder : str or os.PathLike
        Directory containing the ``train`` and ``test`` sub-folders, each of
        which holds one sub-folder per emotion label.
    emotion_labels : iterable of str, optional
        Emotion sub-folder names to read. When omitted, the notebook-level
        ``emotions`` list is used.

    Returns
    -------
    list of list of str
        One ``os.listdir`` result per (split, emotion) pair: all ``train``
        folders first, then all ``test`` folders, each in label order. The
        order of names inside a listing is whatever ``os.listdir`` returns.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when an expected
        ``<split>/<emotion>`` folder is missing or unreadable.
    """
    # Materialise the labels so a one-shot iterable can be reused for both splits.
    labels = list(emotions if emotion_labels is None else emotion_labels)

    # Every path is built from the untouched root, so the argument is never
    # reassigned and there is no need to climb back up with ``Path(...).parent``.
    return [
        os.listdir(os.path.join(parent_folder, split, label))
        for split in ("train", "test")
        for label in labels
    ]

In [4]:
def get_all_samples_names_per_emotion(parent_folder, emotion_labels=None):
    """Build a one-row table listing every sample file name per emotion.

    Parameters
    ----------
    parent_folder : str or os.PathLike
        Directory containing the ``train`` and ``test`` sub-folders, each of
        which holds one sub-folder per emotion label.
    emotion_labels : iterable of str, optional
        Emotion sub-folder names to read. When omitted, the notebook-level
        ``emotions`` list is used.

    Returns
    -------
    pandas.DataFrame
        A frame with one column per emotion label and a single row (index
        ``0``). Each cell holds one list: the ``train`` file names of that
        emotion followed by its ``test`` file names.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when an expected
        ``<split>/<emotion>`` folder is missing or unreadable.
    """
    labels = list(emotions if emotion_labels is None else emotion_labels)

    cells = {}
    for label in labels:
        file_names = os.listdir(os.path.join(parent_folder, "train", label))
        file_names.extend(os.listdir(os.path.join(parent_folder, "test", label)))
        # The extra list level makes pandas store the whole list as ONE cell
        # instead of spreading the file names over many rows.
        cells[label] = [file_names]

    # Build the frame once rather than growing it column by column.
    return pd.DataFrame(cells)

In [5]:
# Preview the per-emotion table (one column per emotion label).
get_all_samples_names_per_emotion(parent_folder)

Unnamed: 0,Anger,Disgust,Fear,Happy,Neutral,Sad
0,"[03-01-05-01-02-01-16.wav, 1022_ITS_ANG_XX.wav...","[1073_IOM_DIS_XX.wav, 1066_IOM_DIS_XX.wav, OAF...","[03-01-06-01-02-02-02.wav, 1029_TAI_FEA_XX.wav...","[YAF_wire_happy.wav, OAF_bought_happy.wav, 100...","[1060_ITS_NEU_XX.wav, 1075_ITS_NEU_XX.wav, OAF...","[1039_IEO_SAD_MD.wav, JK_sa01.wav, 1042_ITS_SA..."


In [6]:
def get_all_samples_names_per_emotion_x(parent_folder, emotion_labels=None):
    """Build the one-row per-emotion table and print how many emotions were read.

    Parameters
    ----------
    parent_folder : str or os.PathLike
        Directory containing the ``train`` and ``test`` sub-folders, each of
        which holds one sub-folder per emotion label.
    emotion_labels : iterable of str, optional
        Emotion sub-folder names to read. When omitted, the notebook-level
        ``emotions`` list is used.

    Returns
    -------
    pandas.DataFrame
        A frame with one column per emotion label and a single row (index
        ``0``). Each cell holds one list: the ``train`` file names of that
        emotion followed by its ``test`` file names.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when an expected
        ``<split>/<emotion>`` folder is missing or unreadable.

    Notes
    -----
    Prints the number of emotion labels processed to standard output.
    """
    labels = list(emotions if emotion_labels is None else emotion_labels)

    cells = {}
    for label in labels:
        file_names = os.listdir(os.path.join(parent_folder, "train", label))
        file_names.extend(os.listdir(os.path.join(parent_folder, "test", label)))
        # The extra list level makes pandas store the whole list as ONE cell.
        cells[label] = [file_names]

    # One listing is collected per label, so this equals the number of
    # per-emotion listings gathered.
    print(len(labels))

    return pd.DataFrame(cells)

In [7]:
# Build the per-emotion table with the `_x` variant, which also prints the
# number of emotions it processed.
samples_per_emotion = get_all_samples_names_per_emotion_x(parent_folder)

6


In [8]:
def count_samples_per_database(list_of_all_samples):
    """Count how many file names belong to each source dataset.

    The dataset is inferred from the file-name pattern alone:

    * name contains ``_`` and the text before the first ``_`` contains
      ``OAF`` or ``YAF``                                  -> TESS
    * name contains ``_`` and the text before the first ``_`` is one of
      ``DC``, ``JE``, ``KL``, ``JK``                      -> SAVEE
    * any other name containing ``_``                     -> CREMA-D
    * name without ``_`` that contains ``-``              -> RAVDESS
    * names containing neither character are not counted.

    Parameters
    ----------
    list_of_all_samples : iterable of str
        File names to classify.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns ``tess_count``, ``cremad_count``,
        ``ravdess_count`` and ``savee_count`` (in that order).
    """
    savee_prefixes = ("DC", "JE", "KL", "JK")
    # Column names carry no stray whitespace, so each count can be selected
    # by its plain name (e.g. ``result["cremad_count"]``).
    counts = {
        "tess_count": 0,
        "cremad_count": 0,
        "ravdess_count": 0,
        "savee_count": 0,
    }

    for sample in list_of_all_samples:
        if "_" in sample:
            # Only the part before the first underscore identifies the dataset.
            prefix = sample.split("_", 1)[0]
            if "OAF" in prefix or "YAF" in prefix:      # TESS
                counts["tess_count"] += 1
            elif prefix in savee_prefixes:              # SAVEE
                counts["savee_count"] += 1
            else:                                       # CREMA-D
                counts["cremad_count"] += 1
        elif "-" in sample:                             # RAVDESS
            counts["ravdess_count"] += 1

    return pd.DataFrame({column: [value] for column, value in counts.items()})

In [9]:
def present_results(samples_per_emotion):
    """Print a per-dataset breakdown for each emotion, then the per-emotion totals.

    Parameters
    ----------
    samples_per_emotion : mapping-like (e.g. pandas.DataFrame)
        Indexed first by emotion label and then by ``0``; that entry must be
        the collection of file names for the emotion.

    The labels come from the notebook-level ``emotions`` list. Nothing is
    returned; all results are written to standard output.
    """

    def names_for(label):
        # Position/label 0 holds the file names of the given emotion.
        return samples_per_emotion[label][0]

    # First pass: one dataset-count table per emotion.
    for label in emotions:
        header = f"---------------------------------{label}---------------------------------"
        breakdown = count_samples_per_database(names_for(label))
        print(header)
        print(breakdown)

    print("\n_______________________________________________________________________")

    # Second pass: total number of samples per emotion.
    for label in emotions:
        total = len(names_for(label))
        print(f"{label}: {total}")

In [10]:
# Rebuild the per-emotion table with the plain variant; this replaces the
# value previously bound to `samples_per_emotion`.
samples_per_emotion = get_all_samples_names_per_emotion(parent_folder)

In [11]:
# Display the per-emotion table.
samples_per_emotion

Unnamed: 0,Anger,Disgust,Fear,Happy,Neutral,Sad
0,"[03-01-05-01-02-01-16.wav, 1022_ITS_ANG_XX.wav...","[1073_IOM_DIS_XX.wav, 1066_IOM_DIS_XX.wav, OAF...","[03-01-06-01-02-02-02.wav, 1029_TAI_FEA_XX.wav...","[YAF_wire_happy.wav, OAF_bought_happy.wav, 100...","[1060_ITS_NEU_XX.wav, 1075_ITS_NEU_XX.wav, OAF...","[1039_IEO_SAD_MD.wav, JK_sa01.wav, 1042_ITS_SA..."


In [12]:
# Print the per-dataset counts for every emotion, followed by the total
# number of samples per emotion.
present_results(samples_per_emotion)

---------------------------------Anger---------------------------------
   tess_count  cremad_count   ravdess_count  savee_count
0         400           1271            192           60
---------------------------------Disgust---------------------------------
   tess_count  cremad_count   ravdess_count  savee_count
0         400           1271            192           60
---------------------------------Fear---------------------------------
   tess_count  cremad_count   ravdess_count  savee_count
0         400           1271            192           60
---------------------------------Happy---------------------------------
   tess_count  cremad_count   ravdess_count  savee_count
0         400           1271            192           60
---------------------------------Neutral---------------------------------
   tess_count  cremad_count   ravdess_count  savee_count
0         399           1088             96          120
---------------------------------Sad-------------------------------

In [13]:
# Flatten the per-folder listings (one list per split/emotion folder) into a
# single list of file names.
all_samples = [
    file_name
    for folder_listing in get_all_samples_names(parent_folder)
    for file_name in folder_listing
]

# Count the samples contributed by each source dataset over the whole corpus.
samples_number_per_dataset = count_samples_per_database(all_samples)
samples_number_per_dataset

Unnamed: 0,tess_count,cremad_count,ravdess_count,savee_count
0,2399,7443,1056,420
