available_columns = 'emotion', 'gender', 'subset', 'file_path'

In [1]:
import os, json
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

class Loader:
    identifier = None
    
    @classmethod # returns dataframe with dataset info
    def load_dataset(cls): return None

# Load CREMA-D Normal

In [2]:
class NormalCrema(Loader):
    identifier = 'crema_normal'
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/crema'
        for file in os.listdir(path):
            component = np.array(file.replace('.', '_').split('_'))
            component = np.array([component[2], None, None, os.path.join(path, file)])
            components.append(component)
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Gender split CREMA-D Normal

In [3]:
class MaleSplitCrema(Loader):
    identifier = 'crema_male'
    _crema_d_female_samples = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,
          1052,1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,1082,1084,1089,1091]
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/crema'
        for file in os.listdir(path):
            component = np.array(file.replace('.', '_').split('_'))
            
            if int(component[0]) in cls._crema_d_female_samples:
                continue
            
            component = np.array([component[2], "Male", None, os.path.join(path, file)])
            components.append(component)
        print(len(components))
        
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

In [4]:
class FemaleSplitCrema(Loader):
    identifier = 'crema_male'
    _crema_d_female_samples = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,
          1052,1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,1082,1084,1089,1091]
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/crema'
        for file in os.listdir(path):
            component = np.array(file.replace('.', '_').split('_'))
            
            if int(component[0]) not in cls._crema_d_female_samples:
                continue
            
            component = np.array([component[2], "Female", None, os.path.join(path, file)])
            components.append(component)
        print(len(components))
        
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Load CREMA-D Splitted

In [5]:
class SplittedCrema(Loader):
    identifier = 'crema_splitted'
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/crema_splitted'
        subsets = ['Test', 'Train', 'Validate']
        for subset in subsets:
            subset_path = f'{path}/{subset}'
            for file in os.listdir(subset_path):
                component = np.array(file.replace('.', '_').split('_'))
                component = np.array([component[2], None, subset, os.path.join(subset_path, file)])
                components.append(component)
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Load RAVDESS Normal

In [6]:
class NormalRavdess(Loader):
    identifier = 'ravdess_normal'
    _emotion_labels = {
      '01':'"neutral"',
      '02':'calm',
      '03':'happy',
      '04':'sad',
      '05':'angry',
      '06':'fearful',
      '07':'disgust',
      '08':'surprised'
    }
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/ravdess'
        
        for file in os.listdir(path):
            component = np.array(file.replace('.', '-').split('-'))
            component = np.array([cls._emotion_labels[component[2]], None, None, os.path.join(path, file)])
            components.append(component)
            
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Gender split RAVDESS Normal

In [7]:
class MaleRavdess(Loader):
    identifier = 'ravdess_normal'
    _emotion_labels = {
      '01':'"neutral"',
      '02':'calm',
      '03':'happy',
      '04':'sad',
      '05':'angry',
      '06':'fearful',
      '07':'disgust',
      '08':'surprised'
    }
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/ravdess'
        
        for file in os.listdir(path):
            component = np.array(file.replace('.', '-').split('-'))
            
            if int(component[6]) % 2 == 0:
                continue
            
            component = np.array([cls._emotion_labels[component[2]], "Male", None, os.path.join(path, file)])
            components.append(component)
            
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

In [8]:
class NormalRavdess(Loader):
    identifier = 'ravdess_normal'
    _emotion_labels = {
      '01':'"neutral"',
      '02':'calm',
      '03':'happy',
      '04':'sad',
      '05':'angry',
      '06':'fearful',
      '07':'disgust',
      '08':'surprised'
    }
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/ravdess'
        
        for file in os.listdir(path):
            component = np.array(file.replace('.', '-').split('-'))
            
            if int(component[6]) % 2 != 0:
                continue
                
            component = np.array([cls._emotion_labels[component[2]], "Female", None, os.path.join(path, file)])
            components.append(component)
            
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Load RAVDESS Splitted

In [9]:
class SplittedRavdess(Loader):
    identifier = 'ravdess_splitted'
    
    @classmethod
    def load_dataset(cls):
        components=[]
        path = '/datc/emo/notebooks/source/datasets/ravdess_splitted'
        subsets = ['Test', 'Train', 'Validate']
        return pd.DataFrame(data=components, columns=['emotion','gender','subset','file_path'])

# Load JSON CREMA-D

In [10]:
class CremadPLoader(Loader):
    BASE="../datasets/json"
    identifier="Crema-d Processed Loader"
    
    @classmethod
    def load_dataset(self, variant):
        dataset = {}
        train_path=f"{self.BASE}/train/crema-d/train_{variant}_crema-d_preprocessed_data.json"
        test_path=f"{self.BASE}/test/crema-d/test_{variant}_crema-d_preprocessed_data.json"
        
        with open(train_path) as train_data:
            dataset["train"] = json.load(train_data)  
        with open(test_path) as test_data:
            dataset["test"] = json.load(test_data)   
        return dataset

# Load JSON RAVDESS

In [11]:
class RavdessPLoader(Loader):
    BASE="../datasets/json"
    identifier="Ravdess Processed Loader"
    
    @classmethod
    def load_dataset(self, variant): 
        dataset = {}
        train_path=f"{self.BASE}/train/ravdess/train_{variant}_ravdess_preprocessed_data.json"
        test_path=f"{self.BASE}/test/ravdess/test_{variant}_ravdess_preprocessed_data.json"
        
        with open(train_path) as train_data:
            dataset["train"] = json.load(train_data)    
        with open(test_path) as test_data:
            dataset["test"] = json.load(test_data)
        return dataset

# Load JSON COMBINED

In [12]:
class CombinedPLoader(Loader):
    identifier="Combined Processed Loader"
    
    @classmethod
    def load_dataset(self, variant):
        crema = CremadPLoader.load_dataset(variant)
        crema_df = pd.DataFrame.from_dict(crema)

        ravdess = RavdessPLoader.load_dataset(variant)
        ravdess_df = pd.DataFrame.from_dict(ravdess)
        
        combined_df = pd.DataFrame()
        combined_df['train'] = crema_df['train'] + ravdess_df['train']
        combined_df['test'] = crema_df['test'] + ravdess_df['test']
  
        return combined_df

# Load JSON Positive and Negative RAVDESS

In [None]:
class RavdessPosNegLoader(Loader):
    BASE="../datasets/json"
    identifier="Ravdess Positive and Negative Processed Loader"
    
    @classmethod
    def load_dataset(self, variant): 
        dataset = {}
        train_path=f"{self.BASE}/train/ravdess/PN/train_{variant}_ravdess_preprocessed_data.json"
        test_path=f"{self.BASE}/test/ravdess/PN/test_{variant}_ravdess_preprocessed_data.json"
        
        with open(train_path) as train_data:
            dataset["train"] = json.load(train_data)    
        with open(test_path) as test_data:
            dataset["test"] = json.load(test_data)
        return dataset

# Load JSON Positive and Negative CREMA-D

In [None]:
class CremadPosNegLoader(Loader):
    BASE="../datasets/json"
    identifier="Crema-d Positive and Negative Processed Loader"
    
    @classmethod
    def load_dataset(self, variant):
        dataset = {}
        train_path=f"{self.BASE}/train/crema-d/PN/train_{variant}_crema-d_preprocessed_data.json"
        test_path=f"{self.BASE}/test/crema-d/PN/test_{variant}_crema-d_preprocessed_data.json"
        
        with open(train_path) as train_data:
            dataset["train"] = json.load(train_data)  
        with open(test_path) as test_data:
            dataset["test"] = json.load(test_data)   
        return dataset

# Load SAVEE Normal

In [4]:
class NormalSAVEE(Loader):
    identifier = 'savee_normal'
    
    @classmethod
    def load_dataset(cls):
        emotion = []
        gender = []
        paths = []
        subset = []
        path = '/data/emo/notebooks/source/datasets/SAVEE/'
        for i in os.listdir(path):
                if i[-8:-6] == '_a':
                    emotion.append('angry')
                elif i[-8:-6] == '_d':
                    emotion.append('disgust')
                elif i[-8:-6] == '_f':
                    emotion.append('fear')
                elif i[-8:-6] == '_h':
                    emotion.append('happy')
                elif i[-8:-6] == '_n':
                    emotion.append('neutral')
                elif i[-8:-6] == 'sa':
                    emotion.append('sad')
                elif i[-8:-6] == 'su':
                    emotion.append('surprise')
                else:
                    emotion.append('error')
                paths.append(path + i)
                gender.append('male')
                subset.append(None)
        SAVEE_df = pd.DataFrame(emotion, columns=['emotion'])
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(gender, columns=['gender'])], axis=1)
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(subset, columns=['subset'])], axis=1)
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(paths, columns=['file_path'])], axis=1)
        
        return pd.DataFrame(data=SAVEE_df, columns=['emotion','gender','subset','file_path'])

# Load SAVEE Splitted

In [27]:
class SplittedSavee(Loader):
    identifier = 'savee_splitted'
    
    @classmethod
    def load_dataset(cls):
        emotion = []
        gender = []
        paths = []
        subsetx = []
        path = '/datc/emo/notebooks/source/datasets/savee_splitted'
        subsets = ['Test', 'Train', 'Validate']
        for subset in subsets:
            subset_path = f'{path}/{subset}'
            for i in os.listdir(subset_path):
                if i[-8:-6] == '_a':
                    emotion.append('angry')
                elif i[-8:-6] == '_d':
                    emotion.append('disgust')
                elif i[-8:-6] == '_f':
                    emotion.append('fear')
                elif i[-8:-6] == '_h':
                    emotion.append('happy')
                elif i[-8:-6] == '_n':
                    emotion.append('neutral')
                elif i[-8:-6] == 'sa':
                    emotion.append('sad')
                elif i[-8:-6] == 'su':
                    emotion.append('surprise')
                else:
                    emotion.append('error')
                paths.append(path + i)
                gender.append('male')
                subsetx.append(subset)
        SAVEE_df = pd.DataFrame(emotion, columns=['emotion'])
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(gender, columns=['gender'])], axis=1)
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(subsetx, columns=['subset'])], axis=1)
        SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(paths, columns=['file_path'])], axis=1)
        return pd.DataFrame(data=SAVEE_df, columns=['emotion','gender','subset','file_path'])

In [33]:
for data in SplittedSavee.load_dataset().values:
    print(data)

420
['neutral' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedDC_n10.wav']
['angry' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedDC_a08.wav']
['disgust' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedJE_d03.wav']
['angry' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedJE_a06.wav']
['neutral' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedJE_n17.wav']
['happy' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedJK_h03.wav']
['sad' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedDC_sa02.wav']
['fear' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedKL_f05.wav']
['happy' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedJK_h02.wav']
['happy' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedDC_h09.wav']
['angry' 'male' 'Test'
 '/datc/emo/notebooks/source/datasets/savee_splittedDC_a06.wav']
['disgust' 'male' 'Test'

# Load TESS Normal


In [5]:
class NormalTESS(Loader):
    identifier = 'tess_normal'
    
    @classmethod
    def load_dataset(cls):
        emotion = []
        gender = []
        paths = []
        subset = []
        path = '/data/emo/notebooks/source/datasets/TESS/'
        for i in os.listdir(path):
            fname = os.listdir(path + i)
            for f in fname:
                if i == 'OAF_angry' or i == 'YAF_angry':
                    emotion.append('angry')
                elif i == 'OAF_disgust' or i == 'YAF_disgust':
                    emotion.append('disgust')
                elif i == 'OAF_Fear' or i == 'YAF_fear':
                    emotion.append('fear')
                elif i == 'OAF_happy' or i == 'YAF_happy':
                    emotion.append('happy')
                elif i == 'OAF_neutral' or i == 'YAF_neutral':
                    emotion.append('neutral')
                elif i == 'OAF_Pleasant_surprise' or i == 'YAF_pleasant_surprised':
                    emotion.append('surprise')
                elif i == 'OAF_Sad' or i == 'YAF_sad':
                    emotion.append('sad')
                else:
                    emotion.append('Unknown')
                paths.append(path + i + "/" + f)
                gender.append('female')
                subset.append(None)
        TESS_df = pd.DataFrame(emotion, columns=['emotion'])
        TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns=['gender'])], axis=1)
        TESS_df = pd.concat([TESS_df, pd.DataFrame(subset, columns=['subset'])], axis=1)
        TESS_df = pd.concat([TESS_df, pd.DataFrame(paths, columns=['file_path'])], axis=1)
        
        return pd.DataFrame(data=TESS_df, columns=['emotion','gender','subset','file_path'])

# Load TESS Splitted

In [46]:
class SplittedTess(Loader):
    identifier = 'savee_splitted'
    
    @classmethod
    def load_dataset(cls):
        emotion = []
        gender = []
        paths = []
        subsetx = []
        path = '/datc/emo/notebooks/source/datasets/tess_splitted'
        subsets = ['Test', 'Train', 'Validate']
        for subset in subsets:
            subset_path = f'{path}/{subset}'
            for i in os.listdir(subset_path):
                if 'angry' in i:
                    emotion.append('angry')
                elif 'disgust' in i:
                    emotion.append('disgust')
                elif 'fear' in i:
                    emotion.append('fear')
                elif 'happy' in i:
                    emotion.append('happy')
                elif 'neutral' in i:
                    emotion.append('neutral')
                elif 'suprised' in i:
                    emotion.append('surprise')
                elif 'sad' in i:
                    emotion.append('sad')
                else:
                    emotion.append('Unknown')
                paths.append(subset_path)
                gender.append('female')
                subsetx.append(None)
        TESS_df = pd.DataFrame(emotion, columns=['emotion'])
        TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns=['gender'])], axis=1)
        TESS_df = pd.concat([TESS_df, pd.DataFrame(subsetx, columns=['subset'])], axis=1)
        TESS_df = pd.concat([TESS_df, pd.DataFrame(paths, columns=['file_path'])], axis=1)
        return pd.DataFrame(data=TESS_df, columns=['emotion','gender','subset','file_path'])