In [117]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import glob
import tqdm



In [118]:
crema_dir = './data/CREMA-D-master'
crema_filepath = glob.glob(os.path.join(crema_dir, "AudioWAV", "*.wav"))
crema_filename = os.listdir(os.path.join(crema_dir, "AudioWAV"))

print(crema_filename[:5])

['1001_DFA_ANG_XX.wav', '1001_DFA_DIS_XX.wav', '1001_DFA_FEA_XX.wav', '1001_DFA_HAP_XX.wav', '1001_DFA_NEU_XX.wav']


In [119]:
crema_count = len(crema_filename)
print("The Number of CREMA-D Dataset:", crema_count)

The Number of CREMA-D Dataset: 7442


In [120]:
# CSV metadata load
meta_data_list = glob.glob(os.path.join(crema_dir, '*.csv')) + glob.glob(os.path.join(crema_dir, 'processedResults', '*.csv'))

for csv in meta_data_list:
    csv_file = pd.read_csv(csv, low_memory=False)
    print(csv)
    print(csv_file.head())
    print("-"*100)

./data/CREMA-D-master\finishedEmoResponses.csv
   Unnamed: 0         localid  sessionNums  queryType  questNum  pos      ttr  \
0           4  SSI_1090000584            1          1        91    4   4748.0   
1           8  SSI_1090000584            1          1        92    8   7521.0   
2          12  SSI_1090000584            1          1         1   12  10475.0   
3          16  SSI_1090000584            1          1         2   16   3914.0   
4          20  SSI_1090000584            1          1         3   20   3265.0   

   numTries  clipNum         clipName  
0         0     7443  ANG_HI_practice  
1         0     7444  SAD_MD_practice  
2         0      768  1010_MTI_SAD_XX  
3         0     2981  1037_IWL_HAP_XX  
4         0     1736  1022_IWW_FEA_XX  
----------------------------------------------------------------------------------------------------
./data/CREMA-D-master\finishedResponses.csv
   Unnamed: 0         localid  pos   ans    ttr  queryType  numTries  clipNum  \


In [121]:
feature_list = []
for fn in crema_filename:
    feature_list.append(fn.split(".wav")[0].split("_"))

In [122]:
print(np.shape(feature_list))
print(feature_list[:10])

(7442, 4)
[['1001', 'DFA', 'ANG', 'XX'], ['1001', 'DFA', 'DIS', 'XX'], ['1001', 'DFA', 'FEA', 'XX'], ['1001', 'DFA', 'HAP', 'XX'], ['1001', 'DFA', 'NEU', 'XX'], ['1001', 'DFA', 'SAD', 'XX'], ['1001', 'IEO', 'ANG', 'HI'], ['1001', 'IEO', 'ANG', 'LO'], ['1001', 'IEO', 'ANG', 'MD'], ['1001', 'IEO', 'DIS', 'HI']]


In [123]:
crema_ext = [os.path.splitext(fn)[-1] for fn in crema_filename]
crema_speaker = ["Crema_"+str(f[0]) for f in feature_list]

In [124]:
crema_sub_df = pd.read_csv(os.path.join(crema_dir, "VideoDemographics.csv"))

crema_age = []
crema_gender = []
crema_race = []
crema_accent = []
for f in feature_list:
    row = crema_sub_df['ActorID'] == int(f[0])
    
    crema_age.append(crema_sub_df['Age'][row].values[0])
    crema_gender.append(crema_sub_df['Sex'][row].values[0])
    crema_race.append(crema_sub_df['Race'][row].values[0])
    crema_accent.append(crema_sub_df['Ethnicity'][row].values[0])

In [125]:
crema_text_list = {"IEO":"It's eleven o'clock", "TIE":"That is exactly what happened", "IOM":"I'm on my way to the meeting",
                  "IWW":"I wonder what this is about", "TAI":"The airplane is almost full", "MTI":"Maybe tomorrow it will be cold",
                  "IWL":"I would like a new alarm clock", "ITH":"I think I have a doctor's appointment",
                  "DFA":"Don't forget a jacket", "ITS":"I think I've seen this before", "TSI":"The surface is slick",
                  "WSI":"We'll stop in a couple of minutes"}
crema_text = [crema_text_list[f[1]] for f in feature_list]

crema_emotion_list = {"ANG":"Anger", "DIS":"Disgust", "FEA":"Fear", "HAP":"Happy", "NEU":"Neutral", "SAD":"Sad"}
crema_emotion = [crema_emotion_list[f[2]] for f in feature_list]

crema_level_list = {"LO":"Low", "MD":"Medium", "HI":"High", "XX":"Unspecified"}
crema_level = [crema_level_list[f[3]] for f in feature_list]

In [126]:
crema_df = pd.DataFrame(index = range(0, crema_count), 
                  columns = ['Id', 'Dataset', 'Filepath', 'Filename', 'Ext', 'Speaker', 'Text', 
                             'Gender', 'Age', 'Race', 'Accent', 'Emotion'])

In [127]:
crema_df['Dataset'] = ['CREMA-D'] * crema_count
crema_df['Filepath'] = crema_filepath
crema_df['Filename'] = crema_filename
crema_df['Ext'] = crema_ext
crema_df['Speaker'] = crema_speaker
crema_df['Text'] = crema_text
crema_df['Gender'] = crema_gender
crema_df['Age'] = crema_age
crema_df['Race'] = crema_race
crema_df['Accent'] = crema_accent
crema_df['Intensity/Level'] = crema_level
crema_df['Emotion'] = crema_emotion

In [None]:
crema_df.head()
crema_df.to_csv("./dataset/crema-dataset.csv")


OSError: Cannot save file into a non-existent directory: '\dataset'