In [16]:
import pandas as pd
import numpy as np
import pathlib
from tqdm.notebook import tqdm
import librosa
import awkward as ak
import matplotlib.pyplot as plt

In [17]:
dataset_folder = pathlib.Path("RavdessAudio/RavdessAudioOnly")

In [18]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok


In [19]:
file_list = list(dataset_folder.iterdir())
for i, path in enumerate(file_list):
    if path.suffix != ".wav":
        file_list.pop(i)
file_list = sorted(file_list)
file_list

[WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-01.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-02.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-03.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-04.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-05.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-06.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-07.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-08.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-09.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-10.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-11.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-12.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-13.wav'),
 WindowsPath('RavdessAudio/RavdessAudioOnly/03-01-01-01-01-01-14

In [20]:
array = list()
for path in tqdm(file_list):
    librosa_audio_segment, sr = librosa.load(path, sr=None)
    array.append([librosa_audio_segment])

  0%|          | 0/2452 [00:00<?, ?it/s]

In [21]:
b = ak.ArrayBuilder()
for path in tqdm(file_list):
    librosa_audio_segment, sr = librosa.load(path, sr=None)
    b.begin_list()
    b.begin_list()
    for value in librosa_audio_segment:
        b.real(value)
    b.end_list() 
    b.end_list() 
array = b.snapshot()

  0%|          | 0/2452 [00:00<?, ?it/s]

MemoryError: Unable to allocate 3.59 GiB for an array with shape (3853039728,) and data type uint8

In [None]:
CATEGORICAL_FEATURES_NAMES = {
    "modality": {"01": "full-AV", "02": "video-only", "03": "audio-only"}, 
    "vocal_channel": {"01": "speech", "02": "song"},
    "emotion": {"01" : "neutral", "02" : "calm", "03" : "happy", "04" : "sad", "05" : "angry", "06" : "fearful", "07" : "disgust", "08" : "surprised"},
    "emotional_intensity": {"01" : "normal", "02" : "strong"},
    "statement": {"01" : "Kids are talking by the door", "02" : "Dogs are sitting by the door"},
    "repetition": {"01" : "1st", "02" : "2nd"},
    "actor": {str(i).zfill(2): str(i).zfill(2) for i in range(1, 25)}
}

In [None]:
df = list()
file_names = list()
for path in file_list:
    stem = path.stem.split("-")
    file_names.append(path.name)
    df.append(stem)
df = pd.DataFrame(df)
df.columns = list(CATEGORICAL_FEATURES_NAMES.keys())

for column in df.columns:
    df = df.replace(CATEGORICAL_FEATURES_NAMES)

df["sex"] = ["F" if i % 2 == 0 else "M" for i in df["actor"].astype(int)]
df["filename"] = file_names

In [None]:
df.head()

Unnamed: 0,modality,vocal_channel,emotion,emotional_intensity,statement,repetition,actor,sex,filename
0,audio-only,speech,neutral,normal,Kids are talking by the door,1st,1,M,03-01-01-01-01-01-01.wav
1,audio-only,speech,neutral,normal,Kids are talking by the door,1st,2,F,03-01-01-01-01-01-02.wav
2,audio-only,speech,neutral,normal,Kids are talking by the door,1st,3,M,03-01-01-01-01-01-03.wav
3,audio-only,speech,neutral,normal,Kids are talking by the door,1st,4,F,03-01-01-01-01-01-04.wav
4,audio-only,speech,neutral,normal,Kids are talking by the door,1st,5,M,03-01-01-01-01-01-05.wav


In [None]:
df_sorted_actor = df.sort_values(by=["actor", "filename"])
idxs_sorted = list(df_sorted_actor.index)

In [None]:
idx_train = list(df_sorted_actor[df_sorted_actor.actor.astype(int) < 19].index)
idx_test = list(df_sorted_actor[df_sorted_actor.actor.astype(int) >= 19].index)

In [None]:
X_train = array[idx_train]
X_test = array[idx_test]
Y_train = df_sorted_actor[df_sorted_actor.actor.astype(int) < 19]
Y_test = df_sorted_actor[df_sorted_actor.actor.astype(int) >= 19]

In [None]:
def pad_X(X, m_max, nan_value=0):
    return ak.fill_none(ak.pad_none(X, m_max, axis=2, clip=True), value=nan_value)

In [None]:
# find the max length of X_train
maximum = 0
for ts in X_train:
    length = len(np.asarray(np.ravel(ts)))
    if length > maximum:
        maximum = length
maximum

304304

In [None]:
%%time
X_train = (np.squeeze(np.array(pad_X(X_train, maximum, np.nan)))).astype((np.float32))
X_test = (np.squeeze(np.array(pad_X(X_test, maximum, np.nan)))).astype((np.float32))

MemoryError: Unable to allocate 4.14 GiB for an array with shape (556267712,) and data type float64

In [None]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(1828, 304304)
(624, 304304)
(1828, 9)
(624, 9)


In [None]:
np.save("RavdessAudioOnlyNumpy__X_train32.npy", X_train)
np.save("RavdessAudioOnlyNumpy__X_test32.npy", X_test)

In [None]:
Y_train.to_csv("RavdessAudioOnlyNumpy__Y_train.csv", index=False)
Y_test.to_csv("RavdessAudioOnlyNumpy__Y_test.csv", index=False)