# Extracting 1D features (RAVDESS-dataset)

In [5]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm import tqdm

In [12]:
def _extract_features(wav_path, n_mfccs):
    """Return the time-averaged spectral feature vector for one audio file.

    Computes a mel spectrogram, MFCCs, a chromagram, spectral contrast and a
    tonnetz representation, stacks them along the feature axis and averages
    every feature row over time.

    Args:
        wav_path: Path of the audio file to load with ``librosa.load``.
        n_mfccs: Number of MFCC coefficients to keep.

    Returns:
        1-D ``numpy`` array holding the per-row mean of the stacked features.
    """
    y, sr = librosa.load(wav_path)

    # Magnitude spectrogram, and its square (power) for the mel filterbank.
    S = np.abs(librosa.stft(y))
    Y = S**2

    # Mel-scaled spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(S=Y, sr=sr)

    # MFCCs derived from the dB-scaled mel spectrogram
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrogram), n_mfcc=n_mfccs)

    # Chromagram (using STFT)
    # NOTE(review): this receives the magnitude spectrogram S rather than the
    # power spectrogram Y; kept as-is so the extracted features do not change,
    # but confirm against the librosa documentation that this is intended.
    chromagram = librosa.feature.chroma_stft(S=S, sr=sr)

    # Spectral contrast
    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)

    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic = librosa.effects.harmonic(y)
    tonnetz = librosa.feature.tonnetz(y=harmonic, sr=sr)

    # Stack along the feature axis; every block must share the same number of
    # frames for the concatenation to succeed.
    concat_features = np.concatenate(
        (mel_spectrogram, mfcc, chromagram, contrast, tonnetz), axis=0
    )

    # Collapse the time axis so each file becomes a single 1-D vector.
    return np.mean(concat_features, axis=1)


def process_files(src_folder, target_folder, n_mfccs=20):
    """Extract 1-D features for every ``.wav`` file below ``src_folder``.

    Each immediate subfolder of ``src_folder`` is scanned for ``.wav`` files.
    For every file a feature vector is saved as ``<target_folder>/x_data/x<i>.npy``
    (``i`` counts files across all subfolders). The label and actor id parsed
    from each file name are collected in the same order and written to
    ``<target_folder>/y.npy`` and ``<target_folder>/actors.npy`` as ``int8``.

    Args:
        src_folder: Directory whose immediate subfolders contain the audio.
        target_folder: Output directory; created (with parents) if missing.
        n_mfccs: Number of MFCC coefficients per file.

    Raises:
        ValueError: If a ``.wav`` file name does not carry digits at the
            positions the label/actor id are read from.
    """
    x_data = os.path.join(target_folder, "x_data")
    print(x_data)

    # makedirs also creates missing parents (e.g. a nested "v2/speech" target)
    # and is a no-op when the directory already exists.
    os.makedirs(x_data, exist_ok=True)

    # Directory listings come back in arbitrary order; sorting makes the
    # x<i> <-> label mapping reproducible between runs.
    subfolders = sorted(f.path for f in os.scandir(src_folder) if f.is_dir())

    # Initialised up front so an empty source folder still yields valid
    # (empty) label files instead of a NameError.
    labels = []
    actors = []
    i = 0

    for subfolder in tqdm(subfolders):
        for file in sorted(os.listdir(subfolder)):
            if not file.endswith(".wav"):
                continue

            features = _extract_features(os.path.join(subfolder, file), n_mfccs)

            # Label is the single digit at index 7 of the file name; actor id is
            # the two digits right before the extension.
            # NOTE(review): assumes RAVDESS-style file names - confirm.
            labels.append(int(file[7]))
            actors.append(int(file[-6:-4]))

            # np.save appends the ".npy" extension itself.
            np.save(os.path.join(x_data, "x" + str(i)), features)
            i += 1

    np.save(os.path.join(target_folder, "y"), np.array(labels, dtype=np.int8))
    np.save(os.path.join(target_folder, "actors"), np.array(actors, dtype=np.int8))

## Processing speech

In [15]:
# Where the speech features are written, and where the speech audio is read from.
# Alternative output location kept for reference: '1D_processed_data/speech/'
target_folder = '1D_processed_data/v2/speech/'
src_folder = 'dataset/Audio_Speech_Actors_01-24/'

In [16]:
# Extract the speech features with 40 MFCC coefficients (the function default is 20).
process_files(src_folder=src_folder, target_folder=target_folder, n_mfccs=40)

1D_processed_data/v2/speech/x_data
hey


100%|██████████| 24/24 [10:16<00:00, 25.68s/it]


In [17]:
# Reload what process_files wrote and confirm the three outputs line up.
# os.path.join keeps this working whether or not target_folder ends in a
# slash, matching how process_files builds its output paths.
y = np.load(os.path.join(target_folder, 'y.npy'))
folder = os.path.join(target_folder, 'x_data')
xes = [file for file in os.listdir(folder) if file.endswith(".npy")]
actors = np.load(os.path.join(target_folder, 'actors.npy'))

print(y.shape)
print(actors.shape)
print(len(xes))

# One label and one actor id per saved feature file; a mismatch means the
# x_data folder and the label arrays are out of sync (e.g. stale files).
assert y.shape == actors.shape == (len(xes),), "labels, actors and feature files are out of sync"

(1056,)
(1056,)
1056


## Processing song

In [21]:
# Where the song features are written, and where the song audio is read from.
# Alternative output location kept for reference: '1D_processed_data/song/'
target_folder = '1D_processed_data/v2/song/'
src_folder = 'dataset/Audio_Song_Actors_01-24'

In [19]:
# Extract the song features with 40 MFCC coefficients (the function default is 20).
process_files(src_folder=src_folder, target_folder=target_folder, n_mfccs=40)

1D_processed_data/v2/song/x_data
hey


100%|██████████| 24/24 [11:01<00:00, 27.58s/it]


In [22]:
# Reload what process_files wrote and confirm the three outputs line up.
# os.path.join keeps this working whether or not target_folder ends in a
# slash, matching how process_files builds its output paths.
y = np.load(os.path.join(target_folder, 'y.npy'))
folder = os.path.join(target_folder, 'x_data')
xes = [file for file in os.listdir(folder) if file.endswith(".npy")]
actors = np.load(os.path.join(target_folder, 'actors.npy'))

print(y.shape)
print(actors.shape)
print(len(xes))

# One label and one actor id per saved feature file; a mismatch means the
# x_data folder and the label arrays are out of sync (e.g. stale files).
assert y.shape == actors.shape == (len(xes),), "labels, actors and feature files are out of sync"

(1012,)
(1012,)
1012
