In [1]:
# Importing necessary libraries
import os  # Operating system functionalities
import librosa  # Audio processing library
import wave  # Module for reading and writing WAV files
import numpy as np  # Numerical operations library
import pandas as pd  # Data manipulation library
import matplotlib.pyplot as plt  # Plotting library

# Importing components for Dividing into the Training Set and the Testing Set
from sklearn.model_selection import train_test_split  # Splitting the dataset for training and testing

# Importing components for Long Short-Term Memory (LSTM) Classifier
import keras  # High-level neural networks API
from tensorflow.keras.utils import to_categorical  # Utility for one-hot encoding
from keras.models import Sequential  # Sequential model for stacking layers
from keras.layers import *  # Different layers for building neural networks
from tensorflow.keras.optimizers import RMSprop

In [2]:
def extract_mfcc(wav_file_name):
    '''This function retrieves the mean of MFCC features from an input WAV file located 
    at the specified path. The input is the path to the WAV file, and the output is 
    the resulting MFCC features.'''
    
    # Loading the WAV file using librosa and obtaining the audio signal (y) and sampling rate (sr)
    y, sr = librosa.load(wav_file_name)
    
    # Extracting MFCC features with a total of 40 coefficients, and computing the mean across dimensions
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    
    # Returning the resulting MFCC features
    return mfccs

In [3]:
import os

In [4]:
# Lists to store labels and extracted MFCC features for the Ravdess emotional speech dataset
ravdess_speech_labels = []  
ravdess_speech_data = []

# Iterating through all the files in the archive directory
for dirname, _, filenames in os.walk('/Users/violinadoley/Downloads/archive'):
    for filename in filenames:
        # Ensure the file is a WAV file
        if filename.endswith('.wav'):
            # Extracting emotion label from the filename and converting to an integer
            ravdess_speech_labels.append(int(filename[7:8]) - 1)
            
            # Obtaining the full path of the WAV file
            wav_file_name = os.path.join(dirname, filename)
            
            # Extracting MFCC features from the WAV file using the previously defined function
            ravdess_speech_data.append(extract_mfcc(wav_file_name))


In [5]:
# Converting the list of MFCC features into a NumPy array
ravdess_speech_data_array = np.asarray(ravdess_speech_data)

# Converting the list of emotion labels into a NumPy array
ravdess_speech_label_array = np.array(ravdess_speech_labels)

# Converting the integer labels into categorical format using one-hot encoding
labels_categorical = to_categorical(ravdess_speech_label_array)

# Displaying the shapes of the MFCC data array and the categorical label array
ravdess_speech_data_array.shape, labels_categorical.shape


ValueError: zero-size array to reduction operation maximum which has no identity