# Extract MFCC Features from .wav audio files using Google Colab GPU

In [23]:
# import packages
import os
import librosa
import numpy as np
import pickle
from tqdm import tqdm

ModuleNotFoundError: No module named 'librosa'

Setting up directories

In [None]:
# Root folder; it must contain the 'hate_audio_wav' and 'non_hate_audio_wav' sub-folders.
base_dir = r'AUDIO_PATH'

# One input folder per class, both located directly under base_dir.
hate_audio_dir, non_hate_audio_dir = [
    os.path.join(base_dir, subfolder)
    for subfolder in ('hate_audio_wav', 'non_hate_audio_wav')
]

# Destination folder for the pickled MFCC features; created if it does not exist yet.
output_dir = os.path.join(base_dir, 'MFCCF')
os.makedirs(output_dir, exist_ok=True)

Defining the function that extracts MFCC features

In [None]:
def extract_mfccs(file_path, n_mfcc=40):
    """
    Extract time-averaged MFCC features from a single audio file.

    @Params: file_path(str): The path to the audio file.
        n_mfcc(int): Number of MFCC coefficients to compute. Defaults to 40.
    @Output: np.ndarray: A 1D array of length n_mfcc holding the mean of each
        coefficient over all frames, or a zero-filled array of the same length
        if loading or processing the file raises an error.
    """
    try:
        # Loaded with librosa's default settings; `sr` is the sample rate librosa returns.
        audio, sr = librosa.load(file_path)
        # One row per cepstral coefficient, one column per frame.
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Transpose so frames run along axis 0, then average over the frames.
        mfccs_mean = np.mean(mfccs.T, axis=0)
        return mfccs_mean
    except Exception as e:
        # Best-effort on purpose: report the failure and return a placeholder so
        # one unreadable file does not abort a whole batch run. The placeholder
        # length follows n_mfcc so every returned vector has the same size.
        print(f"Error processing {file_path}: {e}")
        return np.zeros(n_mfcc)

# MFCC mean: mfccs_mean is the mean of the MFCCs across the time axis. The MFCCs of an audio file form a matrix in which each row corresponds to a cepstral coefficient and each column to a frame of the audio. Averaging each coefficient over all frames (np.mean(mfccs.T, axis=0)) gives mfccs_mean, a fixed-length summary of the audio's overall spectral shape. This averaging reduces the data size and makes clips of different durations comparable, at the cost of discarding how the coefficients change over time.

# sr: sr stands for sample rate, the number of audio samples per second, measured in Hertz (Hz). A common sample rate for audio files is 44,100 Hz, which means the file contains 44,100 samples per second. This is the standard value for CD-quality audio and is enough to capture the audible frequency range for humans (approximately 20 Hz to 20 kHz). Note that librosa.load resamples to 22,050 Hz by default, so the sr returned here is 22,050 unless sr=None is passed to keep the file's native rate.

Defining the function that walks through an audio folder and processes its audio files

In [None]:
def process_directory(directory, label, out_dir=None):
    """
    Process all .wav files in a directory and save each file's MFCC features to its own pickle file.

    @Params: directory(str): Directory containing audio files.
        label(str): Label associated with the audio files ('hate' or 'non_hate');
            it is embedded in every output file name.
        out_dir(str | None): Directory the pickle files are written to. When None,
            the notebook-level `output_dir` is used.
    @Output: None. One '<audio name>_<label>_mfcc.p' pickle file is written per
        audio file in the directory.
    """
    # Fall back to the notebook-level destination when none is given explicitly.
    target_dir = output_dir if out_dir is None else out_dir
    os.makedirs(target_dir, exist_ok=True)

    # Filter before iterating so the progress bar counts audio files only.
    # The extension is matched case-insensitively so '.WAV' files are not
    # silently skipped, and the names are sorted because os.listdir returns
    # entries in arbitrary order.
    wav_files = sorted(
        name for name in os.listdir(directory) if name.lower().endswith('.wav')
    )

    for filename in tqdm(wav_files):
        file_path = os.path.join(directory, filename)
        mfccs_mean = extract_mfccs(file_path)
        pickle_filename = f"{os.path.splitext(filename)[0]}_{label}_mfcc.p"
        pickle_path = os.path.join(target_dir, pickle_filename)
        with open(pickle_path, 'wb') as fp:
            pickle.dump(mfccs_mean, fp)

Process audio and extract features

In [None]:
# Extract features for each class in turn: hate clips first, then non-hate clips.
for audio_dir, class_label in (
    (hate_audio_dir, 'hate'),
    (non_hate_audio_dir, 'non_hate'),
):
    process_directory(audio_dir, class_label)

100%|██████████| 427/427 [00:55<00:00,  7.70it/s]
100%|██████████| 639/639 [01:14<00:00,  8.62it/s]
