In [1]:
import os
import librosa
import math
import json

In [3]:
# Root folder handed to the extraction functions; every sub-folder found under it
# is recorded as one class label.
DATASET_PATH = "data"
# File that receives the extracted features as JSON.
JSON_PATH = "data_0_0003.json"
# Sampling rate (Hz) requested from librosa.load for every file.
SAMPLE_RATE = 22050
# Maximum clip length requested from librosa.load.
DURATION = 5 # measured in seconds
# Number of samples in a full-length clip at SAMPLE_RATE.
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

In [5]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=1):
    """Extract per-segment MFCCs for every audio file under ``dataset_path`` and save them as JSON.

    Every directory below ``dataset_path`` (the root itself is skipped) is
    treated as one class: its base name is appended to ``"mapping"`` and its
    position in that list is the integer label of the files it contains.
    Each file is loaded with the module-level ``SAMPLE_RATE`` and ``DURATION``,
    cut into ``num_segments`` equal slices of
    ``SAMPLES_PER_TRACK / num_segments`` samples, and each slice is converted
    to an MFCC matrix. Only slices that yield the full expected number of
    frames are stored, so the ``"mfcc"`` entries all have the same length.

    Args:
        dataset_path: Root directory of the dataset (``str`` or path-like).
        json_path: Destination of the JSON file.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between frames, in samples.
        num_segments: Number of slices each clip is divided into.

    Returns:
        None. The result is written to ``json_path`` as a dict with the keys
        ``"mapping"``, ``"mfcc"`` and ``"labels"``.
    """
    # Dictionary to store data
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    # os.walk yields plain strings, so normalise path-like input once and
    # compare by value (an identity check only works by accident).
    root_path = os.fspath(dataset_path)

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)

    # librosa centres its frames by default: the signal is padded by
    # n_fft // 2 on both sides before framing, which gives
    # 1 + (padded_length - n_fft) // hop_length frames. ceil(n / hop_length)
    # is one frame short whenever n is an exact multiple of hop_length, which
    # would silently discard every segment.
    padded_length = num_samples_per_segment + 2 * (n_fft // 2)
    expected_num_mfcc_vectors_per_segment = 1 + (padded_length - n_fft) // hop_length

    # loop through all the class directories
    for dir_path, dir_names, file_names in os.walk(root_path):

        # Sorting in place makes the traversal order, and therefore the label
        # numbering, independent of the file system.
        dir_names.sort()

        # ensure we are not at the root level
        if dir_path == root_path:
            continue

        # save the semantic label (portable across path separators)
        semantic_label = os.path.basename(os.path.normpath(dir_path))
        data["mapping"].append(semantic_label)
        label_index = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # Process files for a specific class
        for f in sorted(file_names):

            # load audio file; unreadable / non-audio files are reported and skipped
            file_path = os.path.join(dir_path, f)
            try:
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
            except Exception as e:
                print(f"An error occurred: {e}")
                continue

            # process segments, extracting MFCCs and storing the data
            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment
                segment = signal[start_sample:finish_sample]

                # A clip shorter than start_sample produces an empty slice,
                # which cannot be framed; it could never pass the length
                # check below, so skip it up front.
                if len(segment) == 0:
                    continue

                mfcc = librosa.feature.mfcc(y=segment,
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length)
                mfcc = mfcc.T

                # store MFCC for segment if it has the expected length
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)
                    print("label: {}, {}, segment:{}".format(label_index, file_path, s+1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        print(f"\n✅ MFCC data successfully saved to {json_path}")

In [74]:
def save_mfcc_without_seg(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512):
    """Extract one MFCC matrix per audio file under ``dataset_path`` and save them as JSON.

    Every directory below ``dataset_path`` (the root itself is skipped) is
    treated as one class: its base name is appended to ``"mapping"`` and its
    position in that list is the integer label of the files it contains.
    Each file is loaded with the module-level ``SAMPLE_RATE`` and ``DURATION``
    and converted to MFCCs as a whole. No length filtering is applied, so the
    ``"mfcc"`` entries may differ in their number of frames.

    Args:
        dataset_path: Root directory of the dataset (``str`` or path-like).
        json_path: Destination of the JSON file.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between frames, in samples.

    Returns:
        None. The result is written to ``json_path`` as a dict with the keys
        ``"mapping"``, ``"mfcc"`` and ``"labels"``.
    """
    # Dictionary to store data
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    # os.walk yields plain strings, so normalise path-like input once and
    # compare by value (an identity check only works by accident).
    root_path = os.fspath(dataset_path)

    # loop through all the class directories
    for dir_path, dir_names, file_names in os.walk(root_path):

        # Sorting in place makes the traversal order, and therefore the label
        # numbering, independent of the file system.
        dir_names.sort()

        # ensure we are not at the root level
        if dir_path == root_path:
            continue

        # save the semantic label (portable across path separators)
        semantic_label = os.path.basename(os.path.normpath(dir_path))
        data["mapping"].append(semantic_label)
        label_index = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # Process files for a specific class
        for f in sorted(file_names):

            # load audio file; unreadable / non-audio files are reported and skipped
            file_path = os.path.join(dir_path, f)
            try:
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
            except Exception as e:
                print(f"An error occurred: {e}")
                continue

            # extract MFCCs for the whole clip; transpose to (frames, n_mfcc)
            mfcc = librosa.feature.mfcc(y=signal,
                                        sr=sr,
                                        n_fft=n_fft,
                                        n_mfcc=n_mfcc,
                                        hop_length=hop_length)
            mfcc = mfcc.T

            # Report the frame count, then store the features unconditionally
            print(f"SAMPLES_PER_TRACK: {SAMPLES_PER_TRACK} - {len(mfcc)} :: {mfcc.shape}")
            data["mfcc"].append(mfcc.tolist())
            data["labels"].append(label_index)
            print("label: {}, {}".format(label_index, file_path))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        print(f"\n✅ MFCC data successfully saved to {json_path}")

In [76]:
# Extract MFCCs for every class folder under DATASET_PATH and save them to JSON_PATH.
# The JSON file is written only after the whole directory walk has finished, so
# interrupting this cell mid-run does not write (or update) the file.
save_mfcc_without_seg(DATASET_PATH, JSON_PATH)


Processing abnormal
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0003.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0004.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0005.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0006.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0008.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0010.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0013.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0014.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0015.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0017.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0018.wav
SAMPLES_PER_TRACK: 110250 - 216 :: (216, 13)
label: 0, data\abnormal\a0020.wav
SAMPLES_PER_TRACK: 110250 - 216

KeyboardInterrupt: 