# In [1]:
# %% [code]
import json 
import os
import math
import librosa
import warnings
warnings.simplefilter("ignore")

# %% [code]
# Root folder that is walked for audio; each sub-folder name is used as a label.
DATASET_PATH = "../input/birdsong-recognition/train_audio/"

# File the extracted MFCCs, labels and label mapping are written to.
JSON_PATH = "dataset.json"

# Sampling rate (Hz) every recording is loaded at.
SAMPLE_RATE = 22050

# Selection of species to process (kept small to reduce computation time).
TO_PROCESS = [
    "aldfly",
    "dowwoo",
    "hamfly",
    "robgro",
    "scatan",
]

# %% [code]
def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, segment_duration=6):
    """Extract MFCCs from the selected species' recordings and save them to a JSON file.

    Walks ``dataset_path``; every sub-folder whose name appears in the
    module-level ``TO_PROCESS`` list is treated as one class. Each audio file in
    such a folder is cut into consecutive, non-overlapping segments of
    ``segment_duration`` seconds (a trailing remainder shorter than one segment
    is dropped) and one MFCC matrix is computed per segment.

    The JSON file contains three keys:
      * ``"mapping"``: class names, in the order the folders were encountered
      * ``"labels"``: for each stored segment, the index of its class in ``"mapping"``
      * ``"mfcc"``: for each stored segment, its MFCC matrix as nested lists

        :param dataset_path (str): Path to the dataset root folder
        :param json_path (str): Path to the JSON file used to save the MFCCs
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param segment_duration (int or float): Duration of each segment, in seconds
        :raises ValueError: If ``segment_duration`` or ``hop_length`` is not positive
        :return: None
        """

    # Fail fast: a zero value would otherwise surface as a ZeroDivisionError only
    # after the first audio file has been loaded, and a negative duration would
    # silently produce an empty dataset.
    if segment_duration <= 0:
        raise ValueError("segment_duration must be positive")
    if hop_length <= 0:
        raise ValueError("hop_length must be positive")

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],  # class (species) names
        "labels": [],   # index into "mapping" for each stored segment (targets)
        "mfcc": [],     # MFCC matrix of each stored segment (inputs)
    }

    # These depend only on the arguments, so compute them once instead of per file.
    samples_per_segment = int(SAMPLE_RATE * segment_duration)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    for dirpath, _dirnames, filenames in os.walk(dataset_path):

        # os.walk yields the root folder first; only sub-folders are classes.
        # Compare by value: string identity is an implementation detail.
        if dirpath == dataset_path:
            continue

        # the sub-folder name is the class name
        semantic_label = os.path.basename(dirpath)

        # proceed to data extraction only for the selected species
        if semantic_label not in TO_PROCESS:
            continue

        # The label must be the position of this class in "mapping"; the position
        # of the folder in the walk would count skipped folders as well.
        label = len(data["mapping"])
        data["mapping"].append(semantic_label)
        print("\nProcessing: {}".format(semantic_label))

        num_files = len(filenames)
        for num_file, f in enumerate(filenames, start=1):

            # load the audio file as an array, resampled to SAMPLE_RATE
            file_path = os.path.join(dirpath, f)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # recordings have different durations; keep only whole segments so
            # that every stored example covers the same amount of audio
            audio_duration = librosa.get_duration(y=signal, sr=SAMPLE_RATE)
            num_segments = int(audio_duration // segment_duration)

            for d in range(num_segments):

                # first and one-past-last sample of the current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # keyword arguments: recent librosa releases no longer accept
                # the audio and sample rate positionally
                mfcc = librosa.feature.mfcc(y=signal[start:finish],
                                            sr=sample_rate,
                                            n_mfcc=num_mfcc,
                                            n_fft=n_fft,
                                            hop_length=hop_length)
                mfcc = mfcc.T

                # store only mfcc feature with expected number of vectors
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("processing file {} on {}".format(num_file, num_files), end='\r', flush=True)

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        print("Data successfully saved !")


# %% [code]
##### Main program #####
# Build the MFCC dataset for the selected species, using 6-second segments.
save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, segment_duration=6)


# The data was generated from a selection of species folders: TO_PROCESS = ["aldfly", "dowwoo", "hamfly", "robgro", "scatan"]

# Output: Data successfully saved !
