### This notebook extracts audio features (mel spectrograms and MFCCs) from audio files stored in class directories.
1. The audio files are stored in different folders with their respective class name.
2. The functions extract a mel spectrogram and MFCCs from each audio file.
3. For alignment, the features of each audio file are sliced into segments of `seg size` frames (in this notebook `seg=60`; the slicing function's default is 30).
4. If the last segment is shorter than `seg size` but at least half of it, it is padded up to `seg size` with the per-feature mean of the whole sample. Otherwise it is discarded.

In [1]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt


In [2]:
# Display the installed NumPy version.
np.__version__

'1.23.3'

In [3]:
# Root folder that holds one sub-folder of audio files per class.
root_dir = "/home/assa8945/MMC/Dataset/test_data/MOBY"
# root_dir = "D:\A\Marine Mammal Detection\datasets\AAAI23\\3classes\\audio"
seg = 60  # number of feature frames (rows) in every fixed-length segment
sample_rate = 48000  # sampling rate handed to librosa.load
# save_dir =  "D:\A\Marine Mammal Detection\datasets\\"+"100khz_seg_"+str(seg)+"_mean_padded"
save_dir = "/home/assa8945/MMC/Dataset/features/watkins_full/testsets"
# makedirs also creates missing parent folders and does nothing when the
# folder already exists, so no separate existence check is needed.
os.makedirs(save_dir, exist_ok=True)

In [4]:
def slicing_and_padding_single_data(data,seg=30):
    """Cut a 2D array into fixed-length chunks along its first axis.

    The rows of ``data`` are split into consecutive, non-overlapping chunks of
    ``seg`` rows each. A trailing remainder shorter than ``seg`` is handled
    as follows:

    * at least ``seg / 2`` rows: kept and padded up to ``seg`` rows, each
      padding row being the column-wise mean of the whole ``data`` array;
    * fewer than ``seg / 2`` rows: discarded.

    Args:
        data (np.ndarray): 2D array; rows are sliced, columns are kept intact.
        seg (int, optional): Number of rows in each returned chunk.
            Defaults to 30.

    Returns:
        list[np.ndarray]: Chunks of shape ``(seg, data.shape[1])`` in the
        original row order. Empty when ``data`` has fewer than ``seg / 2``
        rows.

    Raises:
        ValueError: If ``seg`` is not positive; no chunk can be built from a
            non-positive length.
    """
    if seg <= 0:
        raise ValueError("seg must be a positive integer, got {}".format(seg))

    total_rows = data.shape[0]
    covered_rows = (total_rows // seg) * seg  # rows that fit into full chunks

    chunks = [data[start:start + seg, :] for start in range(0, covered_rows, seg)]

    leftover = total_rows - covered_rows
    if leftover > 0 and leftover >= seg / 2:
        # Pad the short tail with the mean of every column of the full input
        # so that the chunk reaches exactly `seg` rows.
        column_means = data.mean(axis=0)
        filler = np.tile(column_means, (seg - leftover, 1))
        chunks.append(np.vstack((data[covered_rows:, :], filler)))
    # A tail shorter than half a segment is intentionally dropped.

    return chunks


In [5]:
def batch_feature_extraction(dir,sample_rate,seg=seg,):
    """Extract segmented mel-spectrogram and MFCC features per class folder.

    Every folder found below ``dir`` (searched recursively with ``os.walk``)
    is processed on its own. Each regular, non-CSV file in a folder is loaded
    with librosa and turned into a dB-scaled mel spectrogram (240 mel bands)
    and 40 MFCCs. Both feature matrices are transposed to one row per frame
    and cut into ``seg``-row pieces by ``slicing_and_padding_single_data``.
    All pieces of a folder are flattened to one row each and written to
    ``specs.csv`` and ``mfccs.csv`` inside that same folder.

    Args:
        dir (str): Root folder that contains the classes' folders.
        sample_rate (int): Sampling rate passed to ``librosa.load``.
        seg (int, optional): Number of frames per segment. Defaults to the
            value the module-level ``seg`` had when this function was defined.

    Returns:
        None. The features are written to disk; nothing is returned.

    Raises:
        AssertionError: If the spectrogram and MFCC segment counts differ or
            a segment does not have exactly ``seg`` rows.
    """
    # Save the folders' directories to a list according to the classes.
    folder_list = []
    class_list = []
    for root,folders,_ in os.walk(dir):
        for folder in folders:
            folder_list.append(os.path.join(root,folder))
            class_list.append(folder)

    print(class_list)
    for folder_dir in folder_list:

        specs = []
        mfccs = []
        print('going through folder',folder_dir)
        for audio_name in os.listdir(folder_dir):
            audio_dir = os.path.join(folder_dir,audio_name)

            # Skip feature files written by an earlier run, and anything that
            # is not a regular file (e.g. nested folders, which os.walk
            # visits separately).
            if audio_name.endswith(".csv") or not os.path.isfile(audio_dir):
                continue

            print('processing file',audio_name)

            y, sr = librosa.load(audio_dir,sr=sample_rate) #default sample rate 22050
            # NOTE(review): 2048 presumably matches librosa's default n_fft,
            # i.e. clips shorter than one analysis window are skipped.
            if len(y)<2048:
                print('The length of audio is less than 2048 samples ',audio_dir)
                continue

            # Mel spectrogram in dB, transposed so that rows are frames.
            spec = librosa.feature.melspectrogram(y=y,sr=sr,n_mels=240)
            spec = librosa.power_to_db(spec).T
            specs.extend(slicing_and_padding_single_data(spec,seg=seg))

            # MFCCs, transposed the same way.
            mfcc = librosa.feature.mfcc(y=y,sr=sr,n_mfcc=40).T
            mfccs.extend(slicing_and_padding_single_data(mfcc,seg=seg))

        # Sanity checks: both feature types must yield the same number of
        # segments, and every segment must have exactly `seg` rows.
        assert len(specs)==len(mfccs), (
            'segment count mismatch in {}: {} spectrogram vs {} mfcc segments'
            .format(folder_dir,len(specs),len(mfccs))
        )
        print('There are {} sliced and padded data samples under \n{}\n'.format(len(specs),folder_dir))

        for piece in specs:
            assert piece.shape[0]==seg, 'unexpected spectrogram segment shape {}'.format(piece.shape)
        for piece in mfccs:
            assert piece.shape[0]==seg, 'unexpected mfcc segment shape {}'.format(piece.shape)

        # A folder without usable audio (for instance one that only holds
        # sub-folders) has nothing to save; reshaping an empty array with -1
        # would raise, so skip it explicitly.
        if not specs:
            print('No usable audio found under',folder_dir,'- nothing saved')
            continue

        # Flatten every (seg, n_features) segment to a single row so the data
        # fits the 2D layout of a csv file, then save next to the audio.
        specs = np.array(specs)
        mfccs = np.array(mfccs)
        reshaped_specs = specs.reshape(specs.shape[0],-1)
        reshaped_mfccs = mfccs.reshape(mfccs.shape[0],-1)

        np.savetxt(os.path.join(folder_dir,'mfccs.csv'), reshaped_mfccs, delimiter=",")
        np.savetxt(os.path.join(folder_dir,'specs.csv'), reshaped_specs, delimiter=",")


# Run the extraction for every folder below root_dir.
batch_feature_extraction(dir=root_dir, sample_rate=sample_rate, seg=seg)

['Fin Whale', 'Bowhead Whale']
going through folder /home/assa8945/MMC/Dataset/test_data/MOBY/Fin Whale
processing file 93-001-1723.ch11.wav
processing file 93-002-1114.ch07.wav
processing file 93-055-0536.ch10.wav
processing file 93-001-1450.ch11.wav
processing file 93-042-1013.ch06.wav
processing file 93-050-1654.ch10.wav
processing file 93-002-0244.ch11.wav
processing file 93-002-0830.ch11.wav
processing file 93-040-1235.ch10.wav
processing file 93-050-1234.ch10.wav
processing file 93-001-1217.ch04.wav
processing file 93-002-0551.ch11.wav
processing file 93-042-0015.ch10.wav
processing file 93-041-0244.ch16.wav
processing file 93-050-1942.ch10.wav
There are 117955 sliced and padded data samples under 
/home/assa8945/MMC/Dataset/test_data/MOBY/Fin Whale

going through folder /home/assa8945/MMC/Dataset/test_data/MOBY/Bowhead Whale
processing file B88042519.1936.wav
processing file B88041902.0347.wav
processing file B88042107.0509b.wav
processing file B88041903.0424.wav
processing file

In [6]:
def merge_and_labels(root_dir,seg,n_mels=240,n_mfcc=40):
    """Load the per-folder feature files and merge them into labelled arrays.

    Every sub-folder of ``root_dir`` is expected to contain ``specs.csv`` and
    ``mfccs.csv`` in which each row is one flattened segment. The rows are
    restored to ``(seg, n_mels)`` / ``(seg, n_mfcc)`` and the folders are
    concatenated in the order returned by ``os.listdir``. Each sample is
    labelled with the name of the folder it came from.

    Args:
        root_dir (str): Folder whose sub-folders hold the feature csv files.
        seg (int): Number of frames per segment used when the csv files were
            written.
        n_mels (int, optional): Features per spectrogram frame. Defaults to 240.
        n_mfcc (int, optional): Features per MFCC frame. Defaults to 40.

    Returns:
        tuple: ``(collected_specs, collected_mfccs, collected_y)`` with shapes
        ``(N, seg, n_mels)``, ``(N, seg, n_mfcc)`` and ``(N, 1)``.
        ``collected_y`` holds the folder names as strings. When no folder is
        found, all three arrays are empty (``N == 0``).
    """
    labels = []
    spec_parts = []
    mfcc_parts = []
    label_parts = []
    for folder in os.listdir(root_dir):
        folder_dir = os.path.join(root_dir,folder)
        # Only class folders carry feature files; skip csv entries and any
        # other stray file that sits directly in root_dir.
        if folder.endswith('csv') or not os.path.isdir(folder_dir):
            continue

        specs = np.loadtxt(os.path.join(folder_dir,"specs.csv"), delimiter=",")
        mfccs = np.loadtxt(os.path.join(folder_dir,"mfccs.csv"), delimiter=",")

        # Restore data to 3D
        specs = specs.reshape(-1,seg,n_mels)
        mfccs = mfccs.reshape(-1,seg,n_mfcc)

        spec_parts.append(specs)
        mfcc_parts.append(mfccs)
        label_parts.append(np.full((specs.shape[0],1),folder))

        labels.append((folder,folder))

    if spec_parts:
        # Concatenate once at the end instead of growing the arrays inside
        # the loop, which would copy all previously collected data each time.
        collected_specs = np.concatenate(spec_parts,axis=0)
        collected_mfccs = np.concatenate(mfcc_parts,axis=0)
        collected_y = np.concatenate(label_parts,axis=0)
    else:
        collected_specs = np.empty((0,seg,n_mels))
        collected_mfccs = np.empty((0,seg,n_mfcc))
        collected_y = np.empty((0,1))

    print(labels)

    print('\nThe overall mfcc data has shape of ',collected_mfccs.shape)

    print('\nThe overall spectrogram data has shape of ',collected_specs.shape)
    print('\nThe labels have shape of ',collected_y.shape)

    return collected_specs,collected_mfccs,collected_y

# Merge the per-folder feature files found under root_dir into labelled arrays.
collected_specs, collected_mfccs, collected_y = merge_and_labels(root_dir=root_dir, seg=seg)

[('Fin Whale', 'Fin Whale'), ('Bowhead Whale', 'Bowhead Whale')]

The overall mfcc data has shape of  (124158, 60, 40)

The overall spectrogram data has shape of  (124158, 60, 240)

The labels have shape of  (124158, 1)


In [7]:
# 2D versions (one flattened row per sample) of the 3D feature arrays. They
# are only needed for csv export and are not used by the .npz saves below.
reshaped_mfccs=collected_mfccs.reshape(collected_mfccs.shape[0],-1)
reshaped_specs=collected_specs.reshape(collected_specs.shape[0],-1)
print(collected_mfccs.shape)

# makedirs also creates missing parent folders and does nothing when the
# folder already exists.
os.makedirs(save_dir, exist_ok=True)

print("saving data to: ",save_dir)

# Unpacking with * passes every sample as its own positional array, so each
# archive stores one entry per sample (arr_0, arr_1, ...).
np.savez(os.path.join(save_dir,'mfccs.npy.npz'),*collected_mfccs)
np.savez(os.path.join(save_dir,'specs.npy.npz'),*collected_specs)
# Here arr_0 is the full spectrogram array and arr_1 the matching labels.
np.savez(os.path.join(save_dir,'specs_labeled.npy.npz'),collected_specs,collected_y)
# np.savetxt(os.path.join(save_dir,'labels.csv'), collected_y, delimiter=",")

print('completed')

(124158, 60, 40)
saving data to:  /home/assa8945/MMC/Dataset/features/watkins_full/testsets
completed
