### This file extracts audio files from directories.
将所有文件夹内的音频提取特征，并每一类分别存储到一个路径

In [16]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt


In [25]:
seg = 256
sample_rate = 8000
n_mels = 256
n_fft = 512
hop_length = 256
root_dir = "/data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/train"
save_dir = "/data2/xiangrui_d2/MMC/Dataset/Waktins12/features/train"

if os.path.exists(save_dir):
    pass
else:
    os.mkdir(save_dir)

In [18]:
def slicing_and_padding_single_data(data,seg=30):
    """_summary_

    Args:
        data_list (_type_): A data list that contains 2D arrays data.
        seg (int, optional): The length of each sliced data. Defaults to 30.
    """
    new_data_list = []

    data_length = data.shape[0] # The length of original 2D data sample.
    seg_counter = 0
    while data_length > 0:
        if data_length>=seg:

            new_data_list.append(data[seg_counter:seg_counter + seg,:])
            data_length -= seg
            seg_counter += seg

        elif data_length >= seg/2:  #when the left data_length is less than seg size but large than half of seg size, pad it up to seg size
            
            # pad the sample mean
            # average = data.mean(axis=0)
            # missing_length = seg-data_length
            # averages_pads = np.tile(average,(missing_length,1))
            # padded_data = np.vstack((data[seg_counter:,:],averages_pads))
            # print(padded_data.shape)
        
            #pad 0s 
            pad = np.zeros((seg-data_length,data.shape[1]))
            padded_data = np.vstack((data[seg_counter:,:],pad))

            # print(padded_data.shape)
            new_data_list.append(padded_data)
            break
        else: #when the row number is less than half of the segment , pass the rows.
            break
    return new_data_list


In [19]:
def batch_feature_extraction(dir,sample_rate,seg=seg,):
    """_summary_

    Args:
        dir (_type_): Root folder directory that contains differents classes' folders.

    Returns:
        _type_: spectrogram list, mfcc list
    """
 
    # Save the folders' directories to a list according to the classes.
    
    folder_list = []
    class_list = []
    for root,folders,files in os.walk(dir):
        for folder in folders:

            folder_list.append(os.path.join(root,folder))
            class_list.append(folder)

    print(class_list)
    for folder_dir,class_name in zip(folder_list,class_list):
        
        specs = []
        # mfccs = []
        print('going through folder',folder_dir)
        for file in os.listdir(folder_dir):
            
            if file.endswith(".csv"):
                continue

            audio_name = file
            audio_dir = os.path.join(folder_dir,audio_name)
            # print(audio_dir)

            y, sr = librosa.load(audio_dir,sr=sample_rate) #default sample rate 22050
            if len(y)<2048:
                print('The length of audio is less than 2048 samples ',audio_dir)
                continue
            
            # Calculate spectrogram
            spec = librosa.feature.melspectrogram(y=y,sr=sr,n_mels=n_mels,n_fft=n_fft,hop_length=hop_length)
            spec = librosa.power_to_db(spec)
            spec = spec.T
            # print(spec.shape)
            modified_spec = slicing_and_padding_single_data(spec,seg=seg)
            specs.extend(modified_spec)           
        

#-------------------------------------------------------------------------------------------
# Assert that all the data has correct shape
        print('There are {} sliced and padded data samples under \n{}\n'.format(len(specs),folder_dir))
        
        for i in specs:
            assert i.shape[0]==seg,print(i.shape)

#-------------------------------------------------------------------------------------------
# Save the data into csv files to respective directories
        specs = np.array(specs)

        # reshaped_specs = specs.reshape(specs.shape[0],-1)
        current_labels = np.full((specs.shape[0],1),class_name)
        np.savez(os.path.join(save_dir,class_name+'_specs.npy'),specs,current_labels)
        # np.savetxt(os.path.join(folder_dir,'specs.csv'), reshaped_specs, delimiter=",")


batch_feature_extraction(root_dir,sample_rate=sample_rate,seg=seg)

['Common Dolphin', 'Finback Whale', 'Killer Whale', 'Bottlenose Dolphin', 'sperm whale', 'Pantropical Spotted Dolphin', 'Bowhead Whale', 'Short-Finned (Pacific) Pilot Whale', 'Spinner Dolphin', 'Weddell Seal', 'Long-Finned Pilot Whale', 'Humpback Whale']
going through folder /data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Common Dolphin
The length of audio is less than 2048 samples  /data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Common Dolphin/87003016.wav
The length of audio is less than 2048 samples  /data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Common Dolphin/58014004.wav
The length of audio is less than 2048 samples  /data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Common Dolphin/7500304S.wav
There are 29 sliced and padded data samples under 
/data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Common Dolphin

going through folder /data2/xiangrui_d2/MMC/Dataset/Waktins12/wav/test/Finback Whale
There are 519 sliced and padded data samples under 
/data2/xiangrui_d2/MMC/Dataset

In [20]:
merged_X = np.zeros((1,seg,n_mels))
merged_y = np.zeros((1,1))

for file in os.listdir(save_dir):
    curr_npy = os.path.join(save_dir,file)
    # print(curr_npy)
    curr_data = np.load(curr_npy)
    X = curr_data['arr_0']
    y = curr_data['arr_1']
    if X.shape[0] ==0:
        continue
    merged_X = np.vstack((merged_X,X))
    merged_y = np.vstack((merged_y,y))
print(merged_X.shape)
print(merged_y.shape)

(2949, 256, 256)
(2949, 1)


In [23]:
def combine_npz(root_dir,seg,n_mels):
    """ Combine all the npz files in the root_dir into one npz file

    Args:
        root_dir (_type_): _description_
    """

    combined_X = np.zeros((1,seg,n_mels))
    combined_y = np.zeros((1,1))
    npz_list = os.listdir(root_dir)
    for npz in npz_list:
        curr_data = np.load(os.path.join(root_dir,npz))
        curr_X = curr_data['arr_0']
        curr_y = curr_data['arr_1']
        combined_X = np.vstack((combined_X,curr_X))
        combined_y = np.vstack((combined_y,curr_y))
    combined_X = np.delete(combined_X,0,0)
    combined_y = np.delete(combined_y,0,0)
    print(combined_X.shape)
    print(combined_y.shape)
    np.savez(os.path.join(root_dir,'combined_data.npz'),combined_X,combined_y)

In [26]:
combine_npz(save_dir,seg=seg,n_mels=n_mels)

(6398, 256, 256)
(6398, 1)
