# IMPORTS FOR AUDIO HANDLING

In [3]:
# For file-system operations (listing folders):
import os

# For handling audio:
import librosa

# For handling arrays:
import numpy as np

# For handling plotting:
import matplotlib.pyplot as plt

# Getting all audio file names within folder

In [None]:
def get_file_names(folder):
    """Return the names of the audio files located directly inside `folder`.

    A file counts as audio when its name ends in 'wav' or 'mp3'
    (case-sensitive). Only bare file names are returned, without the
    directory part, in whatever order `os.listdir` yields them.

    The folder is listed in place rather than by changing into it and
    stepping back with '..': that round trip left the process in the wrong
    working directory for nested or absolute paths, and whenever the listing
    raised part-way through.

    Args:
        folder: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        A list of matching file names.

    Raises:
        OSError: If `folder` does not exist or cannot be listed.
    """
    # `str.endswith` accepts a tuple, which keeps the original suffix test
    # (last three characters equal to 'wav' or 'mp3') in a single call.
    return [name for name in os.listdir(folder) if name.endswith(('wav', 'mp3'))]

# Displaying random audio data for verification

Displays one randomly chosen spectrogram, melspectrogram or MFCC array from `data`, as a quick visual check that the extraction code worked.

In [None]:
def display_random(data, title, xlabel, ylabel, sr=22050, hop_length=512):
    """Plot one randomly chosen array from `data` as a quick sanity check.

    Args:
        data: Sequence of 2-D arrays (e.g. spectrograms, melspectrograms or
            MFCCs); one of them is picked uniformly at random.
        title: Title of the figure.
        xlabel: Label for the horizontal axis.
        ylabel: Label for the vertical axis.
        sr: Sampling rate forwarded to `librosa.display.specshow`.
        hop_length: Hop length forwarded to `librosa.display.specshow`.

    Raises:
        ValueError: If `data` is empty.
    """
    # Fail with a readable message instead of numpy's "low >= high".
    if len(data) == 0:
        raise ValueError('`data` is empty: there is nothing to display.')

    # `import librosa` alone does not expose `librosa.display` on librosa
    # versions without lazy submodule loading, so import it explicitly.
    import librosa.display

    # Checking a random image from `data`...
    chosen = data[np.random.randint(0, len(data))]
    librosa.display.specshow(chosen, sr=sr, hop_length=hop_length)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # NOTE: The colour bar maps colours to the values stored in the array.
    plt.colorbar()
    plt.show()

# Extracting and saving audio data from audio files

In [2]:
# HELPER 1: Obtaining melspectrogram for given track:
def get_melspectrogram_for_track(audio_folder, track, sr, n_fft, hop_length, n_mels):
    """Load one track and return its melspectrogram.

    Args:
        audio_folder: Folder containing the track; joined to `track` with '/'.
        track: File name of the track inside `audio_folder`.
        sr: Sampling rate requested from `librosa.load`; the rate it reports
            back is the one passed on to the mel computation.
        n_fft: FFT window size.
        hop_length: Number of samples between successive frames.
        n_mels: Number of mel bands.

    Returns:
        The array produced by `librosa.feature.melspectrogram`.
    """
    track_path = audio_folder + '/' + track

    # Time-domain samples, plus the rate librosa actually loaded them at:
    waveform, sr = librosa.load(track_path, sr=sr)

    # Magnitude of the short-time Fourier transform:
    # NOTE(review): the magnitude is handed over as `S` without squaring;
    # confirm an amplitude (not power) melspectrogram is what is wanted.
    magnitudes = np.abs(librosa.core.stft(waveform, hop_length=hop_length, n_fft=n_fft))

    # Projection of that spectrogram onto the mel scale:
    return librosa.feature.melspectrogram(
        S=magnitudes, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )

#================================================
# HELPER 2: Obtaining MFCCs for given track:
def get_mfccs_for_track(audio_folder, track, sr, n_fft, hop_length, n_mfcc):
    """Load one track and return its MFCCs.

    Args:
        audio_folder: Folder containing the track; joined to `track` with '/'.
        track: File name of the track inside `audio_folder`.
        sr: Sampling rate requested from `librosa.load`.
        n_fft: FFT window size.
        hop_length: Number of samples between successive frames.
        n_mfcc: Number of MFCCs to compute per frame.

    Returns:
        The array produced by `librosa.feature.mfcc`.
    """
    # Time-domain signal (and the rate it was actually loaded at):
    signal, sr = librosa.load(audio_folder + '/' + track, sr=sr)
    # MFCCs:
    # The loaded rate must be forwarded; otherwise librosa assumes its own
    # default rate and builds the mel filterbank for the wrong frequencies
    # whenever `sr` differs from that default.
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=n_mfcc)
    return mfccs

#================================================
# MAIN: Obtaining audio data of given type:
def get_all_audio_data_of_given_type(df, audio_folder, storage_file_name, n_fft, hop_length, sr, n_frames, n_mfcc=None, n_mels=None):
    """Load cached audio features, or compute them for every track and cache them.

    Exactly one of `n_mfcc` / `n_mels` must be given; it selects whether
    MFCCs or melspectrograms are extracted. If a cache file exists the user
    is asked whether to regenerate; answering exactly 'Yes' recomputes,
    anything else returns the cached data unchanged.

    Args:
        df: Object whose `df['TRACK']` yields the track file names.
        audio_folder: Folder containing the audio files.
        storage_file_name: Path of the `.npy` cache file.
        n_fft: FFT window size.
        hop_length: Number of samples between successive frames.
        sr: Sampling rate used when loading the audio.
        n_frames: Number of frames every feature array is padded/truncated to.
            NOTE: `n_frames` was chosen based on the average number of frames
            per audio file.
        n_mfcc: Number of MFCCs (selects MFCC extraction).
        n_mels: Number of mel bands (selects melspectrogram extraction).

    Returns:
        A tuple `(all_audio_data, params)`: the stacked feature arrays and the
        dictionary of parameters they were generated with.

    Raises:
        ValueError: If both or neither of `n_mfcc` and `n_mels` are given.
    """
    #------------------------------------
    # DETERMINING AUDIO DATA TYPE TO BE EXTRACTED AND SAVED

    # Exactly one audio data type must be requested:
    if (n_mels is None) == (n_mfcc is None):
        raise ValueError('Desired audio data type unable to be inferred!')
    audio_data_type = 'melspectrogram' if n_mels is not None else 'MFCC'

    #------------------------------------
    # LOADING AUDIO DATA OF GIVEN TYPE (IF AVAILABLE)

    cached = _load_cached_audio_data(storage_file_name)
    if cached is not None and not _user_wants_regeneration(audio_data_type):
        # Returning audio data and parameters as a tuple for convenience:
        return tuple(cached.values())

    #------------------------------------
    # GENERATING AUDIO DATA OF GIVEN TYPE

    all_audio_data = []
    tracks = df['TRACK']
    prev_i, max_i = 0, float(len(tracks))  # For progress bar
    for i, track in enumerate(tracks):
        # Progress bar with maximum length of 12 dots:
        if i // (max_i / 12) > prev_i // (max_i / 12):
            print('.', end='')
        prev_i = i

        # Obtaining audio data of the given type:
        if n_mels is not None:
            audio_data = get_melspectrogram_for_track(audio_folder, track, sr, n_fft, hop_length, n_mels)
        else:
            audio_data = get_mfccs_for_track(audio_folder, track, sr, n_fft, hop_length, n_mfcc)

        # Every array gets the same shape so the whole dataset can be
        # converted to a single array and easily stored/handled:
        all_audio_data.append(_fit_to_n_frames(audio_data, n_frames))

    #------------------------------------
    # STORING GENERATED DATA

    print(f'\nRegenerated {audio_data_type}s!')

    # Storing parameters:
    params = {'n_fft': n_fft, 'hop_length': hop_length, 'sr': sr, 'n_frames': n_frames}
    if n_mels is not None:
        params['n_mels'] = n_mels
    else:
        params['n_mfcc'] = n_mfcc

    # Saving the obtained data for future use (best effort: a failed save
    # must not throw away the features that were just computed):
    data = {'all_audio_data': np.array(all_audio_data), 'params': params}
    try:
        np.save(storage_file_name, data)
    except Exception as err:
        print('File was too large to be saved. You can save the return value using the module `pickle`.')
        print(f'(Reason reported while saving: {err})')

    # Returning audio data and parameters as a tuple for convenience:
    return tuple(data.values())


def _load_cached_audio_data(storage_file_name):
    """Return the cached data dictionary for `storage_file_name`, or None if unusable."""
    candidates = [storage_file_name]
    # `np.save` appends '.npy' to names that lack it, so also look at the
    # name the cache would actually have been written under.
    if isinstance(storage_file_name, str) and not storage_file_name.endswith('.npy'):
        candidates.append(storage_file_name + '.npy')

    for candidate in candidates:
        try:
            # SECURITY NOTE: `allow_pickle=True` unpickles the file's content,
            # which can execute arbitrary code; only load cache files that
            # this code (or another trusted source) produced.
            cached = np.load(candidate, allow_pickle=True).tolist()
        except Exception:
            # Missing, unreadable or corrupt cache: treat as "no cache".
            continue
        # The data (if stored) was stored as a dictionary.
        if isinstance(cached, dict):
            return cached
    return None


def _user_wants_regeneration(audio_data_type):
    """Ask whether cached data should be regenerated; only the answer 'Yes' counts."""
    try:
        return input(f'Regenerate {audio_data_type}s?') == 'Yes'
    except (EOFError, OSError):
        # No usable stdin (non-interactive run): regenerate, as before.
        # A Ctrl-C at the prompt deliberately propagates instead of starting
        # a long regeneration.
        return True


def _fit_to_n_frames(audio_data, n_frames):
    """Zero-pad or truncate `audio_data` along axis 1 to exactly `n_frames` columns."""
    missing = n_frames - audio_data.shape[1]
    if missing > 0:
        audio_data = np.pad(audio_data, ((0, 0), (0, missing)))
    return audio_data[:, :n_frames]

Wrapper functions for obtaining audio data of a particular type...

In [None]:
def get_all_mfccs(df, audio_folder, mfcc_storage_file_name, n_fft=2048, hop_length=512, sr=22050, n_frames=1294, n_mfcc=40):
    """Return the MFCC data for all tracks by delegating to the generic extractor.

    Forwards every argument to `get_all_audio_data_of_given_type`, requesting
    MFCCs by supplying `n_mfcc` (and no `n_mels`), and returns its result.

    NOTE: `n_frames` was chosen based on the average number of frames per audio file.
    """
    return get_all_audio_data_of_given_type(
        df=df,
        audio_folder=audio_folder,
        storage_file_name=mfcc_storage_file_name,
        n_fft=n_fft,
        hop_length=hop_length,
        sr=sr,
        n_frames=n_frames,
        n_mfcc=n_mfcc,
    )

#================================================
def get_all_melspectrograms(df, audio_folder, melspectrogram_storage_file_name, n_fft=2048, hop_length=512, sr=22050, n_frames=1294, n_mels=512):
    """Return the melspectrogram data for all tracks by delegating to the generic extractor.

    Forwards every argument to `get_all_audio_data_of_given_type`, requesting
    melspectrograms by supplying `n_mels` (and no `n_mfcc`), and returns its
    result.

    NOTE: `n_frames` was chosen based on the average number of frames per audio file.
    """
    return get_all_audio_data_of_given_type(
        df=df,
        audio_folder=audio_folder,
        storage_file_name=melspectrogram_storage_file_name,
        n_fft=n_fft,
        hop_length=hop_length,
        sr=sr,
        n_frames=n_frames,
        n_mels=n_mels,
    )