In [1]:
import os
import numpy as np
import librosa
import librosa.display
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import pandas as pd
import soundfile as sf
import gc
from multiprocessing import Pool
import worker

In [2]:
# Root directory that holds the FSD50K dataset and the folders this notebook
# writes (SplitAudio/, MelSpectrograms/). The original machine-specific
# location stays the default; set the AUDIO_DATA_ROOT environment variable to
# run the notebook on another machine without editing this cell.
ABS_PATH = os.path.abspath(
    os.environ.get('AUDIO_DATA_ROOT') or os.path.join('D:', '/Midhun')
)
# Ground-truth table of the FSD50K development set, read from dev.csv.
ground_truth = pd.read_csv(ABS_PATH+'/FSD50K/FSD50K.ground_truth/dev.csv')

In [4]:
# Flag every clip that carries at least one distress label as True and every
# other clip as False.

red_labels = {'Alarm', 'Gasp', 'Crying_and_sobbing', 'Hiss',
              'Screech', 'Shatter', 'Yell', 'Siren', 'Sigh',
              'Screaming', 'Growling', 'Shout', 'Boom', 'Crackle', 'Explosion'}
# No longer used for the matching below; kept (and sorted, so the string is
# deterministic) in case another cell still reads `pattern`.
pattern = '|'.join(sorted(red_labels))

# Each `labels` cell is a comma-separated list of class names. Whole label
# tokens are compared against `red_labels`; a regex substring search would
# also fire on any class name that merely contains one of the distress labels
# and would interpret regex metacharacters inside class names.
value = (
    ground_truth['labels']
    .fillna('')
    .str.split(',')
    .map(lambda tokens: not red_labels.isdisjoint(tokens))
    .rename('labels')
)

# The original multi-label string moves to `grouped_labels`; `labels` becomes
# the boolean distress flag used by the rest of the notebook.
new_GT = (
    ground_truth
    .rename(columns={'labels': 'grouped_labels'})
    .assign(labels=value)
)
new_GT

Unnamed: 0,fname,grouped_labels,mids,split,labels
0,64760,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
1,16399,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
2,16401,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
3,16402,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
4,16404,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
...,...,...,...,...,...
40961,102863,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,False
40962,389607,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,False
40963,90091,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,False
40964,244718,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,False


In [5]:
# Class balance: number of clips per distress flag (True = distress label present).
new_GT.groupby('labels').size()

labels
False    36962
True      4004
dtype: int64

In [3]:
# Folder that is walked for the FSD50K development audio files.
DATA_PATH = f'{ABS_PATH}/FSD50K/FSD50K.dev_audio/'
# Sampling rate (Hz) used both when loading audio and when writing clips.
SAMPLE_RATE = 44100

In [11]:
# Create the output tree for the audio clips:
#   SplitAudio/{train,test}/{Danger,Other}
# os.makedirs builds the intermediate folders itself, and exist_ok=True makes
# the cell safe to re-run (without it the call raises FileExistsError as soon
# as a folder already exists).
for split_name in ('train', 'test'):
    for class_name in ('Danger', 'Other'):
        os.makedirs(ABS_PATH + '/SplitAudio/' + split_name + '/' + class_name, exist_ok=True)

In [6]:
def store_wav(data, is_train, is_distressed, file_name, file_identifier):
    """Write one audio clip into the folder matching its split and class.

    The clip is saved as ``<file_name><file_identifier>.wav`` at the sample
    rate ``SAMPLE_RATE``.

    Args:
        data: Audio samples passed straight to ``sf.write``.
        is_train: Truthy -> save under ``TRAIN_PATH``; falsy -> ``TEST_PATH``.
        is_distressed: Truthy -> ``Danger/`` sub-folder; falsy -> ``Other/``.
        file_name: Base name of the source recording (no extension).
        file_identifier: Clip index; converted with ``str()`` and appended to
            ``file_name``.
    """
    split_root = TRAIN_PATH if is_train else TEST_PATH
    class_dir = 'Danger/' if is_distressed else 'Other/'
    save_location = split_root + class_dir + file_name + str(file_identifier) + '.wav'

    sf.write(file=save_location, data=data, samplerate=SAMPLE_RATE)
    

In [7]:

def slice_data(start, end, raw_data,  sample_rate):
    """Return the part of ``raw_data`` between ``start`` and ``end`` seconds.

    Both times are converted to sample indices (``int(t * sample_rate)``) and
    capped at ``len(raw_data)``, so a window that runs past the end of the
    signal yields a shorter (possibly empty) slice instead of failing.

    Args:
        start: Window start, in seconds.
        end: Window end, in seconds (exclusive).
        raw_data: Sliceable sequence of samples.
        sample_rate: Samples per second.

    Returns:
        ``raw_data[start_index:end_index]``.
    """
    total_samples = len(raw_data)
    first, last = (min(int(seconds * sample_rate), total_samples) for seconds in (start, end))
    return raw_data[first:last]


In [4]:
# Output roots for the clips written by store_wav.
TRAIN_PATH = ABS_PATH + '/SplitAudio/train/'
TEST_PATH = ABS_PATH + '/SplitAudio/test/'
# Length of every stored clip, in seconds.
CLIP_LEN = 3
# Clip length in samples. Derived from SAMPLE_RATE instead of repeating the
# literal 44100, so the two values cannot drift apart.
a_len = SAMPLE_RATE * CLIP_LEN

In [10]:
# Cut every recording under DATA_PATH whose duration is between 3 s and 30 s
# into consecutive CLIP_LEN-second clips and store each clip with store_wav in
# the train/test and Danger/Other folder given by the ground truth.
MIN_DURATION = 3     # seconds; shorter recordings are skipped
MAX_DURATION = 30    # seconds; longer recordings are skipped
samples_per_clip = int(CLIP_LEN * SAMPLE_RATE)

# Index the ground truth by file id once, so each file is a direct lookup
# instead of a scan over the whole table. keep-first mirrors the previous
# "first matching row" behaviour.
gt_by_fname = new_GT.drop_duplicates('fname').set_index('fname')
missing_from_gt = []

for dirpath, dirnames, filenames in os.walk(DATA_PATH):
    #For each file in directory
    for f in filenames:
        file_name, extension = os.path.splitext(f)
        # Ground-truth ids are integers, so only numerically named .wav files
        # can be matched; anything else is ignored instead of crashing int().
        if extension.lower() != '.wav' or not file_name.isdigit():
            continue
        fname_id = int(file_name)
        if fname_id not in gt_by_fname.index:
            missing_from_gt.append(f)
            continue

        signal, _ = librosa.load(os.path.join(dirpath, f), sr=SAMPLE_RATE)
        duration = librosa.get_duration(y=signal, sr=SAMPLE_RATE)
        #Removing too small & too large files
        if not (MIN_DURATION <= duration <= MAX_DURATION):
            continue

        #Locating meta data
        row_focus = gt_by_fname.loc[fname_id]
        is_distressed = row_focus['labels']
        is_train = row_focus['split'] == 'train'

        #Creating and storing 3s clippings
        # Count whole clips in samples. The former `while end < duration`
        # test dropped the last full clip whenever the duration was an exact
        # multiple of CLIP_LEN (a 3.0 s file produced no clip at all).
        # Any trailing remainder shorter than CLIP_LEN is discarded; no
        # silence padding is applied.
        n_clips = len(signal) // samples_per_clip
        for file_identifier in range(n_clips):
            start = file_identifier * CLIP_LEN
            sliced_data = slice_data(start=start, end=start + CLIP_LEN,
                                     raw_data=signal, sample_rate=SAMPLE_RATE)
            store_wav(sliced_data, is_train, is_distressed, file_name, file_identifier)

if missing_from_gt:
    print('Skipped {} file(s) that are absent from the ground truth'.format(len(missing_from_gt)))
            

            

In [42]:
# Create the output tree for the spectrogram images:
#   MelSpectrograms/{train,test}/{Danger,Other}
# os.makedirs builds the intermediate folders itself, and exist_ok=True makes
# the cell safe to re-run (without it the call raises FileExistsError as soon
# as a folder already exists).
for split_name in ('train', 'test'):
    for class_name in ('Danger', 'Other'):
        os.makedirs(ABS_PATH + '/MelSpectrograms/' + split_name + '/' + class_name, exist_ok=True)

In [10]:
# NOTE(review): `gc` is already imported in the notebook's first cell, so this
# re-import is redundant.
import gc
# Run a full garbage-collection pass. gc.collect() returns the number of
# unreachable objects it found, which is the integer shown as the cell output.
gc.collect()

823073

In [5]:
import time, sys
from IPython.display import clear_output

def update_progress(progress):
    """Redraw a 20-character text progress bar in the cell output.

    Args:
        progress: Completed fraction. Ints are converted to float, any other
            non-float value is treated as 0, and the result is clamped to the
            range 0..1.

    The previous cell output is cleared (``clear_output(wait=True)``) before
    the new bar is printed.
    """
    bar_length = 20

    # Normalise the input to a number.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to [0, 1].
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(bar_length * fraction))
    bar = "#" * filled + "-" * (bar_length - filled)

    clear_output(wait = True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

In [20]:
# Moved to worker.py
def create_mel_spectrogram(args):
    """Render one audio clip as a spectrogram PNG.

    NOTE(review): the pool in save_mel_spectrogram calls
    ``worker.create_mel_spectrogram``, not this in-notebook definition; keep
    the two copies in sync or drop this one.

    Despite the name, the image is a dB-scaled STFT magnitude spectrogram
    (``librosa.stft`` + ``librosa.amplitude_to_db``) drawn on a linear Hz axis
    limited to 0-10000, not a mel-scaled spectrogram.

    Args:
        args: ``(cmap_type, file_path, save_loc)`` tuple, packed into a single
            argument:
            cmap_type -- colormap passed to ``specshow``;
            file_path -- audio file to read;
            save_loc  -- folder that receives ``<clip name>.png``.

    Returns:
        None. Nothing is written when the PNG already exists or when the clip
        is not exactly ``CLIP_LEN`` seconds long at ``SAMPLE_RATE``.
    """
    cmap_type, file_path, save_loc = args
    base_name = os.path.basename(file_path)
    img_file_name = base_name.replace(".wav", "")
    img_file_path = os.path.join(save_loc, img_file_name + '.png')
    # Already rendered on an earlier run: skip the expensive load.
    if os.path.exists(img_file_path):
        return

    signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)
    # Compare integer sample counts rather than testing a float duration for
    # equality.
    if len(signal) != CLIP_LEN * SAMPLE_RATE:
        return

    fig = plt.figure(figsize=(10,3))
    try:
        stft = librosa.stft(signal)
        log_spectrogram = librosa.amplitude_to_db(abs(stft))
        librosa.display.specshow(log_spectrogram, sr=SAMPLE_RATE, cmap=cmap_type, x_axis='time', y_axis='hz')
        plt.ylim(0, 10000)
        plt.savefig(img_file_path)
    finally:
        # Always release the figure, even if rendering or saving fails, so
        # figures do not pile up in memory over thousands of clips.
        plt.close(fig)
    

In [6]:
def save_mel_spectrogram(cmap_type, path, class_path, save_loc):
    """Render a spectrogram image for every file below ``path/class_path``.

    The files are processed in parallel by a ``multiprocessing.Pool`` running
    ``worker.create_mel_spectrogram``; progress is reported with
    ``update_progress`` after each finished file.

    Args:
        cmap_type: Colormap name forwarded to the worker.
        path: Root folder of a split (e.g. ``TRAIN_PATH``).
        class_path: Class sub-folder inside ``path``.
        save_loc: Folder forwarded to the worker as the image destination.
    """
    # Same guard as before: worker processes are only started when this code
    # runs as the main module.
    if __name__ != '__main__':
        return

    # Gather every file first so a single pool serves the whole tree (a new
    # pool used to be started for each directory visited) and the progress bar
    # reflects the overall total.
    tasks = [
        (cmap_type, os.path.join(dirpath, f), save_loc)
        for dirpath, _dirnames, filenames in os.walk(path + '/' + class_path)
        for f in filenames
    ]
    num_tasks = len(tasks)
    if num_tasks == 0:
        return

    with Pool() as p:
        for done, _ in enumerate(p.imap_unordered(worker.create_mel_spectrogram, tasks), 1):
            update_progress(done / num_tasks)


In [9]:
# Spectrogram images for the training clips in the Danger class.
save_mel_spectrogram(
    cmap_type='magma',
    path=TRAIN_PATH,
    class_path='Danger',
    save_loc=ABS_PATH + '/MelSpectrograms/train/Danger/',
)

In [7]:
# Spectrogram images for the training clips in the Other class.
save_mel_spectrogram(
    cmap_type='magma',
    path=TRAIN_PATH,
    class_path='Other',
    save_loc=ABS_PATH + '/MelSpectrograms/train/Other/',
)

Progress: [####################] 100.0%


In [20]:
# Spectrogram images for the test clips in the Other class.
save_mel_spectrogram(
    cmap_type='magma',
    path=TEST_PATH,
    class_path='Other',
    save_loc=ABS_PATH + '/MelSpectrograms/test/Other/',
)


1192156

In [None]:
# Spectrogram images for the test clips in the Danger class.
save_mel_spectrogram(
    cmap_type='magma',
    path=TEST_PATH,
    class_path='Danger',
    save_loc=ABS_PATH + '/MelSpectrograms/test/Danger/',
)
