In [1]:
import gc
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import soundfile as sf
import tensorflow as tf
from tensorflow import keras

In [2]:
# Project root directory. Defaults to the original local location; set the
# FSD50K_PROJECT_DIR environment variable to run the notebook on another machine.
ABS_PATH = os.path.abspath(
    os.environ.get(
        'FSD50K_PROJECT_DIR',
        os.path.join('D:', '/Data Playground/Academics/Research Project'),
    )
)
# Ground-truth table read from FSD50K.ground_truth/eval.csv under the project root.
ground_truth = pd.read_csv(
    os.path.join(ABS_PATH, 'FSD50K', 'FSD50K.ground_truth', 'eval.csv')
)

In [3]:
# Flag each row as animal (True) or not (False) based on its label string.

animal_labels = {'Bark', 'Animal', 'Cat', 'Dog', 'pets', 'Purr',
                 'wild', 'domestic_animals', 'farm_animals', 'Meow', 'working_animals', 'livestock' }
# Sorted so the generated regex is the same on every run (set order is not guaranteed).
pattern = '|'.join(sorted(animal_labels))

# NOTE(review): this is a case-sensitive substring match, so e.g. 'Cat' also matches any
# longer label that contains 'Cat' -- confirm this is the intended matching rule.
# na=False: a row with a missing label string is flagged False rather than NaN
# (NaN would be truthy in a later `if is_animal:` check).
value = ground_truth['labels'].str.contains(pattern, na=False)

# Keep the original label string as 'grouped_labels' and store the boolean flag as 'labels'.
# Renaming by column name (instead of assigning a fixed list of five names) works whether
# or not the loaded CSV has a 'split' column.
new_GT = ground_truth.rename(columns={'labels': 'grouped_labels'}).assign(labels=value)
new_GT

Unnamed: 0,fname,grouped_labels,mids,split,labels
0,64760,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
1,16399,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
2,16401,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
3,16402,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
4,16404,"Electric_guitar,Guitar,Plucked_string_instrume...","/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf",train,False
...,...,...,...,...,...
40961,102863,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,True
40962,389607,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,True
40963,90091,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,True
40964,244718,"Fowl,Livestock_and_farm_animals_and_working_an...","/m/025rv6n,/m/0ch8v,/m/0jbk",train,True


In [4]:
# Class balance: number of rows flagged False vs. True.
new_GT.groupby(by='labels').size()

labels
False    37691
True      3275
dtype: int64

In [5]:
# Sample rate used when loading/writing audio, and the dev/eval audio folders.
SAMPLE_RATE = 44100
DATA_PATH = f'{ABS_PATH}/FSD50K/FSD50K.dev_audio/'
EVAL_DATA_PATH = f'{ABS_PATH}/FSD50K/FSD50K.eval_audio/'

In [6]:
# Output folders for the split clips.
# exist_ok=True keeps this cell re-runnable; without it a second run raises FileExistsError.
os.makedirs(ABS_PATH+'/SplitAudio/test/Animals', exist_ok=True)
os.makedirs(ABS_PATH+'/SplitAudio/train/Animals', exist_ok=True)


In [6]:
# Destination roots for the split clips (both end with '/').
TRAIN_PATH = ABS_PATH + '/SplitAudio/train/'
TEST_PATH = ABS_PATH + '/SplitAudio/test/'
# Length of one clip in seconds.
CLIP_LEN = 3
# Number of samples in one full clip; derived from SAMPLE_RATE instead of repeating 44100.
a_len = SAMPLE_RATE * CLIP_LEN

In [7]:
def slice_data(start, end, raw_data,  sample_rate):
    """Return the part of ``raw_data`` between ``start`` and ``end`` seconds.

    Args:
        start: Start time in seconds.
        end: End time in seconds.
        raw_data: Indexable sequence of samples (anything supporting ``len`` and slicing).
        sample_rate: Number of samples per second.

    Returns:
        ``raw_data[start_ind:end_ind]`` where both indices are the times converted to
        sample positions (truncated to int) and clamped to ``[0, len(raw_data)]``.
        The result is empty when the window lies outside the data.
    """
    max_ind = len(raw_data)
    # Clamp at 0 as well as at the end: a negative time would otherwise be treated as a
    # Python negative index and silently slice from the end of the data.
    start_ind = min(max(int(start * sample_rate), 0), max_ind)
    end_ind = min(max(int(end * sample_rate), 0), max_ind)
    return raw_data[start_ind:end_ind]


In [9]:
# Cut every animal-flagged recording under EVAL_DATA_PATH into consecutive
# CLIP_LEN-second clips and write them as .wav files.
#
# The clips are written under TEST_PATH: the source is the eval audio, and the later
# spectrogram step reads its input from TEST_PATH + 'Animals'.
output_dir = TEST_PATH + 'Animals/'
# Recordings shorter than one clip or longer than 30 s are skipped.
MIN_DURATION = CLIP_LEN
MAX_DURATION = 30

for dirpath, dirnames, filenames in os.walk(EVAL_DATA_PATH):
    for f in filenames:
        # Only numerically named .wav files can be matched against new_GT['fname'].
        file_name, extension = os.path.splitext(f)
        if extension != '.wav' or not file_name.isdigit():
            continue

        # Look up the label first so that non-animal recordings are never decoded.
        row_focus = new_GT.loc[new_GT['fname'] == int(file_name)]
        if row_focus.empty:
            # No ground-truth row for this file: nothing to decide on, skip it.
            continue
        is_animal = row_focus.iloc[0]['labels']
        if not is_animal:
            continue

        file_path = os.path.join(dirpath, f)
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        duration = librosa.get_duration(y=signal, sr=SAMPLE_RATE)
        if duration < MIN_DURATION or duration > MAX_DURATION:
            continue

        start = 0
        end = CLIP_LEN
        file_identifier = 0
        # `<=` so that a clip ending exactly at the end of the recording is kept
        # (with `<`, a recording of exactly CLIP_LEN seconds produced no clip at all).
        # With MAX_DURATION = 30 and CLIP_LEN = 3 the identifier stays within 0-9.
        while end <= duration:
            sliced_data = slice_data(start=start, end=end, raw_data=signal, sample_rate=SAMPLE_RATE)
            save_location = output_dir + file_name + str(file_identifier) + '.wav'
            sf.write(file=save_location, data=sliced_data, samplerate=SAMPLE_RATE)
            start = end
            end = end + CLIP_LEN
            file_identifier = file_identifier + 1
        # The remainder shorter than CLIP_LEN is discarded.
        # TODO: optionally keep remainders of at least CLIP_LEN/2 seconds by padding them
        # with silence up to a_len samples (e.g. librosa.util.pad_center).
            

            

In [10]:
# Output folder for the spectrogram images.
# exist_ok=True keeps this cell re-runnable; without it a second run raises FileExistsError.
os.makedirs(ABS_PATH+'/MelSpectrograms/test/Animals', exist_ok=True)

In [11]:
def save_mel_spectrogram(cmap_type, path, class_path, save_loc):
    """Save a spectrogram PNG for every full-length .wav clip under ``path/class_path``.

    NOTE: despite the function name, the image is a log-amplitude STFT spectrogram drawn
    on a linear Hz axis (``librosa.stft`` + ``librosa.amplitude_to_db``), cropped to
    0-10000 Hz; no mel filter bank is applied.

    Args:
        cmap_type: Matplotlib colormap name passed to ``librosa.display.specshow``.
        path: Root folder that contains the class folders.
        class_path: Class sub-folder to walk (recursively) for .wav files.
        save_loc: Prefix for the output files; each image is written to
            ``save_loc + <clip name> + '.png'``, so a folder must end with a separator.

    Clips whose length is not exactly ``CLIP_LEN`` seconds at ``SAMPLE_RATE`` are skipped.
    """
    # Compare sample counts instead of testing a float duration for equality.
    expected_samples = SAMPLE_RATE * CLIP_LEN

    # Make sure the destination folder exists before the first savefig call.
    out_dir = os.path.dirname(save_loc)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for dirpath, dirnames, filenames in os.walk(os.path.join(path, class_path)):
        for f in filenames:
            file_name, extension = os.path.splitext(f)
            if extension != '.wav':
                # Ignore stray non-audio files in the clip folder.
                continue
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
            if len(signal) != expected_samples:
                continue

            fig = plt.figure(figsize=(10, 3))
            try:
                stft = librosa.stft(signal)
                log_spectrogram = librosa.amplitude_to_db(abs(stft))
                librosa.display.specshow(log_spectrogram, sr=SAMPLE_RATE, cmap=cmap_type, x_axis='time', y_axis='hz')
                plt.ylim(0, 10000)
                fig.savefig(save_loc + file_name + '.png')
            finally:
                # Always release the figure, even if rendering or saving fails, so that
                # figures do not pile up in memory over thousands of clips.
                plt.close(fig)

In [12]:
# Render spectrogram images for the clips stored under TEST_PATH/Animals.
save_mel_spectrogram(
    cmap_type='magma',
    path=TEST_PATH,
    class_path='Animals',
    save_loc=ABS_PATH + '/MelSpectrograms/test/Animals/',
)

In [13]:
# Force a garbage-collection pass after the figure-heavy rendering step; the displayed
# value is the number returned by gc.collect(). `gc` is imported in the top import cell.
gc.collect()

1176390