In [None]:
%reload_ext autoreload
%autoreload 2

### Threshold Noise from large WAVs
- data are linked at https://figshare.com/articles/BirdsongRecognition/3470165
- Part of the function of this notebook is to segment bouts of song from silence, when you have a microphone continuously recording from a bird
- The script reads each wav file, finds periods of increased noise, and segments each one out as a bout
  - There is also some basic filtering to ignore noisy wavs in which birds are shuffling around, making wing noises, etc.
  - Params need to be uniquely set to your birds

In [61]:
import numpy as np
import os
from tqdm import tqdm_notebook as tqdm
from glob import glob
from datetime import datetime, timedelta
import pandas as pd 
from sklearn.externals.joblib import Parallel, delayed
from collections import defaultdict
%matplotlib inline

In [62]:
# import local methods from the source code
from avgn import wav_to_bouts
from avgn.utils.paths import DATA_DIR

### Find Each WAV, and the bird attached to the wav
- this just parses out filename structure which is unique to this dataset

In [65]:
# the location of the dataset (root folder holding one Bird*/ subfolder per bird)
# NOTE: keep the trailing slash -- other cells build paths by plain string
# concatenation with input_loc (e.g. input_loc + 'Bird*/Wave/*.wav')
input_loc = '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/'

In [6]:
# the format of the file structure: one (glob pattern, dataset key) pair per dataset
# the dataset key ('BF') is what later cells use to look up parameters in param_dict
dsets = [(input_loc+'Bird*/Wave/*.wav', 'BF')]

In [7]:
# get a list of wavs given that file structure format
# (collect plain Python lists and convert once, rather than growing arrays
# with np.append starting from an empty float array)
wav_paths = []
wav_dsets = []
for search_directory, dset in tqdm(dsets):
    new_wavs = glob(search_directory)
    wav_paths.extend(new_wavs)
    # tag every wav with the dataset key it was found under
    wav_dsets.extend([dset] * len(new_wavs))
wav_list = np.array(wav_paths)
dset_list = np.array(wav_dsets)
# fail with a clear message instead of an IndexError on wav_list[0] below
if len(wav_list) == 0:
    raise FileNotFoundError(
        "no wav files matched: %s" % [pattern for pattern, _ in dsets]
    )
print(wav_list[0], len(wav_list))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird4/Wave/412.wav 2965


In [8]:
# grab the bird's name from that list
# each wav is found via the pattern Bird*/Wave/*.wav, so the bird is the
# grandparent folder of the file; unlike split('/')[6] this does not depend on
# how many directories deep input_loc happens to be
bird_names = [
    os.path.basename(os.path.dirname(os.path.dirname(wav_loc)))
    for wav_loc in wav_list
]
print(np.unique(bird_names))

['Bird0' 'Bird1' 'Bird10' 'Bird2' 'Bird3' 'Bird4' 'Bird5' 'Bird6' 'Bird7'
 'Bird8' 'Bird9']


#### Extract times for bird / create dataframe of wavs

In [9]:
# find all of the XML files associated with that dataset
# (os.path.join avoids the doubled '/' that input_loc + '/*/...' produces,
# since input_loc already ends with a slash)
bird_xml_locs = glob(os.path.join(input_loc, '*', 'Annotation.xml'))
bird_xml_locs[:2]

['/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird4/Annotation.xml',
 '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird3/Annotation.xml']

#### this part requires some custom parsing of an XML file to get wav time info
- as far as I can tell specific datetime information is not available in the XML

In [19]:
import xml.etree.ElementTree
def Koumura_Okanoya_wavtimes(wav_list):
    """Make up a placeholder datetime for every wav file.

    We can't parse anything about datetime from these wavs, so each file is
    simply assigned a time one hour after the previous one, starting at
    1900-01-01 00:00, in the order the files appear in ``wav_list``.

    Parameters
    ----------
    wav_list : sequence
        The wav files; only its length is used.

    Returns
    -------
    numpy.ndarray
        One ``datetime`` per entry of ``wav_list`` (empty if ``wav_list`` is empty).
    """
    first_time = datetime(1900, 1, 1, 0, 0)
    # default give up method: the i-th wav is stamped i hours after first_time
    return np.array(
        [first_time + timedelta(hours=wav_idx) for wav_idx in range(len(wav_list))]
    )
    
def Koumura_Okanoya_parser(bird_xml_locs, wav_list):
    """Parse XML annotations in the Koumura_Okanoya data format.

    Every ``Sequence`` element found directly under the root of each
    annotation file becomes one row of ``song_df``. All values are kept as the
    raw text found in the XML (no numeric conversion is done here).

    NOTE: besides its arguments, this function reads the notebook-level
    variables ``input_loc`` (to build ``WavLoc``) and ``dset_list`` /
    ``bird_names`` (to build ``wav_df``); those must already be defined and
    be aligned with ``wav_list``.

    Parameters
    ----------
    bird_xml_locs : list of str
        Paths of the per-bird ``Annotation.xml`` files. The bird name is taken
        from the folder that contains each file.
    wav_list : sequence of str
        Paths of all wav files in the dataset.

    Returns
    -------
    wav_df : pandas.DataFrame
        One row per wav file: ``filename``, ``wav_time``, ``dset``, ``birdname``.
    song_df : pandas.DataFrame
        One row per annotated sequence: ``bird``, ``WavLoc``, ``WaveFileName``,
        ``Position``, ``Length``, ``NumNote``, ``NotePositions``,
        ``NoteLengths``, ``NoteLabels``.
    """
    song_columns = [
        "bird",
        "WavLoc",
        "WaveFileName",
        "Position",
        "Length",
        "NumNote",
        "NotePositions",
        "NoteLengths",
        "NoteLabels",
    ]
    # collect rows in a list and build the dataframe once at the end; appending
    # with song_df.loc[len(song_df)] re-allocates the frame for every row
    song_rows = []
    for bird_loc in tqdm(bird_xml_locs):
        bird_xml = xml.etree.ElementTree.parse(bird_loc).getroot()
        bird = bird_loc.split("/")[-2]
        # Element.getchildren() was removed in Python 3.9; list(element) is the
        # documented replacement
        for element in tqdm(list(bird_xml), leave=False):
            if element.tag != "Sequence":
                continue
            # reset per sequence so a missing tag can never silently reuse the
            # value parsed from the previous sequence
            position = length = WaveFileName = NumNote = None
            notePositions = []
            noteLengths = []
            noteLabels = []
            for seq_element in element:
                if seq_element.tag == "Position":
                    position = seq_element.text
                elif seq_element.tag == "Length":
                    length = seq_element.text
                elif seq_element.tag == "WaveFileName":
                    WaveFileName = seq_element.text
                elif seq_element.tag == "NumNote":
                    NumNote = seq_element.text
                elif seq_element.tag == "Note":
                    for note_element in seq_element:
                        if note_element.tag == "Label":
                            noteLabels.append(note_element.text)
                        elif note_element.tag == "Position":
                            notePositions.append(note_element.text)
                        elif note_element.tag == "Length":
                            noteLengths.append(note_element.text)
            # a sequence without a file name gets no wav location
            if WaveFileName is None:
                wav_loc = None
            else:
                wav_loc = input_loc + bird + "/Wave/" + WaveFileName
            song_rows.append(
                {
                    "bird": bird,
                    "WavLoc": wav_loc,
                    "WaveFileName": WaveFileName,
                    "Position": position,
                    "Length": length,
                    "NumNote": NumNote,
                    "NotePositions": notePositions,
                    "NoteLengths": noteLengths,
                    "NoteLabels": noteLabels,
                }
            )
    song_df = pd.DataFrame(song_rows, columns=song_columns)

    # we can't parse anything about datetime from these wavs so we just make something up
    wav_times = Koumura_Okanoya_wavtimes(wav_list)

    # Make a pandas dataframe corresponding to the files and datetimes
    wav_df = pd.DataFrame.from_dict(
        {
            "filename": wav_list,
            "wav_time": wav_times,
            "dset": dset_list,
            "birdname": bird_names,
        }
    )

    return wav_df, song_df

In [20]:
# parse the annotations: wav_df gets one row per wav file, song_df one row per annotated sequence
wav_df, song_df = Koumura_Okanoya_parser(bird_xml_locs, wav_list)

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1964), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2110), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1351), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1486), HTML(value='')))

HBox(children=(IntProgress(value=0, max=412), HTML(value='')))

HBox(children=(IntProgress(value=0, max=572), HTML(value='')))

HBox(children=(IntProgress(value=0, max=419), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1854), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1495), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2501), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1238), HTML(value='')))




In [21]:
# preview the first three annotated sequences
song_df.head(3)

Unnamed: 0,bird,WavLoc,WaveFileName,Position,Length,NumNote,NotePositions,NoteLengths,NoteLabels
0,Bird4,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,0.wav,32000,60880,13,"[5056, 10240, 15648, 26240, 29760, 33952, 3708...","[2304, 2464, 2848, 2848, 1696, 2336, 2528, 265...","[0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2]"
1,Bird4,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,0.wav,92880,56064,14,"[512, 4288, 8480, 12896, 17888, 23168, 28096, ...","[2848, 3008, 2880, 2336, 2240, 2944, 3264, 176...","[3, 3, 3, 4, 4, 0, 0, 1, 2, 2, 2, 2, 2, 2]"
2,Bird4,/mnt/cube/Datasets/BengaleseFinch/Koumura_Okan...,0.wav,152624,51312,13,"[704, 5248, 10240, 15520, 19456, 22112, 25856,...","[2880, 2496, 2304, 3264, 1920, 2912, 2752, 262...","[3, 4, 4, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3]"


### Parameters for processing vocalizations
- ***These will almost certainly need to be set based upon species and the quality of your vocalizations***

In [39]:
# parameters for the spectrogram
spectrogram_params = dict(
    # FFT (we create a spectrogram here to filter out noise)
    num_freq=1024,  # how many channels to use in a spectrogram
    sample_rate=44100,  # what rate are your WAVs sampled at?
    preemphasis=0.97,
    ref_level_db=20,  # reference db for computing spec
    min_level_db=-50,  # threshold for spectrograms (lower filters out more noise)
    frame_shift_ms=20,  # step size for fft
    frame_length_ms=40,  # frame length for fft
)

# parameters for filtering bouts vs noise
bout_filtering_hyperparams = dict(
    # filtering
    min_amp_val=1000,  # the minimum value of a wav's amplitude to be considered containing any sound
    min_segment_length_s=1.0,  # How long a bout has to be to count
    max_segment_length_s=200.0,  # If a bout is too long, dont count it
    min_silence_pct=0.05,  # measure of noise in wav, by threshing the pct of time that the wav is silent
    vocal_range_Hz=[700, 15000],  # expected range of frequencies for vocalization
)

# parameters for filtering the signal
signal_filter_params = dict(
    lowcut=50,  # Hz # Low cut for our butter bandpass filter
    highcut=15000,  # Hz # High cut for our butter bandpass filter
    rms_window=0.01,  # seconds # the size of your window
    rms_stride=0.01,  # seconds # how big your step size should be for moving the filter
    noise_thresh=0.01,  # threshold percent of maximum noise to consider silence
    segment_padding=4.0,  # seconds to pad waveform extracted
    rms_padding=1.0,  # seconds # how much to pad around vocalizations
)

# one flat parameter set per dataset key; "BF" merges the three groups above
param_dict = {
    "BF": {
        **spectrogram_params,
        **bout_filtering_hyperparams,
        **signal_filter_params,
    }
}

In [40]:
# print the number of wav files found for each individual
# (the group keys are given as a list: a tuple of keys is deprecated in pandas
# and newer versions interpret it as one single column label)
for (dset, bird), group in wav_df.groupby(["dset", "birdname"]):
    print(dset, bird, len(group))

BF Bird0 135
BF Bird1 315
BF Bird10 94
BF Bird2 339
BF Bird3 402
BF Bird4 441
BF Bird5 335
BF Bird6 235
BF Bird7 310
BF Bird8 142
BF Bird9 217


  


#### Parameters for debugging / speed of preprocessing

In [55]:
skip_created = True # whether to skip song that has already been processed
parallel = True # whether to run this algorithm in parallel (across wav files)
visualize = False # whether to output visualizations of spectrograms to the notebook screen - this is useful for setting parameters - you may also want to edit the code to visualize other aspects of the algorithm
n_parallel = 10 # how many joblib workers to run in parallel (passed as n_jobs; only used if parallel == True)
verbosity = 1 # how verbose to make the output of the parallelization (higher = more, 0 = none, >50 output is sent to std.out)
verbose=True # forwarded as `verbose` to wav_to_bouts.process_bird_wav (presumably toggles its per-file messages -- confirm in avgn)

In [66]:
# species and dataset labels; here they are used to build the output path below
species = 'BF'
dataset = "Koumura_Okanoya"
# Where to put the final HDF5 files (DATA_DIR comes from avgn.utils.paths)
save_to_folder = DATA_DIR / species / dataset / 'bouts' 
# whether or not to save spectrogram PNGs to the save_to_folder to visually inspect whether the song segmentation algorithm works
save_spectrograms = True 

In [58]:
# loop through all wavs, one bird at a time
try:
    # the group keys are given as a list: a tuple of keys is deprecated in
    # pandas and newer versions interpret it as one single column label
    for (dset, bird), group in tqdm(wav_df.groupby(["dset", "birdname"])):

        print("processing %s to save at %s" % (bird, save_to_folder))
        print("total wavs: ", len(group))

        if parallel:
            # bind the pool to its own name so the boolean `parallel` flag is
            # not overwritten by the Parallel object after the first bird
            with Parallel(n_jobs=n_parallel, verbose=verbosity) as parallel_pool:
                parallel_pool(
                    delayed(wav_to_bouts.process_bird_wav)(
                        bird,
                        row.filename,
                        row.wav_time,
                        param_dict[dset],
                        save_to_folder,
                        visualize=visualize,
                        skip_created=skip_created,
                        save_spectrograms=save_spectrograms,
                        verbose=verbose,
                    )
                    # read columns by name so the loop does not depend on the
                    # column order of wav_df
                    for row in tqdm(
                        group.itertuples(index=False), total=len(group)
                    )
                )
        else:
            for row in tqdm(
                group.itertuples(index=False), total=len(group),
            ):
                print(row.filename)
                wav_to_bouts.process_bird_wav(
                    bird,
                    row.filename,
                    row.wav_time,
                    param_dict[dset],
                    save_to_folder,
                    visualize=visualize,
                    skip_created=skip_created,
                    save_spectrograms=save_spectrograms,
                    verbose=verbose,
                )
except KeyboardInterrupt:
    # allow stopping a long run from the notebook without a traceback
    print("interrrupted")

  after removing the cwd from sys.path.


HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

processing Bird0 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  135


HBox(children=(IntProgress(value=0, max=135), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    6.7s


processing Bird1 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  315


[Parallel(n_jobs=10)]: Done 135 out of 135 | elapsed:    8.0s finished


HBox(children=(IntProgress(value=0, max=315), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  60 tasks      | elapsed:    1.1s


processing Bird10 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  94


[Parallel(n_jobs=10)]: Done 296 out of 315 | elapsed:    5.8s remaining:    0.4s
[Parallel(n_jobs=10)]: Done 315 out of 315 | elapsed:    6.0s finished


HBox(children=(IntProgress(value=0, max=94), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.


processing Bird2 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  339


[Parallel(n_jobs=10)]: Done  94 out of  94 | elapsed:    1.1s finished


HBox(children=(IntProgress(value=0, max=339), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done 100 tasks      | elapsed:    1.5s


processing Bird3 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  402


[Parallel(n_jobs=10)]: Done 339 out of 339 | elapsed:    4.6s finished


HBox(children=(IntProgress(value=0, max=402), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  60 tasks      | elapsed:    0.9s


processing Bird4 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  441


[Parallel(n_jobs=10)]: Done 402 out of 402 | elapsed:    4.8s finished


HBox(children=(IntProgress(value=0, max=441), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  80 tasks      | elapsed:    3.0s
[Parallel(n_jobs=10)]: Done 362 tasks      | elapsed:   14.0s


processing Bird5 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  335


[Parallel(n_jobs=10)]: Done 441 out of 441 | elapsed:   17.7s finished


HBox(children=(IntProgress(value=0, max=335), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    1.8s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:    8.8s


processing Bird6 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  235


[Parallel(n_jobs=10)]: Done 335 out of 335 | elapsed:   16.4s finished


HBox(children=(IntProgress(value=0, max=235), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    2.1s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   11.2s


processing Bird7 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  310


[Parallel(n_jobs=10)]: Done 235 out of 235 | elapsed:   14.2s finished


HBox(children=(IntProgress(value=0, max=310), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  40 tasks      | elapsed:    1.6s


processing Bird8 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  142


[Parallel(n_jobs=10)]: Done 310 out of 310 | elapsed:    9.2s finished


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  60 tasks      | elapsed:    3.1s


processing Bird9 to save at /mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/bf_wav
total wavs:  217


[Parallel(n_jobs=10)]: Done 142 out of 142 | elapsed:    6.6s finished


HBox(children=(IntProgress(value=0, max=217), HTML(value='')))

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  40 tasks      | elapsed:    2.3s
[Parallel(n_jobs=10)]: Done 217 out of 217 | elapsed:   10.6s finished
