## This notebook extracts syllables from a segmented waveform into spectrograms
- This notebook takes WAV datasets generated by `1.0-segment-song-from-wavs` and segments generated by `2.1-Segment-syllables-make-textgrid`
  - WAVs are expected to be in this format: `2017-04-16_17-27-44-760000.wav` and TextGrids are expected to be in the format `2017-04-16_17-27-44-760000.TextGrid`
- The notebook outputs one HDF5 file per individual, which contains metadata about who the individual is, when the syllable was sung, how long the syllable is, and which file the syllable comes from

### Import Packages

In [1]:
# enable the IPython autoreload extension; mode 2 re-imports all modules
# before each cell executes, so edits to imported source files are picked up
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from datetime import datetime
import pandas as pd
import copy
from praatio import tgio

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display

In [4]:
from avgn.utils.audio import load_wav, int16_to_float32
from avgn.utils.paths import DATA_DIR, ensure_dir
from avgn.utils.general import save_dict_pickle, zero_one_norm, rescale
from avgn.bout_segmentation.dynamic_threshold_segmentation import (
    dynamic_spectrogram,
    contiguous_regions,
    cut_syllables,
    boundaries_to_textgrid,
)
from avgn.visualization.spectrogram import (
    plot_segmentations,
    plot_bout_to_syllable_pipeline,
    visualize_spec
)
from avgn.bout_segmentation.segment_wav_from_textgrid import extract_syllables
from avgn.signalprocessing.spectrogramming import _build_mel_basis, inv_spectrogram



### Parameters for segmenting syllables

In [5]:
# the size of the syllables (pixels*pixels)
syll_size = 32

# parameters for filtering
filtering_params = {
    # filtering
    "highcut": 15000,
    "lowcut": 500,
}

mel_params = {
    "mel_filter": False,  # should a mel filter be used?
    "num_mels": syll_size,  # how many channels to use in the mel-spectrogram
    "fmin_mel": 300,  # low frequency cutoff for mel filter
    "fmax_mel": None,  # high frequency cutoff for mel filter
    "_mel_basis": None,  # placeholder; filled in later only if mel_filter is True
}

spectrogramming_params = {
    # spectrograms
    "num_freq": 512,  # how many channels to use in a spectrogram
    "num_freq_final": syll_size,  # how many channels to use in the resized spectrogram
    "preemphasis": 0.97,
    "min_silence_for_spec": 0.05,  # minimum length of silence for a spectrogram to be considered a good spectrogram
    "max_vocal_for_spec": 5.0,  # the longest a single vocalization (protosyllable) is allowed to be
    "frame_shift_ms": 0.5,  # step size for fft
    "frame_length_ms": 6,  # frame length for fft
    "min_level_dB": -100,  # minimum threshold db for computing spectrogram
    "min_level_dB_floor": -20,  # (db)
    "spec_thresh_delta_dB": 5,  # (db) TODO: document what this delta controls
    "ref_level_dB": 20,  # reference db for computing spec
}

bout_threshold_params = {
    # Silence Thresholding
    "silence_threshold": 0.01,  # normalized threshold for silence
    "min_len": 5.0,  # minimum length for a vocalization (fft frames)
}

syllabification_params = {
    # Syllabification
    "min_syll_len_s": 0.03,  # minimum length for a syllable
    "min_num_sylls": 20,  # min number of syllables to be considered a bout
    "max_size_syll": syll_size,  # the size of the syllable
    "resize_samp_fr": int(
        syll_size * 10.0
    ),  # (frames/s) the framerate of the syllable (in compressed spectrogram time components)
}
spectrogram_inversion_params = {
    # spectrogram inversion
    "griffin_lim_iters": 60,
    "power": 1.5,
    # Thresholding out noise
    "mel_noise_filt": 0.15,  # thresholds out low power noise in the spectrum - higher numbers will diminish inversion quality
}

# flat dictionary holding every hyperparameter, starting with the dataset identity
hparams = {"species": "BF", "dataset": "Koumura_Okanoya"}

# merge each parameter group into the flat dictionary, in this order; a later
# group would overwrite an earlier one on a key clash (all keys are currently unique)
for param_group in (
    filtering_params,
    spectrogramming_params,
    bout_threshold_params,
    syllabification_params,
    spectrogram_inversion_params,
    mel_params,
):
    hparams.update(param_group)

In [6]:
# timestamp that identifies this training instance
now_string = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# pickle the hyperparameters next to the data so that they can be reloaded
# later for recovering waveforms
dict_name = "parameter_dictionaries/" + now_string + "_dict.pickle"
dict_save = DATA_DIR / dict_name
ensure_dir(dict_save)
save_dict_pickle(hparams, dict_save)
print(dict_save)

/mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/parameter_dictionaries/2019-04-30_14-18-12_dict.pickle


In [7]:
# the mel basis function is only built when mel filtering is switched on
if hparams["mel_filter"]:
    hparams["_mel_basis"] = _build_mel_basis(hparams)
    

## Segment bouts

#### Run through in parallel

In [8]:
import os

from tqdm.autonotebook import tqdm

# scikit-learn deprecated its vendored copy of joblib (sklearn.externals.joblib)
# in 0.21 and removed it in 0.23, so prefer the standalone package and only
# fall back to the vendored copy on old installations
try:
    from joblib import Parallel, delayed
except ImportError:
    from sklearn.externals.joblib import Parallel, delayed

from avgn.bout_segmentation.dynamic_threshold_segmentation import textgrid_from_wav

In [9]:
# find the data bird folders (one sub-folder of `bouts` per individual)
dataset_location = DATA_DIR / hparams["species"] / hparams["dataset"] / "bouts"

# keep directories only, so that stray files sitting next to the bird folders
# are not mistaken for an individual by the dataset loop
indv_folders = [path for path in dataset_location.glob("*") if path.is_dir()]

# preview the first few folders
indv_folders[:3]

[PosixPath('/mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/BF/Koumura_Okanoya/bouts/Bird9'),
 PosixPath('/mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/BF/Koumura_Okanoya/bouts/Bird3'),
 PosixPath('/mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/BF/Koumura_Okanoya/bouts/Bird4')]

In [14]:
# --- dataset creation ---
skip_existing = True  # skip creating datasets that already exist
save_dataset = True  # whether to save the dataset

# --- running through WAVs in parallel ---
parallel = True  # run through WAVs in parallel
n_jobs = 20  # how many jobs to run in parallel
verbosity = 10  # how verbose parallel should be

# --- debugging and parameter tuning ---
visualize = False  # visualize the output of the algorithm for optimizing parameters
nex = 10  # how many example wavs to plot
verbose = False  # whether or not to output text for debugging

In [15]:
# output folder for the syllable spectrograms, with one sub-folder per syllable size
syllable_data_out = (
    DATA_DIR
    / hparams["species"]
    / hparams["dataset"]
    / "syllable_spectrograms"
    / str(syll_size)
)
ensure_dir(syllable_data_out)
syllable_data_out

PosixPath('/mnt/cube/tsainbur/Projects/github_repos/AVGN_419/AVGN/data/BF/Koumura_Okanoya/syllable_spectrograms/32')

In [16]:
from avgn.bout_segmentation.segment_wav_from_textgrid import prepare_syllable_dataset
import deepdish as dd

In [None]:
# loop through and make individual datasets (one HDF5 file per bird folder)
for indv_folder in tqdm(indv_folders):

    # get the birds name
    indv_name = indv_folder.name
    print(indv_name)

    # check if the file already exists
    dataset_loc = syllable_data_out / (indv_name + ".hdf5")
    if skip_existing and dataset_loc.is_file():
        continue

    # get wav_list
    wav_list = list((indv_folder / "wavs").glob("*.wav"))
    # get textgrid location
    textgrid_loc = indv_folder / "TextGrids"

    # if visualizing, make sure only to show a few elements
    # NOTE: in that case only the first `nex` wavs are processed, so set
    # `save_dataset = False` to avoid writing a partial dataset that a later
    # run with `skip_existing = True` would skip over
    if visualize:
        wav_list = wav_list[:nex]

    indv_data = prepare_syllable_dataset(
        indv_name,
        wav_list,
        dataset_loc,
        textgrid_loc,
        hparams=hparams,
        parallel=parallel,
        n_jobs=n_jobs,
        par_verbosity=verbosity,
        verbose=verbose,
        visualize=visualize,
    )
    # convert datetime to numpy datetime so that it can be saved as HDF5
    indv_data["wav_datetime"] = [np.datetime64(i) for i in indv_data["wav_datetime"]]

    # save the dataset as hdf5, honouring the `save_dataset` switch from the
    # configuration cell (it was previously ignored and the file always written)
    if save_dataset:
        dd.io.save(dataset_loc, indv_data, compression=None)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

Bird9


HBox(children=(IntProgress(value=0, max=154), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   27.0s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   33.9s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   55.7s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  2.0min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  2.3min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  2.8min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:  3.2min
[Parallel(n_jobs=20)]: Done 131 out of 154 | elapsed:  3.9min remaining:   40.8s
[Parallel(n_jobs=20)]: Done 147 out of 154 | elapsed:  4.3min remaining:   12.2s
[Parallel(n_jobs=20)]: Done 154 out of 154 | elapsed:  4.4min finished


Bird3


HBox(children=(IntProgress(value=0, max=396), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   22.9s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   29.0s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   48.6s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  1.7min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  2.1min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:  2.5min
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:  2.8min
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed:  3.2min
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:  3.6min
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  4.1min
[Parallel(n_jobs=20)]: Done 202 tasks      | elapsed:  4.6min
[Parallel(n_jobs=20)]: Done 225 tasks      | elapsed:  

Bird4


HBox(children=(IntProgress(value=0, max=320), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   10.4s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   22.1s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   30.1s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   43.8s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  1.9min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:  2.2min
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:  2.5min
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed:  2.9min
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:  3.3min
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  3.7min
[Parallel(n_jobs=20)]: Done 202 tasks      | elapsed:  4.1min
[Parallel(n_jobs=20)]: Done 225 tasks      | elapsed:  

Bird0


HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:    7.7s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   24.0s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   28.8s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   46.0s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:   57.1s
[Parallel(n_jobs=20)]: Done  66 out of  95 | elapsed:  1.4min remaining:   36.5s
[Parallel(n_jobs=20)]: Done  76 out of  95 | elapsed:  1.5min remaining:   22.0s
[Parallel(n_jobs=20)]: Done  86 out of  95 | elapsed:  1.7min remaining:   10.4s
[Parallel(n_jobs=20)]: Done  95 out of  95 | elapsed:  1.8min finished


Bird10


HBox(children=(IntProgress(value=0, max=90), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   10.3s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   19.1s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   29.3s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   39.5s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:   52.3s
[Parallel(n_jobs=20)]: Done  61 out of  90 | elapsed:  1.1min remaining:   32.1s
[Parallel(n_jobs=20)]: Done  71 out of  90 | elapsed:  1.3min remaining:   20.7s
[Parallel(n_jobs=20)]: Done  81 out of  90 | elapsed:  1.4min remaining:    9.3s
[Parallel(n_jobs=20)]: Done  90 out of  90 | elapsed:  1.4min finished


Bird7


HBox(children=(IntProgress(value=0, max=107), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   12.5s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   26.8s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   46.1s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done  79 out of 107 | elapsed:  2.2min remaining:   46.4s
[Parallel(n_jobs=20)]: Done  90 out of 107 | elapsed:  2.4min remaining:   27.3s
[Parallel(n_jobs=20)]: Done 101 out of 107 | elapsed:  2.6min remaining:    9.3s
[Parallel(n_jobs=20)]: Done 107 out of 107 | elapsed:  2.8min finished


Bird5


HBox(children=(IntProgress(value=0, max=277), HTML(value='')))

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   11.9s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   17.5s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   30.2s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   35.4s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:   49.4s
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:  1.8min
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:  2.1min
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed:  2.4min
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:  2.7min


In [18]:
# inspect the dataset dict left over from the last individual processed by the loop
indv_data

{'indv': array(['Bird6', 'Bird6', 'Bird6', ..., 'Bird6', 'Bird6', 'Bird6'],
       dtype='<U5'),
 'original_wav_name': array(['/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/37.wav',
        '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/37.wav',
        '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/37.wav',
        ...,
        '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/154.wav',
        '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/154.wav',
        '/mnt/cube/Datasets/BengaleseFinch/Koumura_Okanoya-paper_data/Bird6/Wave/154.wav'],
       dtype='<U79'),
 'wav_datetime': [numpy.datetime64('1900-04-25T08:00:00.000000'),
  numpy.datetime64('1900-04-25T08:00:00.000000'),
  numpy.datetime64('1900-04-25T08:00:00.000000'),
  numpy.datetime64('1900-04-25T08:00:00.000000'),
  numpy.datetime64('1900-04-25T08:00:00.000000'),
  numpy.datetime64('1900-04-25T08:00: