### Zebra finch custom parsing
- An unlabelled dataset of zebra finch vocalizations
    - .MAT files with sampling rate and audio data
- This notebook creates a JSON corresponding to each WAV file (and Noise file where available).
- Dataset origin:
    - https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0181992
    - https://osf.io/bx76r/

In [1]:
from avgn.utils.general import prepare_env



In [2]:
# Project helper that configures the environment for this notebook; the
# recorded output of this cell shows it setting CUDA_VISIBLE_DEVICES.
prepare_env()

env: CUDA_VISIBLE_DEVICES=GPU


### Import relevant packages

In [3]:
from joblib import Parallel, delayed
from tqdm.autonotebook import tqdm
import pandas as pd
pd.options.display.max_columns = None
import librosa
from datetime import datetime
import numpy as np

In [4]:
import avgn
from avgn.custom_parsing.zebra_finch_gardner import generate_json_wav_noise
from avgn.utils.paths import DATA_DIR

### Load data in original format

In [5]:
# Dataset identifier; same name as the avgn.custom_parsing module imported above.
DATASET_ID = 'zebra_finch_gardner'

In [6]:
# Timestamp identifier for this run, captured once at execution time and
# handed to generate_json_wav_noise for every file written by this notebook.
run_started = datetime.now()
DT_ID = f"{run_started:%Y-%m-%d_%H-%M-%S}"
DT_ID

'2019-10-08_10-16-03'

In [8]:
# Root directory of the downloaded dataset (the .mat files are globbed from here).
# NOTE(review): hard-coded absolute path, presumably specific to the original
# machine — adjust before re-running elsewhere.
DSLOC = avgn.utils.paths.Path('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/')
DSLOC

PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R')

In [9]:
# Collect the .mat files located exactly one directory below DSLOC; the
# leading [!.] in the file pattern excludes names that start with a dot.
mat_pattern = '*/[!.]*.mat'
MATFILES = [*DSLOC.expanduser().glob(mat_pattern)]
len(MATFILES), MATFILES[0]

(8, PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr13/song.mat'))

In [10]:
# Show every matched .mat path (the recorded run found eight, one song.mat
# per bird directory).
MATFILES

[PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr13/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr77/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr28/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lny46/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lny4rb/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr12/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lny64/song.mat'),
 PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lny42/song.mat')]

### parse MAT and create wav/JSON

In [11]:
import h5py as h5

In [12]:
# For every bird's MAT file: read the song / non-song arrays and the sample
# rate, then write one WAV + JSON (+ noise) per recording in parallel via
# generate_json_wav_noise.
for mat_file in tqdm(MATFILES):
    # the individual's ID is the name of the directory holding the MAT file
    indv = mat_file.parent.stem

    # Try loading the MAT file. h5py can only open HDF5-based MAT files;
    # anything else raises OSError, and a missing dataset raises KeyError.
    # Only those expected failures are skipped -- a bare `except:` would also
    # hide programming errors (e.g. the AttributeError raised by the removed
    # `Dataset.value` attribute) and swallow KeyboardInterrupt.
    try:
        # open read-only explicitly; the source data must never be modified
        with h5.File(mat_file, "r") as f:
            # `dataset[()]` reads the whole dataset into memory and replaces
            # the `.value` attribute that h5py deprecated and later removed
            songs = f["song"][()]
            nonsongs = f["nonsong"][()]
            rate = f["fs"][()]
    except (OSError, KeyError) as e:
        # report why the file was skipped instead of failing silently
        print(f"{indv} failed ({type(e).__name__}: {e})")
        continue

    # "fs" may come back as a size-1 array rather than a scalar (TODO confirm
    # shape); extract the single element explicitly, because calling int() on
    # an array with ndim > 0 is deprecated in recent NumPy versions.
    sample_rate = int(np.asarray(rate).item())

    with Parallel(n_jobs=-1, verbose=10) as parallel:
        parallel(
            delayed(generate_json_wav_noise)(
                indv, wav_num, song, nonsong, sample_rate, DT_ID
            )
            # songs and nonsongs are paired by position; zip stops at the
            # shorter of the two
            for wav_num, (song, nonsong) in tqdm(
                enumerate(zip(songs, nonsongs)), total=len(songs)
            )
        )

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   17.9s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:   18.2s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:   18.2s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:   18.3s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1997s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:   18.3s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   18.4s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 175 tasks      | elapsed:   19.2s
[Parallel(n_jobs=-1)]: Done 217 tasks      | elapsed:   19.4s
[Parallel(n_jobs=-1)]: Done 259 tasks      | elapsed:   19.6s
[Parallel(n_jobs=-1)]: Done 305 ta

HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1997s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 152 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 177 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 219 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 265 ta

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1981s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1668s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done 151 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 189 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 251 tasks      | elapsed:    2.5s
[Parallel(n_j

lny46 failed
lny4rb failed


HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1940s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1992s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done 118 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 156 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 194 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 266 tasks      | elapsed:    2.7s
[Parallel(n_j

HBox(children=(IntProgress(value=0, max=2818), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1825s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 149 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 187 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 229 tasks      | elapsed:   10.1s
[Parallel(n_jobs=-1)]: Batch computation too slow (2.3494s.) Setting batch_size=1.
[Parallel(n_j

HBox(children=(IntProgress(value=0, max=2245), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1983s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 159 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 239 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 281 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 327 ta




[Parallel(n_jobs=-1)]: Done 2223 out of 2223 | elapsed:   22.1s finished
