### Zebra finch custom parsing
- An unlabelled dataset of zebra finch vocalizations
    - .MAT files containing the sampling rate (`fs`) and the audio data (`song`, `nonsong`)
- This notebook creates a JSON corresponding to each WAV file (and Noise file where available).
- Dataset origin:
    - https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0181992
    - https://osf.io/bx76r/

In [1]:
from avgn.utils.general import prepare_env

In [2]:
# run avgn's environment setup helper for this notebook
# (the recorded cell output shows it setting CUDA_VISIBLE_DEVICES)
prepare_env()

env: CUDA_VISIBLE_DEVICES=GPU


### Import relevant packages

In [3]:
from joblib import Parallel, delayed
from tqdm.autonotebook import tqdm
import pandas as pd
pd.options.display.max_columns = None
import librosa
from datetime import datetime
import numpy as np



In [4]:
import avgn
from avgn.custom_parsing.zebra_finch_gardner import generate_json_wav_noise
from avgn.utils.paths import DATA_DIR

### Load data in original format

In [5]:
# name identifying this dataset
# NOTE(review): presumably used by avgn to name the output location — confirm
DATASET_ID = 'zebra_finch_gardner'

In [6]:
# timestamp tag (YYYY-MM-DD_HH-MM-SS) taken at run time; it is passed to the
# JSON/WAV generation step so the files written by this run share one identifier
DT_ID = format(datetime.now(), "%Y-%m-%d_%H-%M-%S")
DT_ID

'2019-06-26_11-51-49'

In [7]:
# root folder holding the original dataset files (the .mat files are globbed from its sub-folders)
# NOTE(review): absolute, machine-specific path — change this when running on another machine
DSLOC = avgn.utils.paths.Path('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/')
DSLOC

PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R')

In [8]:
# collect the non-hidden .mat files found one folder level below the dataset root.
# Path.glob yields paths in arbitrary, filesystem-dependent order, so sort them
# to make the processing order (and MATFILES[0]) reproducible between runs.
MATFILES = sorted(DSLOC.expanduser().glob('*/[!.]*.mat'))
len(MATFILES), MATFILES[0]

(8, PosixPath('/mnt/cube/Datasets/ZebraFinch/OSF-BX76R/lr13/song.mat'))

### parse MAT and create wav/JSON

In [9]:
import h5py as h5

In [10]:
# Convert every MAT file into per-recording WAV/JSON (and noise) files.
# Each MAT file is expected to hold three datasets: "song", "nonsong" and "fs".
for mat_file in tqdm(MATFILES):
    # the individual's ID is taken from the name of the folder holding the MAT file
    indv = mat_file.parent.stem

    # try loading the MAT file; files h5py cannot open, or that lack one of the
    # expected datasets, are reported and skipped instead of aborting the run
    try:
        with h5.File(mat_file, "r") as f:
            # `Dataset.value` was removed in h5py 3.0; indexing with an empty
            # tuple reads the whole dataset into memory
            songs = f["song"][()]
            nonsongs = f["nonsong"][()]
            rate = f["fs"][()]
    except (OSError, KeyError) as e:
        print(f"{indv} failed: {e!r}")
        continue

    # `fs` is assumed to hold a single value (TODO confirm); `.item()` extracts it
    # without relying on NumPy's deprecated implicit array -> scalar conversion
    rate = int(np.asarray(rate).item())

    # write the files for each (song, nonsong) pair of this individual in parallel
    with Parallel(n_jobs=-1, verbose=10) as parallel:
        parallel(
            delayed(generate_json_wav_noise)(indv, wav_num, song, nonsong, rate, DT_ID)
            for wav_num, (song, nonsong) in tqdm(
                enumerate(zip(songs, nonsongs)), total=len(songs)
            )
        )
    # NOTE: the original cell ended with a leftover `break` here, which stopped
    # after the first successfully loaded individual; every MAT file is now processed.

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1946s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1451s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 113 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 145 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 204 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 274 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    4.5s
[Parallel(n_j