### Bengalese finch (Sober lab) custom parsing
- An labelled dataset of Bengalese finch vocalizations
    - .MAT files with labels
    - .BIN files with 
- This notebook creates a JSON corresponding to each WAV file (and Noise file where available).
- Dataset origin:
    - https://zenodo.org/record/3237218

In [1]:
from avgn.utils.general import prepare_env



In [2]:
prepare_env()

env: CUDA_VISIBLE_DEVICES=GPU


### Import relevant packages

In [3]:
from joblib import Parallel, delayed
from tqdm.autonotebook import tqdm
import pandas as pd
pd.options.display.max_columns = None
import librosa
from datetime import datetime
import numpy as np

In [4]:
import avgn
from avgn.custom_parsing.bengalese_finch_sober import (
    load_cbin,
    generate_json_wav,
    parse_song_df,
)
from avgn.utils.paths import DATA_DIR

### Load data in original format

In [5]:
DATASET_ID = 'bengalese_finch_sober'

In [6]:
# create a unique datetime identifier for the files output by this notebook
DT_ID = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
DT_ID

'2019-10-10_18-44-37'

In [7]:
DSLOC = avgn.utils.paths.Path('/mnt/cube/Datasets/BengaleseFinch/sober/')
DSLOC

PosixPath('/mnt/cube/Datasets/BengaleseFinch/sober')

In [8]:
MATLIST = list((DSLOC).expanduser().glob('*/*.not.mat'))
len(MATLIST), MATLIST[0]

(2664,
 PosixPath('/mnt/cube/Datasets/BengaleseFinch/sober/032212/gy6or6_baseline_220312_1744.1688.cbin.not.mat'))

In [9]:
CBINLIST = list((DSLOC).expanduser().glob('*/*.cbin'))
len(CBINLIST), CBINLIST[0]

(3546,
 PosixPath('/mnt/cube/Datasets/BengaleseFinch/sober/032212/gy6or6_washout_130312_1303.5709.cbin'))

In [10]:
song_df = parse_song_df(MATLIST)

HBox(children=(IntProgress(value=0, max=2664), HTML(value='')))




  song_df.set_value(idx, "rec_num", idxi)


In [11]:
cbin_names = np.array([i.name for i in CBINLIST])

In [12]:
song_df[:3]

Unnamed: 0,index,bird,species,stime,syllables,start_times,end_times,bout_duration,syll_lens,day,wavname,rate,NumNote,rec_num
0,0,gy6or6,BF,2012-03-22 17:44:00,"[i, i, i, i, i, i, i, i, i, i, i, a, b, c, d, ...","[0.023375, 0.18903125, 0.42575, 0.60078125, 0....","[0.08803125, 0.2813125, 0.5279375, 0.7005625, ...",8.446781,"[0.06465625, 0.09228125, 0.1021875, 0.09978125...",2012-03-22,gy6or6_baseline_220312_1744.1688.cbin,32000,70,218
1,1,gy6or6,BF,2012-03-22 12:59:00,"[i, i, i, i, i, i, i, i, i, i, i, a, b, c, d, ...","[0.02746875, 0.18878125, 0.35603125, 0.546125,...","[0.07509375, 0.2473125, 0.43353125, 0.63846875...",10.279844,"[0.047625, 0.05853125, 0.0775, 0.09234375, 0.1...",2012-03-22,gy6or6_baseline_220312_1259.990.cbin,32000,73,149
2,2,gy6or6,BF,2012-03-22 09:06:00,"[i, i, i, i, i, i, i, a, b, c, d, e, e, f, g, ...","[0.75171875, 0.9445, 1.13971875, 1.32734375, 1...","[0.8020625, 1.01359375, 1.22996875, 1.40834375...",10.561281,"[0.05034375, 0.06909375000000012, 0.09025, 0.0...",2012-03-22,gy6or6_baseline_220312_0906.117.cbin,32000,82,36


### Generate JSON and WAV files

In [13]:
Parallel(n_jobs=-1, verbose=10)(
    delayed(generate_json_wav)(row, CBINLIST, cbin_names, DT_ID)
    for idx, row in tqdm(song_df.iterrows(), total=len(song_df))
);

HBox(children=(IntProgress(value=0, max=2664), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   13.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   13.2s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   15.2s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:   17.0s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:   19.3s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:   24.0s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   26.4s
[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed:   29.4s
[Parallel(n_jobs=-1)]: Done 152 tasks      | elapsed:   32.3s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   35.2s
[Parallel(n_jobs=-1)]: Done 194 tasks      | elapsed:   38.2s
[Parallel(n_jobs=-1)]: Done 217 tasks      | elapsed:   41.7s
[Parallel(n_jobs=-1)]: Done 240 tasks      | elapsed:  




[Parallel(n_jobs=-1)]: Done 2664 out of 2664 | elapsed:  6.8min finished
