### Macaque custom parsing
- This dataset has vocalizations from several individuals. Calls are already broken down into individual coos. Files include:
    - .WAV files of vocalizations and filename with ID
- This notebook creates a JSON corresponding to each WAV file
- Dataset origin:
    - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4806230/
    - https://datadryad.org/handle/10255/dryad.101951

In [1]:
from avgn.utils.general import prepare_env

In [2]:
# Run the project's environment setup before the heavier imports below.
# NOTE(review): judging by this cell's output it sets CUDA_VISIBLE_DEVICES —
# confirm against avgn.utils.general.prepare_env.
prepare_env()

env: CUDA_VISIBLE_DEVICES=GPU


### Import relevant packages

In [3]:
from joblib import Parallel, delayed
from tqdm.autonotebook import tqdm
import pandas as pd
pd.options.display.max_columns = None
import librosa
from datetime import datetime
import numpy as np



In [4]:
import avgn
from avgn.custom_parsing.fukushima_macaque import generate_json
from avgn.utils.paths import DATA_DIR

### Load data in original format

In [5]:
# Short identifier for this dataset (macaque coo calls).
DATASET_ID = "macaque_coo"

In [6]:
# Timestamp taken once per run; it is passed to generate_json so that all files
# written by this notebook execution share the same identifier.
DT_ID = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
DT_ID

'2019-06-23_21-14-59'

In [7]:
# Root folder holding the raw macaque recordings.
# NOTE(review): this is an absolute, machine-specific mount point; anyone
# re-running the notebook elsewhere has to edit it by hand.
DSLOC = avgn.utils.paths.Path("/mnt/cube/Datasets/Macaque/")
DSLOC

PosixPath('/mnt/cube/Datasets/Macaque')

In [8]:
# Collect every .wav located exactly one directory below "dataupload".
# The order is whatever the filesystem glob yields (it is not sorted).
wav_root = (DSLOC / 'dataupload').expanduser()
WAVLIST = list(wav_root.glob('*/*.wav'))

In [14]:
# Build one row per recording:
#   wavloc - path of the wav file
#   indv   - name of the folder containing the file (used as the individual)
#   idnum  - integer parsed from the file stem after its first two characters
wav_rows = []
for wav_path in tqdm(WAVLIST):
    wav_rows.append([wav_path, wav_path.parent.stem, int(wav_path.stem[2:])])

wav_df = pd.DataFrame(wav_rows, columns=["wavloc", "indv", "idnum"])

HBox(children=(IntProgress(value=0, max=7285), HTML(value='')))

In [15]:
# Sanity check: number of recordings found, then a peek at the first three rows.
print(wav_df.shape[0])
wav_df.head(3)

7285


Unnamed: 0,wavloc,indv,idnum
0,/mnt/cube/Datasets/Macaque/dataupload/IO/IO184...,IO,184
1,/mnt/cube/Datasets/Macaque/dataupload/IO/IO623...,IO,623
2,/mnt/cube/Datasets/Macaque/dataupload/IO/IO997...,IO,997


### Generate JSONs

In [17]:
# Call generate_json once per row of wav_df, spread across all available cores
# (n_jobs=-1). Every call receives the same DT_ID; return values are discarded.
with Parallel(n_jobs=-1, verbose=10) as parallel:
    # tqdm tracks how many rows have been handed to the workers.
    row_iter = tqdm(wav_df.iterrows(), total=len(wav_df))
    parallel(delayed(generate_json)(wav_row, DT_ID) for _, wav_row in row_iter)

HBox(children=(IntProgress(value=0, max=7285), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1706s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0469s.) Setting batch_size=16.
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 218 tasks      | elapsed:    5.6s
[Parallel(n_