### This notebook sub-segments the original segmentations provided by the dataset

1) create a dataset of syllable waveforms for each isolation segment

2) get segmented times for each syllable in that segment

3) create a new dataframe with the isolated syllables

In [None]:
# IPython magics: enable the autoreload extension in mode 2, so imported
# modules are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
import joblib

# Smoke test for the joblib worker pool: dispatch 1000 trivial `print` calls
# across all available workers (n_jobs=-1).
joblib.Parallel(n_jobs=-1, verbose=1)(
    joblib.delayed(print)(value) for value in range(1000)
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.autonotebook import tqdm
from joblib import Parallel, delayed
import umap
import pandas as pd
from datetime import datetime


In [None]:
import joblib

# Smoke test for the joblib worker pool, repeated after the main imports:
# dispatch 1000 trivial `print` calls across all available workers.
joblib.Parallel(n_jobs=-1, verbose=1)(
    joblib.delayed(print)(value) for value in range(1000)
)

In [None]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir

In [None]:
# Identifier of the dataset this notebook processes; also used to build the
# name of the output directory.
DATASET_ID = "batsong"

In [None]:
# Timestamp taken once per notebook run; it names the output sub-directory so
# that separate runs do not overwrite each other's files.
DT_ID = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
DT_ID

In [None]:
from avgn.utils.hparams import HParams
from avgn.dataset import DataSet

In [None]:
from avgn.signalprocessing.create_spectrogram_dataset import prepare_wav, create_label_df, get_row_audio

### create dataset

In [None]:
# Hyperparameters handed to the DataSet constructed in the next cell.
hparams = HParams(
    # mel filterbank and band-pass filter limits
    num_mel_bins=32,
    mel_lower_edge_hertz=500,
    mel_upper_edge_hertz=120000,
    butter_lowcut=500,
    butter_highcut=120000,
    # spectrogram levels, windowing and masking
    ref_level_db=20,
    min_level_db=-60,
    mask_spec=True,
    win_length_ms=0.5,
    hop_length_ms=0.05,
    mask_spec_kwargs={"spec_thresh": 0.9, "offset": 1e-10},
    # execution settings
    n_jobs=-1,
    verbosity=1,
    nex=-1,
)

In [None]:
# Build the DataSet object for DATASET_ID with the hyperparameters defined above.
dataset = DataSet(DATASET_ID, hparams=hparams)

In [None]:
# dataset.data_files = {i:dataset.data_files[i] for i in list(dataset.data_files.keys())[:5]}

In [None]:
# Display the dataset's `sample_json` attribute for inspection.
dataset.sample_json

In [None]:
# Number of entries in `dataset.data_files`.
len(dataset.data_files)

#### Create dataset based upon JSON

In [None]:
from joblib import Parallel, delayed
# Settings for the joblib `Parallel` call in the next cell.
n_jobs = -1
verbosity = 10

In [None]:
# Build one label dataframe per data file in parallel, then stack them into a
# single dataframe with one row per unit of type "syllables".
with Parallel(n_jobs=n_jobs, verbose=verbosity) as parallel:
    syllable_dfs = parallel(
        delayed(create_label_df)(
            data_file.data,
            hparams=dataset.hparams,
            labels_to_retain=["context"],
            unit="syllables",
            dict_features_to_retain=[],
            key=key,
        )
        for key, data_file in tqdm(dataset.data_files.items())
    )
syllable_df = pd.concat(syllable_dfs)
len(syllable_df)

In [None]:
import joblib

# Smoke test for the joblib worker pool, repeated after the dataframe build:
# dispatch 1000 trivial `print` calls across all available workers.
joblib.Parallel(n_jobs=-1, verbose=1)(
    joblib.delayed(print)(value) for value in range(1000)
)

In [None]:
# Peek at the first three rows of the freshly built dataframe.
syllable_df.head(3)

In [None]:
# Distinct values found in the "context" column.
syllable_df["context"].unique()

In [None]:
from vocalseg.dynamic_thresholding import dynamic_threshold_segmentation
from vocalseg.dynamic_thresholding import plot_segmented_spec, plot_segmentations
from vocalseg.utils import butter_bandpass_filter, spectrogram, int16tofloat32, plot_spec
import warnings
# Installs a global "ignore" filter: every warning raised after this cell is
# suppressed for the rest of the session.
warnings.filterwarnings('ignore')

#### Save audio df for each context since this is a big dataset

In [None]:
# Module-level parameters that `segment_spec_custom` forwards, by the same
# names, to `dynamic_threshold_segmentation`.
n_fft = 1024
hop_length_ms = 0.5
win_length_ms = 4
ref_level_db = 20
pre = 0.97
min_level_db = -50
silence_threshold = 0.02
min_silence_for_spec = 0.1
# A stray trailing comma previously turned this value into the tuple (1.0,);
# it must be a plain float.
max_vocal_for_spec = 1.0
min_syllable_length_s = 0.01
spectral_range = [5000, 60000]

In [None]:
# Peek at the first two rows of the dataframe.
syllable_df.head(2)

In [None]:
from avgn.utils.audio import int16_to_float32
from avgn.signalprocessing.filtering import butter_bandpass_filter
import librosa

In [None]:
# Peek at the first three rows of the dataframe.
syllable_df.head(3)

In [None]:
# Forwarded as the `db_delta` argument of `dynamic_threshold_segmentation`
# inside `segment_spec_custom`.
db_delta = 5

In [None]:
# Band-pass limits applied to each clip before segmentation.
butter_lowcut = 500
butter_highcut = 120000


def segment_spec_custom(indv, key, start_time, end_time, context, json_dict, save=False, plot=False):
    """Re-segment one stretch of a recording and optionally save the result as JSON.

    The part of the WAV file registered under ``key`` that starts at
    ``start_time`` and lasts ``end_time - start_time`` is loaded, band-pass
    filtered and passed to ``dynamic_threshold_segmentation``. The detected
    onsets and offsets replace ``json_dict["indvs"][indv]["syllables"]``.

    Args:
        indv: Key of the individual inside ``json_dict["indvs"]`` whose
            syllables are overwritten.
        key: Key into ``dataset.data_files``; also the stem of the output
            file name.
        start_time: Passed to ``librosa.core.load`` as ``offset``.
        end_time: End of the stretch; ``end_time - start_time`` is passed to
            ``librosa.core.load`` as ``duration``.
        context: Value stored under ``json_dict["context"]``.
        json_dict: Metadata dictionary for the recording. It is modified in
            place.
        save: When true, write the updated dictionary to
            ``DATA_DIR/processed/<DATASET_ID>_segmented/<DT_ID>/JSON/<key>.JSON``.
        plot: When true, plot the segmentation with ``plot_segmentations``.

    Returns:
        None. Returns early, without touching ``json_dict`` or the disk, when
        the segmentation yields no result.
    """
    # `json` is not imported anywhere else in this notebook; importing it here
    # keeps the function self-contained.
    import json

    # Load only the requested stretch, at the file's native sampling rate.
    wav_loc = dataset.data_files[key].data["wav_loc"]
    data, rate = librosa.core.load(
        wav_loc, sr=None, offset=start_time, duration=end_time - start_time
    )

    # Band-pass filter the clip.
    data = butter_bandpass_filter(data, butter_lowcut, butter_highcut, rate)

    # Segment the clip.
    results = dynamic_threshold_segmentation(
        data,
        rate,
        n_fft=n_fft,
        hop_length_ms=hop_length_ms,
        win_length_ms=win_length_ms,
        min_level_db_floor=20,
        db_delta=db_delta,
        ref_level_db=ref_level_db,
        pre=pre,
        min_silence_for_spec=min_silence_for_spec,
        max_vocal_for_spec=max_vocal_for_spec,
        min_level_db=min_level_db,
        silence_threshold=silence_threshold,
        verbose=False,
        min_syllable_length_s=min_syllable_length_s,
        spectral_range=spectral_range,
    )

    if results is None:
        return

    if plot:
        plot_segmentations(
            results["spec"],
            results["vocal_envelope"],
            results["onsets"],
            results["offsets"],
            hop_length_ms,
            rate,
            figsize=(100, 5),
        )
        plt.show()

    # Record the new segmentation in the metadata dictionary.
    # NOTE(review): the clip was loaded with offset=start_time, so these times
    # are presumably relative to start_time rather than to the start of the
    # file; confirm against downstream consumers.
    # NOTE(review): the nested "indvs" entry is mutated in place, so a caller
    # that passes a shallow copy shares this update with its source dict.
    # Successive rows for the same key may depend on that, so it is kept.
    json_dict["context"] = context
    json_dict["indvs"][indv]["syllables"] = {
        "start_times": NoIndent(list(results["onsets"])),
        "end_times": NoIndent(list(results["offsets"])),
    }

    if save:
        json_out = DATA_DIR / "processed" / (DATASET_ID + "_segmented") / DT_ID / "JSON" / (
            key + ".JSON"
        )
        json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)
        ensure_dir(json_out.as_posix())
        # The context manager closes (and flushes) the file deterministically.
        with open(json_out.as_posix(), "w") as json_file:
            print(json_txt, file=json_file)

In [None]:
import joblib
from avgn.utils.json import NoIndent, NoIndentEncoder

In [None]:
# Peek at the first three rows of the dataframe.
syllable_df.head(3)

In [None]:
# One argument list per dataframe row, in the positional order expected by
# `segment_spec_custom`: indv, key, start_time, end_time, context.
iters = [
    [row.indv, row.key, row.start_time, row.end_time, row.context]
    for _, row in tqdm(syllable_df.iterrows(), total=len(syllable_df))
]

In [None]:
# Dry run: segment and plot only the first `nex` rows in a single process,
# without saving anything.
nex = 3
joblib.Parallel(n_jobs=1, verbose=11)(
    joblib.delayed(segment_spec_custom)(
        indv,
        key,
        start_time,
        end_time,
        context,
        dataset.data_files[key].data.copy(),
        plot=True,
    )
    for indv, key, start_time, end_time, context in tqdm(iters[:nex])
)

In [None]:
# Peek at the first three rows of the dataframe.
syllable_df.head(3)

In [None]:
# Full run: segment every row and save one JSON file per key.
# `nex = -1` means "process all rows". Slicing with `iters[:-1]` would silently
# drop the last row, so the sentinel is handled explicitly.
nex = -1
n_jobs = 1
joblib.Parallel(n_jobs=n_jobs, verbose=11)(
    joblib.delayed(segment_spec_custom)(
        indv,
        key,
        start_time,
        end_time,
        context,
        dataset.data_files[key].data.copy(),
        save=True,
        plot=False,
    )
    for indv, key, start_time, end_time, context in tqdm(
        iters if nex == -1 else iters[:nex]
    )
)