### This notebook sub-segments the original segmentations provided by the dataset

1) create a dataset of syllable waveforms for each isolation segment

2) get segmented times for each syllable in that segment

3) create a new dataframe with the isolated syllables

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.autonotebook import tqdm
from joblib import Parallel, delayed
import umap
import pandas as pd
from datetime import datetime




In [3]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir

In [4]:
DATASET_ID = 'batsong'

In [5]:
# Second-resolution timestamp, intended as a unique identifier for the files
# output by this notebook.
DT_ID = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
DT_ID

'2019-11-11_15-26-37'

In [6]:
from avgn.utils.hparams import HParams
from avgn.dataset import DataSet

In [7]:
from avgn.signalprocessing.create_spectrogram_dataset import prepare_wav, create_label_df, get_row_audio

### create dataset

In [8]:
# Hyperparameters handed to the DataSet object created in the next cell.
hparams = HParams(
    # mel filterbank
    num_mel_bins=32,
    mel_lower_edge_hertz=500,
    mel_upper_edge_hertz=120000,
    # band-pass filter edges (same values as the mel edges above)
    butter_lowcut=500,
    butter_highcut=120000,
    # spectrogram level scaling
    ref_level_db=20,
    min_level_db=-60,
    # spectrogram masking
    mask_spec=True,
    # analysis window / hop
    win_length_ms=0.5,
    hop_length_ms=0.05,
    mask_spec_kwargs={"spec_thresh": 0.9, "offset": 1e-10},
    # execution settings
    n_jobs=-1,
    verbosity=1,
    nex=-1,
)

In [9]:
# create a dataset object
dataset = DataSet(DATASET_ID, hparams = hparams)

HBox(children=(IntProgress(value=0, description='loading json', max=83884, style=ProgressStyle(description_wid…

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 1319 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 17819 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 40919 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 70619 tasks      | elapsed:   13.6s





[Parallel(n_jobs=-1)]: Done 83884 out of 83884 | elapsed:   15.2s finished


HBox(children=(IntProgress(value=0, description='getting unique individuals', max=83884, style=ProgressStyle(d…



In [10]:
# dataset.data_files = {i:dataset.data_files[i] for i in list(dataset.data_files.keys())[:5]}

In [11]:
dataset.sample_json

OrderedDict([('fid', 166697),
             ('Treatment ID', 17),
             ('Recording channel', 1),
             ('Recording time', '2013-01-16 08:06:24'),
             ('indvs',
              OrderedDict([('-210',
                            OrderedDict([('syllables',
                                          OrderedDict([('start_times',
                                                        [4e-06]),
                                                       ('end_times',
                                                        [1.543488]),
                                                       ('addressee', [-207]),
                                                       ('context',
                                                        ['Threat-like']),
                                                       ('emit_prevoc_act',
                                                        [2]),
                                                       ('add_prevoc_act', [2]),
               

In [12]:
len(dataset.data_files)

83884

#### Create dataset based upon JSON

In [13]:
from joblib import Parallel, delayed
# joblib settings used by the Parallel calls in the cells below
n_jobs = -1  # -1 asks joblib to use all available cores
verbosity = 10

In [14]:
# Build one label dataframe per data file in parallel, then stack them into a
# single dataframe; only the "context" label is retained.
_label_df_job = delayed(create_label_df)

with Parallel(n_jobs=n_jobs, verbose=verbosity) as parallel:
    syllable_dfs = parallel(
        _label_df_job(
            dataset.data_files[key].data,
            hparams=dataset.hparams,
            labels_to_retain=["context"],
            unit="syllables",
            dict_features_to_retain=[],
            key=key,
        )
        for key in tqdm(dataset.data_files.keys())
    )

syllable_df = pd.concat(syllable_dfs)
len(syllable_df)

HBox(children=(IntProgress(value=0, max=83884), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  50 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  65 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1841s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  80 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1301s.) Setting batch_size=6.
[Parallel(n_jobs=-1)]: Done 151 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 189 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 271 tasks      | elapsed:    1.5s
[Parallel(n_j

[Parallel(n_jobs=-1)]: Done 54433 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 55255 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 56077 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 56911 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 57745 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 58591 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 59437 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 60295 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 61153 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 62023 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 62893 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 63775 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 64657 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 65551 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 66445 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 67351 tasks 




[Parallel(n_jobs=-1)]: Done 83884 out of 83884 | elapsed:  2.2min finished


86867

In [15]:
syllable_df[:3]

Unnamed: 0,start_time,end_time,context,indv,indvi,key
0,4e-06,1.543488,Threat-like,-210,0,130116080549387242
0,4e-06,3.247424,Sleeping,-221,0,130303053329639859
0,1.002284,2.16608,Isolation,222,0,121204031642219643


In [16]:
syllable_df.context.unique()

array(['Threat-like', 'Sleeping', 'Isolation', 'Fighting', 'General',
       'Mating protest', 'Biting', 'Feeding', 'Kissing', 'Separation',
       'Unknown', 'Grooming', 'Landing'], dtype=object)

In [17]:
from vocalseg.dynamic_thresholding import dynamic_threshold_segmentation
from vocalseg.dynamic_thresholding import plot_segmented_spec, plot_segmentations
from vocalseg.utils import butter_bandpass_filter, spectrogram, int16tofloat32, plot_spec
import warnings
warnings.filterwarnings('ignore')

#### Save audio df for each context since this is a big dataset

In [18]:
# Settings for the dynamic-threshold segmentation. Most of these are passed by
# name to dynamic_threshold_segmentation / spectrogram in the cells below.
n_fft = 1024
hop_length_ms = 0.5
win_length_ms = 4
ref_level_db = 20
pre = 0.97
min_level_db = -30
silence_threshold = 0.1
# NOTE(review): min_silence_for_spec and max_vocal_for_spec are not passed to
# any call in the visible cells — confirm whether they are still needed.
min_silence_for_spec = 0.1
# Must be a plain float: a trailing comma here would silently create the
# 1-tuple (1.0,) instead.
max_vocal_for_spec = 1.0
min_syllable_length_s = 0.01
spectral_range = [5000, 60000]

In [19]:
syllable_df[:2]

Unnamed: 0,start_time,end_time,context,indv,indvi,key
0,4e-06,1.543488,Threat-like,-210,0,130116080549387242
0,4e-06,3.247424,Sleeping,-221,0,130303053329639859


In [26]:
from avgn.utils.audio import int16_to_float32
from avgn.signalprocessing.filtering import butter_bandpass_filter
import librosa

In [31]:
syllable_df[:3]

Unnamed: 0,start_time,end_time,context,indv,indvi,key
0,4e-06,1.543488,Threat-like,-210,0,130116080549387242
0,4e-06,3.247424,Sleeping,-221,0,130303053329639859
0,1.002284,2.16608,Isolation,222,0,121204031642219643


In [44]:
# Band-pass edges used by create_segmented_json (same values as in hparams).
butter_lowcut = 500
butter_highcut = 120000


def create_segmented_json(row):
    """Sub-segment the audio span described by one row of ``syllable_df``.

    Loads only the ``[row.start_time, row.end_time]`` span of the row's wav
    file, band-pass filters it and runs ``dynamic_threshold_segmentation``
    with the module-level segmentation settings.

    Args:
        row: a ``syllable_df`` row; ``key``, ``start_time`` and ``end_time``
            are read from it.

    Returns:
        ``(onsets, offsets)`` taken from the segmentation results, or
        ``(None, None)`` when the loaded clip is empty or the segmentation
        returns ``None``.

    NOTE(review): the clip is loaded with ``offset=row.start_time``, so the
    returned times are presumably relative to the start of the clip rather
    than the start of the file — confirm before merging them back.
    """
    # locate the recording and load just this row's span at its native rate
    wav_loc = dataset.data_files[row.key].data['wav_loc']
    duration = row.end_time - row.start_time
    data, rate = librosa.core.load(
        wav_loc, sr=None, offset=row.start_time, duration=duration
    )

    # nothing to segment (e.g. zero-length span)
    if len(data) == 0:
        return None, None

    # convert data if needed
    if np.issubdtype(data.dtype, np.integer):
        data = int16_to_float32(data)

    # bandpass filter
    data = butter_bandpass_filter(
        data, butter_lowcut, butter_highcut, rate, order=5
    )

    results = dynamic_threshold_segmentation(
        data,
        rate,
        n_fft=n_fft,
        hop_length_ms=hop_length_ms,
        win_length_ms=win_length_ms,
        ref_level_db=ref_level_db,
        pre=pre,
        min_level_db=min_level_db,
        silence_threshold=silence_threshold,
        verbose=False,
        spectral_range=spectral_range,
        min_syllable_length_s=min_syllable_length_s,
        min_level_db_floor=20,
    )
    if results is None:
        return None, None
    return results['onsets'], results['offsets']

In [None]:
# Suppress all Python warnings from here on.
warnings.filterwarnings('ignore')

# Apply create_segmented_json to every row of syllable_df on all cores;
# the result list holds one entry per row.
with Parallel(n_jobs=-1, verbose=verbosity) as parallel:
    onsets_offsets = parallel(
        delayed(create_segmented_json)(row)
        for _idx, row in tqdm(syllable_df.iterrows(), total=len(syllable_df))
    )

len(onsets_offsets)

HBox(children=(IntProgress(value=0, max=86867), HTML(value='')))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1850s.) Setting batch_size=2.


KeyboardInterrupt: 

In [None]:
# NOTE(review): `breakme` is not defined in any visible cell, so this cell
# raises NameError — presumably a deliberate stop for "Run All"; confirm.
breakme

In [None]:
# NOTE(review): in the visible notebook, start_times / end_times are only
# assigned by the segmentation loop in the following cell, so on a fresh kernel
# this cell has to be run after that loop.
# Adds the per-row sub-segment onsets/offsets as new columns (note the plural
# names, distinct from the existing start_time / end_time columns).
syllable_df['start_times'] = start_times
syllable_df['end_times'] = end_times

In [None]:
# Serial segmentation: for every row of syllable_df, load the row's wav, run
# dynamic-threshold segmentation, and collect the onsets/offsets (or None when
# no segmentation is returned) in start_times / end_times, one entry per row.
#
# NOTE(review): prepare_wav is given only the wav path, not row.start_time /
# row.end_time, so this presumably segments the whole recording rather than
# just the row's span — confirm this is intended.
display_results = False  # set True to plot each row's spectrogram/segmentation
start_times = []
end_times = []
for idx, row in tqdm(syllable_df.iterrows(), total=len(syllable_df)):

    # load audio
    rate, data = prepare_wav(dataset.data_files[row.key].data['wav_loc'], hparams)

    results = dynamic_threshold_segmentation(
        data,
        rate,
        n_fft=n_fft,
        hop_length_ms=hop_length_ms,
        win_length_ms=win_length_ms,
        ref_level_db=ref_level_db,
        pre=pre,
        min_level_db=min_level_db,
        silence_threshold=silence_threshold,
        verbose=False,
        spectral_range=spectral_range,
        min_syllable_length_s=min_syllable_length_s,
        min_level_db_floor=20,
    )

    if results is None:
        # nothing was segmented: optionally show the raw spectrogram, then
        # record a placeholder so the lists stay aligned with syllable_df
        if display_results:
            # use the audio loaded above; syllable_df rows carry no
            # `audio` / `rate` columns
            spec = spectrogram(
                data,
                rate,
                n_fft=n_fft,
                hop_length_ms=hop_length_ms,
                win_length_ms=win_length_ms,
                ref_level_db=ref_level_db,
                pre=pre,
                min_level_db=min_level_db,
            )
            fig, ax = plt.subplots(figsize=(30, 5))
            plot_spec(spec, fig, ax);
            plt.show()
        start_times.append(None)
        end_times.append(None)
        continue

    if display_results:
        plot_segmentations(
            results["spec"],
            vocal_envelope=results["vocal_envelope"],
            onsets=results["onsets"],
            offsets=results["offsets"],
            rate=rate,
            hop_length_ms=hop_length_ms,
            figsize=(30, 5),
            #spectral_range=spectral_range
        )
        plt.show()

    start_times.append(results['onsets'])
    end_times.append(results['offsets'])

In [None]:
warnings.filterwarnings('ignore')


def _peak_normalize(audio):
    """Scale `audio` so its largest absolute sample is 1.

    Audio whose peak is not positive (e.g. all zeros) is returned unchanged
    instead of being divided by zero.
    """
    peak = np.max(np.abs(audio))
    return audio / peak if peak > 0 else audio


# For each context: fetch the audio of every row, drop empty clips,
# peak-normalize, plot a sample grid, and pickle the resulting dataframe.
for context in tqdm(syllable_df.context.unique()):

    save_loc = DATA_DIR / "audio_df" / (DATASET_ID + context + '.pickle')

    # contexts that already have a pickle on disk are skipped
    if save_loc.exists():
        continue

    # keep only the rows belonging to this context
    subset_df = syllable_df[syllable_df.context == context]
    print(context, len(subset_df))

    # fetch audio per source file (one job per unique key)
    with Parallel(n_jobs=n_jobs, verbose=0) as parallel:
        syllable_dfs = parallel(
            delayed(get_row_audio)(
                subset_df[subset_df.key == key],
                dataset.data_files[key].data['wav_loc'],
                dataset.hparams
            )
            for key in tqdm(subset_df.key.unique(), leave=False)
        )
    subset_df = pd.concat(syllable_dfs)
    print(len(subset_df))

    # drop rows with empty audio; .copy() so the column assignment below
    # writes to an independent frame rather than a slice
    has_audio = np.array(
        [len(i) > 0 for i in tqdm(subset_df.audio.values)], dtype=bool
    )
    subset_df = subset_df[has_audio].copy()

    # normalize audio
    subset_df['audio'] = [_peak_normalize(i) for i in tqdm(subset_df.audio.values)]

    # plot the first nrows*ncols waveforms as a quick visual check
    nrows = 5; ncols = 10
    zoom = 2
    # NOTE(review): the height is `nrows + zoom/1.5`; `nrows * zoom/1.5` may
    # have been intended — left unchanged.
    fig, axs = plt.subplots(ncols=ncols, nrows=nrows, figsize=(ncols*zoom, nrows+zoom/1.5))
    for ax, syll in zip(axs.flatten(), subset_df.audio.values):
        ax.plot(syll)
    plt.show()
    plt.close(fig)  # release the figure before the next context

    # save
    ensure_dir(save_loc)
    subset_df.to_pickle(save_loc)