In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.autonotebook import tqdm
from joblib import Parallel, delayed
import umap
import pandas as pd



In [3]:
from scipy.io import loadmat
from pathlib2 import Path

In [11]:
from avgn.utils.hparams import HParams
from avgn.dataset import DataSet

In [6]:
from vocalseg.utils import butter_bandpass_filter, spectrogram, int16tofloat32, plot_spec
from vocalseg.dynamic_thresholding import dynamic_threshold_segmentation

In [7]:
# Dataset identifier: passed to DataSet(...) and used as a folder name in the output path.
DATASET_ID = 'gibbon_morita_segmented'

In [8]:
### segmentation parameters
# Every value from n_fft through min_syllable_length_s, plus spectral_range, is
# forwarded under the same keyword name to `dynamic_threshold_segmentation` in
# the windowing loop.
n_fft = 1024
hop_length_ms = 5  # also used to convert syllable times into spectrogram frame indices
win_length_ms = 20
ref_level_db = 20
pre = 0.97
min_level_db = -70
min_level_db_floor = -50
db_delta = 5
silence_threshold = 0.01
min_silence_for_spec = 0.05
# Must be a plain number: a trailing comma on this line would make it the tuple (10,).
max_vocal_for_spec = 10
min_syllable_length_s = 0.5
# Band edges; spectral_range is built from them so the two cannot drift apart.
butter_min = 50
butter_max = 7999
spectral_range = [butter_min, butter_max]

In [9]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir

In [13]:
# Hyperparameter overrides handed to the dataset loader.
hparams = HParams(
    butter_lowcut=50,
    butter_highcut=7999,
    mel_upper_edge_hertz=7999,
)

# Build the dataset object for DATASET_ID with those hyperparameters.
dataset = DataSet(DATASET_ID, hparams=hparams)

HBox(children=(IntProgress(value=0, description='loading json', max=128, style=ProgressStyle(description_width…




[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:    0.1s finished


HBox(children=(IntProgress(value=0, description='getting unique individuals', max=128, style=ProgressStyle(des…



In [14]:
# Show one sample JSON record to inspect the metadata layout (keys, wav path, syllable times).
dataset.sample_json

OrderedDict([('datetime', '2017-09-12_07-00-02'),
             ('species', 'Hylobatidae'),
             ('common_name', 'Gibbon'),
             ('samplerate_hz', 16000),
             ('original_wav',
              '/mnt/cube/tsainbur/Projects/github_repos/avgn_paper/data/raw/gibbon_morita/gibbon_superregular_data/test_data/20170912-070002_01_ch.wav'),
             ('length_s', 1800.0),
             ('wav_loc',
              '/mnt/cube/tsainbur/Projects/github_repos/avgn_paper/data/raw/gibbon_morita/gibbon_superregular_data/test_data/20170912-070002_01_ch.wav'),
             ('indvs',
              OrderedDict([('UNK',
                            OrderedDict([('syllables',
                                          OrderedDict([('start_times',
                                                        [349.04,
                                                         390.04,
                                                         422.44,
                                                     

In [15]:
import librosa
from vocalseg.utils import butter_bandpass_filter, spectrogram, int16tofloat32, plot_spec
from vocalseg.dynamic_thresholding import dynamic_threshold_segmentation
from PIL import Image
from avgn.umap import umap_reduce
import seaborn as sns

In [16]:
from itertools import islice

def window(seq, n=2):
    """Yield overlapping tuples of width ``n`` from ``seq``, advancing one item at a time.

    s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    Nothing is yielded when ``seq`` holds fewer than ``n`` items.
    """
    source = iter(seq)
    frame = tuple(islice(source, n))
    if len(frame) == n:
        yield frame
    for item in source:
        # Drop the oldest item and append the newest one.
        frame = (*frame[1:], item)
        yield frame

In [17]:
# for each wav, compute the spectrogram, break the spectrogram up into multiple segments, then project into PCA, then plot PCA projections over time

In [18]:
# For every entry of dataset.json_indv keep the first item it yields, as that file's label
# (presumably each entry is a collection of individual names — confirm against DataSet).
indvs = np.array([list(i)[0] for i in dataset.json_indv])
# List the distinct labels that occur.
np.unique(indvs)

array(['UNK'], dtype='<U3')

In [125]:
# Individual to process; the boolean mask below keeps only the data-file keys whose label matches it.
indv = 'UNK' 
indv_keys = np.array(list(dataset.data_files.keys()))[indvs==indv]
# A gap (previous syllable end -> next syllable start) larger than this starts a new bout
# in the windowing loop.
silence_thresh_seconds = 25

In [137]:
# Candidate window lengths; only the first entry is used, as window_length_ms.
window_list = [200]
window_length_ms = window_list[0]
# Neither of these containers is filled by the cells visible in this notebook.
specs = []
key_windows = {i:{} for i in window_list}

In [138]:
### for each key, split the syllables into bouts wherever the pause exceeds `silence_thresh_seconds`; for every bout save its windows, the key, and the bout's start and end

In [None]:
from itertools import islice

def window(seq, n=2, skip = 100):
    """Yield overlapping tuples of width ``n`` from ``seq``, advancing one item at a time.

    s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    ``skip`` is accepted but never read by this implementation, so every
    consecutive window is produced whatever value is passed.
    """
    upcoming = iter(seq)
    span = tuple(islice(upcoming, n))
    if len(span) == n:
        yield span
    for nxt in upcoming:
        # Slide forward by one: discard the oldest item, append the newest.
        span = span[1:] + (nxt,)
        yield span

In [142]:
def _bout_bounds(start_times, end_times, max_gap_s):
    """Group consecutive syllables into bouts separated by long silences.

    Parameters
    ----------
    start_times, end_times : sequence of float
        Syllable onsets and offsets, in the same order and of equal length.
    max_gap_s : float
        Syllable ``i`` opens a new bout when
        ``start_times[i] - end_times[i - 1]`` exceeds this value.

    Returns
    -------
    list of (int, int)
        Half-open syllable-index ranges ``(first, stop)``, one per bout, so a
        bout runs from ``start_times[first]`` to ``end_times[stop - 1]``. The
        last range stops at ``len(start_times)`` so that the final syllable is
        part of the final bout.
    """
    starts = np.asarray(start_times, dtype=float)
    ends = np.asarray(end_times, dtype=float)
    if len(starts) == 0:
        return []
    gaps = starts[1:] - ends[:-1]
    firsts = [0] + [int(i) + 1 for i in np.where(gaps > max_gap_s)[0]]
    stops = firsts[1:] + [len(starts)]
    return list(zip(firsts, stops))


window_columns = ['key', 'bout_number', 'bout_start_time', 'bout_end_time', 'bout_window', 'bout_spec']
min_syllables_per_file = 20  # files with fewer labelled syllables are skipped
seconds_per_frame = hop_length_ms / 1000  # duration of one spectrogram column
window_length_frames = int(window_length_ms / hop_length_ms)

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of growing the frame one row at a time.
bout_records = []

for key in tqdm(indv_keys):
    file_json = dataset.data_files[key].data
    syllables = file_json['indvs'][indv]['syllables']

    # get start and end times
    start_times = syllables['start_times']
    end_times = syllables['end_times']
    # Check before touching the audio so skipped files cost nothing.
    if len(start_times) < min_syllables_per_file:
        continue

    # load data
    data, rate = librosa.load(file_json['wav_loc'], sr=None)
    data = butter_bandpass_filter(data, 500, 7999, rate, order=2)

    # get the spectrogram
    # segment
    results = dynamic_threshold_segmentation(
        data,
        rate,
        n_fft=n_fft,
        hop_length_ms=hop_length_ms,
        win_length_ms=win_length_ms,
        min_level_db_floor=min_level_db_floor,
        db_delta=db_delta,
        ref_level_db=ref_level_db,
        pre=pre,
        min_silence_for_spec=min_silence_for_spec,
        max_vocal_for_spec=max_vocal_for_spec,
        min_level_db=min_level_db,
        silence_threshold=silence_threshold,
        verbose=False,
        min_syllable_length_s=min_syllable_length_s,
        spectral_range=spectral_range,
    )
    # NOTE(review): the segmentation routine appears to return None when it
    # cannot find a usable threshold — confirm against vocalseg. Such files
    # are skipped rather than aborting the whole run.
    if results is None:
        continue
    spec = results['spec']
    # Resize to 32 rows while keeping the number of time columns (PIL sizes are
    # (width, height)). LANCZOS is the filter formerly exposed as ANTIALIAS.
    spec = np.array(Image.fromarray(spec).resize((np.shape(spec)[1], 32), Image.LANCZOS))

    # get bouts
    bouts = _bout_bounds(start_times, end_times, silence_thresh_seconds)
    for bi, (sp0, sp1) in enumerate(bouts):
        bout_start_time = start_times[sp0]
        bout_end_time = end_times[sp1 - 1]
        spec_st = int(bout_start_time / seconds_per_frame)
        spec_et = int(bout_end_time / seconds_per_frame)
        bout_spec = spec[:, spec_st:spec_et]

        # get windows: one flattened row per sliding window of the bout
        windows = list(window(bout_spec.T, n=window_length_frames))
        # The row width is stated explicitly so a bout shorter than one window
        # gives an empty (0, n_features) array instead of a reshape error.
        n_features = window_length_frames * bout_spec.shape[0]
        windows = np.asarray(windows, dtype=bout_spec.dtype).reshape(len(windows), n_features)

        bout_records.append({
            'key': key,
            'bout_number': bi,
            # Times taken from the syllable annotations, not syllable indices.
            'bout_start_time': bout_start_time,
            'bout_end_time': bout_end_time,
            'bout_window': windows,
            'bout_spec': bout_spec,
        })

windows_df = pd.DataFrame(bout_records, columns=window_columns)

HBox(children=(IntProgress(value=0), HTML(value='')))

Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f441f5ca8>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f44231f68>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f44231e60>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignore

Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f603360f8>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f443dcdb0>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f442e0938>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignore

Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f600d5e60>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f442d4ca8>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f603e2938>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignore

Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f60336518>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f000420a0>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7f1f441f52b0>
Traceback (most recent call last):
  File "/mnt/cube/tsainbur/conda_envs/tpy3/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'
Exception ignore




In [148]:
# Output pickle for this dataset, with the window length encoded in the file name.
save_loc = (
    DATA_DIR
    / 'umap_windows'
    / DATASET_ID
    / ('gibbon_windows_' + str(window_length_ms) + '.pickle')
)
# Hand the containing folder to ensure_dir before anything is written there.
ensure_dir(save_loc.parent)

In [149]:
# Write the per-bout table (windows and spectrograms included) to save_loc as a pickle.
windows_df.to_pickle(save_loc)