In [1]:
%load_ext autoreload
%autoreload 2

from config import *
from dataset import WavDataset

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import librosa 
import os
import h5py
import pickle

from pathlib import Path
from config import *
from maad import sound, util
from maad.rois import template_matching

In [2]:
os.listdir(ANNOTATIONS / 'manual_annotations')

['initial_manual_annotations.csv',
 'raw',
 'all_annotated_recordings_filtered.npy',
 'signalless_recordings.npy']

In [11]:
annotations = pd.read_csv(ANNOTATIONS / 'manual_annotations' / 'initial_manual_annotations.csv')
annotations

Unnamed: 0,min_t,max_t,min_f,max_f,label,recording
0,437.526667,437.986667,1768.06,4014.97,nr_syllable_3khz,1_20230821_060000.wav
1,438.280000,438.826667,1910.69,3926.95,nr_syllable_3khz,1_20230821_060000.wav
2,439.033333,439.553333,1565.13,3798.53,nr_syllable_3khz,1_20230821_060000.wav
3,439.853333,440.313333,1385.49,3840.86,nr_syllable_3khz,1_20230821_060000.wav
4,440.646667,441.193333,1432.34,4014.97,nr_syllable_3khz,1_20230821_060000.wav
...,...,...,...,...,...,...
1976,406.308000,407.064000,5964.76,7874.75,fast_trill_6khz,1_20230828_063000.wav
1977,410.489333,411.693333,5964.76,8324.64,fast_trill_6khz,1_20230828_063000.wav
1978,417.769333,418.740000,5964.76,7984.89,fast_trill_6khz,1_20230828_063000.wav
1979,423.388000,424.125333,6132.78,8209.81,fast_trill_6khz,1_20230828_063000.wav


In [12]:
annotations['label'].value_counts()

label
nr_syllable_3khz    969
fast_trill_6khz     465
upsweep_500hz       317
triangle_3khz       230
Name: count, dtype: int64

In [14]:
annotations['duration'] = annotations['max_t'] - annotations['min_t']
annotations['mid_frequency'] = (annotations['max_f'] + annotations['min_f']) / 2
annotations

Unnamed: 0,min_t,max_t,min_f,max_f,label,recording,av_duration,duration,mid_frequency
0,437.526667,437.986667,1768.06,4014.97,nr_syllable_3khz,1_20230821_060000.wav,-0.460000,0.460000,2891.515
1,438.280000,438.826667,1910.69,3926.95,nr_syllable_3khz,1_20230821_060000.wav,-0.546667,0.546667,2918.820
2,439.033333,439.553333,1565.13,3798.53,nr_syllable_3khz,1_20230821_060000.wav,-0.520000,0.520000,2681.830
3,439.853333,440.313333,1385.49,3840.86,nr_syllable_3khz,1_20230821_060000.wav,-0.460000,0.460000,2613.175
4,440.646667,441.193333,1432.34,4014.97,nr_syllable_3khz,1_20230821_060000.wav,-0.546667,0.546667,2723.655
...,...,...,...,...,...,...,...,...,...
1976,406.308000,407.064000,5964.76,7874.75,fast_trill_6khz,1_20230828_063000.wav,-0.756000,0.756000,6919.755
1977,410.489333,411.693333,5964.76,8324.64,fast_trill_6khz,1_20230828_063000.wav,-1.204000,1.204000,7144.700
1978,417.769333,418.740000,5964.76,7984.89,fast_trill_6khz,1_20230828_063000.wav,-0.970667,0.970667,6974.825
1979,423.388000,424.125333,6132.78,8209.81,fast_trill_6khz,1_20230828_063000.wav,-0.737333,0.737333,7171.295


In [17]:
for l, df in annotations.groupby('label'):
    print(l)
    print(len(df))
    print(df['mid_frequency'].mean())
    print(df['duration'].mean())
    print()
    

fast_trill_6khz
465
7153.285634408602
1.111492495531184

nr_syllable_3khz
969
3050.816665634675
0.5204588923137258

triangle_3khz
230
2754.931826086957
0.7071478713826086

upsweep_500hz
317
404.8087318611987
0.8348559411230282



In [4]:
# with open(ANNOTATIONS / 'manual_annotations' / 'initial_training_recordings.pkl', 'rb') as f:
#     training_recs = np.array(pickle.load(f))

# annotations_df = pd.read_csv(ANNOTATIONS / 'manual_annotations' / 'initial_manual_annotations.csv')
# annotated = np.array(annotations_df['recording'].unique())
# len(training_recs), len(annotated)

In [None]:
unnannotated = training_recs[~np.isin(training_recs, annotated)]
len(unnannotated)

In [5]:
templates = pd.read_csv(CORRELATIONS / 'templates.csv')
labels = ['nr_syllable_3khz', 'fast_trill_6khz', 'upsweep_500hz', 'triangle_3khz']
templates = templates[templates['name'].isin(labels)]
numerical = ['low', 'high', 'start', 'end']
templates[numerical] = templates[numerical].astype(float)

nperseg = 1024
noverlap = 512
window = 'hann'
db_range = 80

def process_spec(path, flims, tlims):
    s, fs = sound.load(path)
    Sxx_template, _, _, _ = sound.spectrogram(s, fs, window, nperseg, noverlap, flims, tlims)
    return util.power2dB(Sxx_template, db_range)

ds = WavDataset()
spectrograms = { # should have just done a list
    row['name']: (
        process_spec(
            ds[row['recording']], (row['low'], row['high']), (row['start'], row['end'])
        ), row
    )
    
    for i, row in templates.iterrows()
}

{label: (S.shape) for label, (S, row) in spectrograms.items()}



KeyError: '1_20230316_063000.wav'

In [7]:
from tqdm import tqdm

def template_xcoefs(rec, significance_thresh=0.30):
    s, fs = sound.load(ds[rec])
    xcoefs = {}
    for template_name, (Sxx_template, info) in spectrograms.items():
        Sxx_audio, tn, fn, ext = sound.spectrogram(
            s, fs, window, nperseg, noverlap, 
            flims=(info['low'], info['high'])
        )
        Sxx_audio = util.power2dB(Sxx_audio, db_range)
        xcorrcoef, rois = template_matching(Sxx_audio, Sxx_template, tn, ext, significance_thresh)
        xcoefs[template_name] = rois['xcorrcoef']
    
    return xcoefs

correlations = {}
for rec in tqdm(unnannotated, desc='correlating'):
    correlations[rec] = template_xcoefs(rec)
    with open('objects/non_annotated_coefs.pkl', 'wb') as f:
        pickle.dump(correlations, f)

correlating: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 799/799 [7:29:32<00:00, 33.76s/it]


In [7]:
with open("objects/non_annotated_coefs.pkl", 'rb') as f:
    correlations = pickle.load(f)

def print_counts(xcoefs, thresh):
    for label, x in xcoefs.items():
        x = np.array(x)
        filtered = x[x > thresh]
        print(f'{label}: {len(filtered)}, mean={filtered.mean():.3f}', end = "\t")
    
for rec, xcoefs in list(correlations.items())[:50]:
    print('\n\n' + rec)
    for thresh in [0.001, 0.3, 0.4, 0.5, 0.6, 0.7]:
        print("")
        print_counts(xcoefs, thresh)        



1_20230918_000000.wav



  print(f'{label}: {len(filtered)}, mean={filtered.mean():.3f}', end = "\t")
  ret = ret.dtype.type(ret / rcount)


triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 4077, mean=0.330	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 4077, mean=0.330	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 46, mean=0.409	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 0, mean=nan	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 0, mean=nan	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 0, mean=nan	

1_20230322_053000.wav

triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 19135, mean=0.334	
triangle_3khz: 0, mean=nan	nr_syllable_3khz: 0, mean=nan	fast_trill_6khz: 0, mean=nan	upsweep_500hz: 19135, mean=0.334	
triangle_3khz: 0, mean=nan	nr_syllable_

In [8]:
correlation_all =  [
    (rec, np.concatenate([x for label, x in xcoefs.items()]))
    for rec, xcoefs in list(correlations.items())
]
correlation_all[0][1].mean(), len(correlation_all)

(0.3302508954772795, 799)

In [11]:
Proposed_cutoff = 0.45
filt = [rec for rec, x in correlation_all if (x < Proposed_cutoff).all()]
len(filt)

253

In [14]:
signalless_recordings = filt
initial_dataset_recordings = np.concatenate((filt, annotated))
len(initial_dataset_recordings)

354

In [15]:
np.save(ANNOTATIONS / 'manual_annotations' / 'all_annotated_recordings_filtered.npy', initial_dataset_recordings)
np.save(ANNOTATIONS / 'manual_annotations' / 'signalless_recordings.npy', signalless_recordings)