In [1]:
%load_ext autoreload
%autoreload 2

from util import WavDataset, read_audio_section
from config import *
import pandas as pd
import numpy as np
from pathlib import Path
import pickle

In [2]:
# Folder that holds the raw annotation CSV files (one file per recording).
raw_ann_path = ANNOTATIONS / 'manual_annotations' / 'raw'

# The pickle sits next to the 'raw' folder, inside 'manual_annotations'.
# NOTE(review): unpickling can execute code stored in the file -- only load
# pickles that were produced by this project.
with open(raw_ann_path.parent / 'initial_training_recordings.pkl', 'rb') as f:
    annotated_training_recordings = pickle.load(f)

In [10]:
import os
from plotting import view_spectrogram
from util import read_audio_section
import librosa
import matplotlib.pyplot as plt

# Dataset of recordings; it is indexed by recording name (``ds[rec]``) further
# down to obtain the object handed to ``view`` / ``read_audio_section``.
ds = WavDataset()

def view(rec, start, end, widen=1):
    """Show a dB-scaled spectrogram of one section of a recording.

    Args:
        rec: recording object forwarded unchanged to ``read_audio_section``
            (callers pass ``ds[<recording name>]``).
        start: start of the section, in the time unit ``read_audio_section``
            expects (presumably seconds -- TODO confirm).
        end: end of the section, same unit as ``start``.
        widen: amount of context added before ``start`` and after ``end``.
            Defaults to 1, the value that used to be hard-coded.
    """
    # ``import librosa`` alone does not load the ``display`` submodule on
    # older librosa releases, so import it explicitly where it is used.
    import librosa.display

    target_sr = 22_000

    # Never request audio from before the beginning of the recording.
    section_start = max(start - widen, 0)
    y, sr = read_audio_section(rec, section_start, end + widen)

    y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
    S_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Pass the sample rate so the axes are labelled in time and Hz.
    librosa.display.specshow(S_db, sr=target_sr, x_axis='time', y_axis='hz')
    plt.show()

# Shorthand marks typed into the raw annotation files.
NR_MARK = "'"
TRIANGLE_MARK = ';'


def expand_label_marks(df):
    """Replace the shorthand marks of one recording with full label names, in place.

    Rules, evaluated on the distinct values of the ``label`` column:

    * both marks present: only the rows carrying a mark are renamed;
    * at most one distinct value (e.g. every row unlabelled): nothing changes;
    * exactly one of the marks present next to other values: every row whose
      ``max_f`` lies strictly between 2000 and 6000 receives that mark's label.

    Args:
        df: annotations of a single recording with ``label`` and ``max_f`` columns.

    Returns:
        ``False`` when there are several distinct label values but none of them
        is a known mark (``df`` is left untouched), ``True`` otherwise.
    """
    labels = list(df['label'].unique())
    has_nr = NR_MARK in labels
    has_triangle = TRIANGLE_MARK in labels

    if has_nr and has_triangle:
        # Compute both masks before writing so the two renames cannot interact.
        nr_mask = df['label'] == NR_MARK
        triangle_mask = df['label'] == TRIANGLE_MARK
        df.loc[nr_mask, 'label'] = 'nr_syllable_3khz'
        df.loc[triangle_mask, 'label'] = 'triangle_3khz'
        return True

    if len(labels) <= 1:
        return True

    if not (has_nr or has_triangle):
        return False

    # A single mark describes the whole file: label everything in the band.
    in_band = (df['max_f'] > 2_000) & (df['max_f'] < 6_000)
    df.loc[in_band, 'label'] = 'nr_syllable_3khz' if has_nr else 'triangle_3khz'
    return True


# The os.listdir order is kept as-is (not sorted): later cells patch labels by
# positional row number, which depends on the order the files are concatenated in.
# Entries that are not CSV files (checkpoints, OS metadata, ...) are skipped.
csv_files = [name for name in os.listdir(raw_ann_path) if name.endswith('.csv')]

frames = []
for i, csv_file in enumerate(csv_files):
    rec = csv_file[:-len('.csv')]
    df = pd.read_csv(
        raw_ann_path / csv_file,
        index_col=None,
        sep=',',
        names=['min_t', 'max_t', 'min_f', 'max_f', 'label'],
        # An entirely empty label column would otherwise be parsed as float64,
        # and writing string labels into it is a deprecated dtype change.
        dtype={'label': 'object'},
        encoding='unicode_escape',
    )
    df['recording'] = rec

    if not expand_label_marks(df):
        # Unrecognised labels: show every annotation of the file for inspection.
        print("unknown label")
        print(list(df['label'].unique()))
        for index, row in df.iterrows():
            print(i, index, row['label'])
            view(ds[rec], row['min_t'], row['max_t'])

    # These frequency rules override whatever label was assigned above;
    # the upsweep rule is applied last and therefore wins over the trill rule.
    df.loc[df['max_f'] > 5_900, 'label'] = 'fast_trill_6khz'
    df.loc[df['min_f'] < 600, 'label'] = 'upsweep_500hz'

    frames.append(df)

# Concatenate once; per-file row numbers are kept as the index, as before.
annotations_df = pd.concat(frames) if frames else pd.DataFrame()
    

In [11]:
# Give every row a unique positional index. The per-file row numbers move into
# an 'index' column. The guard makes the cell safe to re-run: a second
# reset_index() would add a 'level_0' column and a third would raise.
if 'index' not in annotations_df.columns:
    annotations_df = annotations_df.reset_index()

# Rows that the automatic rules left without a label.
mask = annotations_df['label'].isna()
annotations_df[mask]

Unnamed: 0,index,min_t,max_t,min_f,max_f,label,recording
0,0,437.526667,437.986667,1768.06,4014.97,,1_20230821_060000.wav
1,1,438.28,438.826667,1910.69,3926.95,,1_20230821_060000.wav
2,2,439.033333,439.553333,1565.13,3798.53,,1_20230821_060000.wav
3,3,439.853333,440.313333,1385.49,3840.86,,1_20230821_060000.wav
4,4,440.646667,441.193333,1432.34,4014.97,,1_20230821_060000.wav
559,0,81.48,82.104,1816.1,4075.53,,1_20230323_070000.wav
560,1,234.396,235.512,1405.96,4243.62,,1_20230323_070000.wav
561,2,245.616,246.732,1368.59,4186.84,,1_20230323_070000.wav
743,4,287.152,287.936,1822.34,4289.02,,1_20230429_073000.wav
933,22,308.976,308.97601,1608.73,1608.73,,1_20230328_063000.wav


In [None]:
# Inspect every annotation that still has no label. The selection is computed
# here instead of reusing `mask`, which other cells rebind to different filters.
unlabelled = annotations_df[annotations_df['label'].isna()]

for i, row in unlabelled.iterrows():
    print(i)
    view(ds[row['recording']], row['min_t'], row['max_t'])
    # Annotation duration (end minus start), so the printed value is positive.
    print(row['max_t'] - row['min_t'])
    

In [12]:
# Manual label fixes, addressed by positional row number.
nr_rows = list(range(0, 5))
# NOTE(review): 1348 and 1946-1949 are not part of the unlabelled listing shown
# above; presumably they carried a wrong or leftover label -- TODO confirm.
triangle_rows = [559, 560, 561, 743, 933, 1348, 1946, 1947, 1948, 1949]

# Row numbers depend on the order in which the raw files were concatenated
# (os.listdir order). Verify the rows whose recording is known from the
# unlabelled listing so a different file order fails loudly instead of
# silently relabelling unrelated annotations.
expected_recordings = {
    0: '1_20230821_060000.wav',
    1: '1_20230821_060000.wav',
    2: '1_20230821_060000.wav',
    3: '1_20230821_060000.wav',
    4: '1_20230821_060000.wav',
    559: '1_20230323_070000.wav',
    560: '1_20230323_070000.wav',
    561: '1_20230323_070000.wav',
    743: '1_20230429_073000.wav',
    933: '1_20230328_063000.wav',
}
found_recordings = annotations_df.loc[list(expected_recordings), 'recording'].tolist()
assert found_recordings == list(expected_recordings.values()), (
    "row numbers no longer point at the expected recordings; "
    "the hard-coded fixes below must be re-derived"
)

annotations_df.loc[nr_rows, 'label'] = 'nr_syllable_3khz'
annotations_df.loc[triangle_rows, 'label'] = 'triangle_3khz'

# Rows that are still unlabelled after the manual fixes.
mask = annotations_df['label'].isna()
annotations_df[mask]

Unnamed: 0,index,min_t,max_t,min_f,max_f,label,recording


In [6]:
# Number of annotations per label (rows without a label are not counted).
annotations_df['label'].value_counts()

label
nr_syllable_3khz    950
fast_trill_6khz     465
upsweep_500hz       326
triangle_3khz       236
;                     4
Name: count, dtype: int64

In [None]:
# Plot every annotation currently labelled as an upsweep for a visual check.

# ``import librosa`` alone does not load the ``display`` submodule on older
# librosa releases, so import it explicitly.
import librosa.display

widen = 1           # context added on each side, same time unit as min_t / max_t
last_t = 599        # upper clamp of the read window -- presumably the usable
                    # recording length; TODO confirm
target_sr = 22_000  # sample rate the audio is resampled to before the STFT

upsweep_df = annotations_df[annotations_df['label'] == 'upsweep_500hz']

for i, row in upsweep_df.iterrows():
    print(i)

    # Keep the widened window inside [0, last_t].
    section_start = max(row['min_t'] - widen, 0)
    section_end = min(row['max_t'] + widen, last_t)
    y, sr = read_audio_section(ds[row['recording']], section_start, section_end)

    y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
    S_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Pass the sample rate so the axes are labelled in time and Hz.
    librosa.display.specshow(S_db, sr=target_sr, x_axis='time', y_axis='hz')
    plt.show()


In [15]:
# Rows 612 through 630 (inclusive) should be nr syllables.
annotations_df.loc[list(range(612, 631)), 'label'] = 'nr_syllable_3khz'

# Annotations still carrying the raw ';' mark are not dropped: they are
# relabelled as triangles. The mask is taken before the assignment, so the
# last line displays the rows that were just relabelled.
mask = annotations_df['label'].eq(';')
annotations_df.loc[mask, 'label'] = 'triangle_3khz'
annotations_df[mask]

Unnamed: 0,index,min_t,max_t,min_f,max_f,label,recording


In [18]:
# Per-label counts after the manual corrections.
annotations_df['label'].value_counts()

label
nr_syllable_3khz    969
fast_trill_6khz     465
upsweep_500hz       317
triangle_3khz       230
Name: count, dtype: int64

In [24]:
# Display only: the result is not assigned, so annotations_df itself is unchanged.
annotations_df.reset_index()

Unnamed: 0,level_0,index,min_t,max_t,min_f,max_f,label,recording
0,0,0,437.526667,437.986667,1768.06,4014.97,nr_syllable_3khz,1_20230821_060000.wav
1,1,1,438.280000,438.826667,1910.69,3926.95,nr_syllable_3khz,1_20230821_060000.wav
2,2,2,439.033333,439.553333,1565.13,3798.53,nr_syllable_3khz,1_20230821_060000.wav
3,3,3,439.853333,440.313333,1385.49,3840.86,nr_syllable_3khz,1_20230821_060000.wav
4,4,4,440.646667,441.193333,1432.34,4014.97,nr_syllable_3khz,1_20230821_060000.wav
...,...,...,...,...,...,...,...,...
1976,1976,18,406.308000,407.064000,5964.76,7874.75,fast_trill_6khz,1_20230828_063000.wav
1977,1977,19,410.489333,411.693333,5964.76,8324.64,fast_trill_6khz,1_20230828_063000.wav
1978,1978,20,417.769333,418.740000,5964.76,7984.89,fast_trill_6khz,1_20230828_063000.wav
1979,1979,21,423.388000,424.125333,6132.78,8209.81,fast_trill_6khz,1_20230828_063000.wav


In [31]:
# Remove the 'index' column that reset_index() added (the per-file row numbers).
# errors='ignore' keeps the cell re-runnable once the column is already gone.
annotations_df = annotations_df.drop(columns='index', errors='ignore')

In [33]:
from config import ANNOTATIONS

# Write the cleaned annotations next to the raw ones; the row index is not exported.
annotations_df.to_csv(
    ANNOTATIONS / 'manual_annotations' / 'initial_manual_annotations.csv',
    index=False,
)

In [32]:
# Display the frame after the 'index' column was dropped.
annotations_df

Unnamed: 0,min_t,max_t,min_f,max_f,label,recording
0,437.526667,437.986667,1768.06,4014.97,nr_syllable_3khz,1_20230821_060000.wav
1,438.280000,438.826667,1910.69,3926.95,nr_syllable_3khz,1_20230821_060000.wav
2,439.033333,439.553333,1565.13,3798.53,nr_syllable_3khz,1_20230821_060000.wav
3,439.853333,440.313333,1385.49,3840.86,nr_syllable_3khz,1_20230821_060000.wav
4,440.646667,441.193333,1432.34,4014.97,nr_syllable_3khz,1_20230821_060000.wav
...,...,...,...,...,...,...
1976,406.308000,407.064000,5964.76,7874.75,fast_trill_6khz,1_20230828_063000.wav
1977,410.489333,411.693333,5964.76,8324.64,fast_trill_6khz,1_20230828_063000.wav
1978,417.769333,418.740000,5964.76,7984.89,fast_trill_6khz,1_20230828_063000.wav
1979,423.388000,424.125333,6132.78,8209.81,fast_trill_6khz,1_20230828_063000.wav


In [34]:
# Summary of column dtypes and non-null counts for the annotation frame.
annotations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1981 entries, 0 to 1980
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   min_t      1981 non-null   float64
 1   max_t      1981 non-null   float64
 2   min_f      1981 non-null   float64
 3   max_f      1981 non-null   float64
 4   label      1981 non-null   object 
 5   recording  1981 non-null   object 
dtypes: float64(4), object(2)
memory usage: 93.0+ KB
