In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Collect the annotation tables (*.txt) found in raven_annotations/.
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of the combined table (and the exported CSV) reproducible.
files = sorted(f for f in os.listdir('raven_annotations') if f.endswith('.txt'))

In [3]:
# Load every annotation table and normalise it to a common four-column schema:
# filename, begin_time_s, end_time_s, id.

# The column holding the annotated ID is named differently across files; the
# first name in this tuple that is present in a table is used.
ID_COLUMN_CANDIDATES = ('ID', 'Annotation', 'Bird ID', 'fBirdID')
ANNOTATION_COLUMNS = ['filename', 'begin_time_s', 'end_time_s', 'id']

tables = []
for file in files:
    # The recording name is built from the first three '_'-separated fields
    # of the annotation file name.
    recording_name = f"{'_'.join(file.split('_')[0:3])}.wav"
    table = pd.read_table(f'raven_annotations/{file}')

    id_column = next((col for col in ID_COLUMN_CANDIDATES if col in table.columns), None)
    if id_column is None:
        raise ValueError(f'No ID column found, cols: {table.columns}')

    # Build a fresh frame per file (no writes into a slice of `table`), with
    # the ids lower-cased.
    out = (
        table.assign(filename=recording_name)
        .loc[:, ['filename', 'Begin Time (s)', 'End Time (s)', id_column]]
        .rename(columns={
            'Begin Time (s)': 'begin_time_s',
            'End Time (s)': 'end_time_s',
            id_column: 'id',
        })
        .assign(id=lambda df: df['id'].str.lower())
    )
    tables.append(out)

# Concatenate once instead of growing the frame inside the loop. With no input
# files, fall back to an empty frame that still has the expected columns so
# the dropna below does not fail on a missing 'id' column.
if tables:
    annotations = pd.concat(tables, ignore_index=True)
else:
    annotations = pd.DataFrame(columns=ANNOTATION_COLUMNS)

# Rows without an id cannot be labelled.
annotations = annotations.dropna(subset=['id'])

In [4]:
# Read the code lookup table and lower-case its `four_code` column.
bird_codes = pd.read_csv('bird_codes.csv').assign(
    four_code=lambda codes: codes['four_code'].str.lower()
)

In [5]:
# Lookup from `four_code` to `code`; if a four_code repeats, the last row wins.
codes_dict = {
    four_code: code
    for four_code, code in zip(bird_codes['four_code'], bird_codes['code'])
}

In [6]:
def _label_for(annotation_id):
    """Return the label for one annotation id, or NaN when it gets none.

    Ids containing '*' are skipped. Otherwise the id is searched for every
    key of `codes_dict` as a substring, and the value of the last matching
    key (in dict order) is returned.

    NOTE(review): when several codes occur in one id, the last match wins.
    This preserves the earlier behaviour; confirm that it is intended.
    """
    if '*' in annotation_id:
        return np.nan
    label = np.nan
    for four_code, code in codes_dict.items():
        # Non-string keys (e.g. NaN from a blank lookup row) cannot be
        # substring-matched and would raise TypeError, so skip them.
        if isinstance(four_code, str) and four_code in annotation_id:
            label = code
    return label


# Assign the whole column at once: 'label' then always exists (even when
# nothing matches or the table is empty) and re-running the cell cannot keep
# stale labels from an earlier run.
annotations['label'] = annotations['id'].map(_label_for)

In [7]:
# Keep only the rows that received a label and renumber them from 0.
annotations = annotations[annotations['label'].notna()].reset_index(drop=True)

In [8]:
# Round the midpoint of each annotation down to a multiple of 5 seconds.
annotations['timestamp_s'] = (
    annotations['begin_time_s']
    .add(annotations['end_time_s'])
    .div(2)
    .pipe(lambda midpoint: midpoint - (midpoint % 5))
)

In [38]:
# When the third '_'-separated field of a filename is exactly 9 characters
# long, rebuild the name from its first three fields with a '0' prefixed to
# the third (presumably a time field that lost its leading zero; verify).
annotations['filename'] = annotations['filename'].map(
    lambda name: (
        '_'.join(name.split('_')[:2] + ['0' + name.split('_')[2]])
        if len(name.split('_')[2]) == 9
        else name
    )
)


In [39]:
# Display the final labelled annotations table for inspection.
annotations

Unnamed: 0,filename,begin_time_s,end_time_s,id,label,timestamp_s
0,BLUE-6-CAPLE_20180606_070000.wav,1.227414,3.657139,moch,mouchi,0.0
1,BLUE-6-CAPLE_20180606_070000.wav,0.074323,1.145050,moch,mouchi,0.0
2,BLUE-6-CAPLE_20180606_070000.wav,5.040054,6.632416,stja,stejay,5.0
3,BLUE-6-CAPLE_20180606_070000.wav,9.405324,10.832959,stja,stejay,10.0
4,BLUE-6-CAPLE_20180606_070000.wav,90.914068,92.198867,rbnu,rebnut,90.0
...,...,...,...,...,...,...
11487,RED-4-CAPL_20190624_093000.wav,293.455127,294.813289,gcki,gockin,290.0
11488,RED-4-CAPL_20190624_093000.wav,298.820286,299.999977,stja,stejay,295.0
11489,RED-4-CAPL_20190624_093000.wav,298.116952,299.790402,gcki,gockin,295.0
11490,RED-4-CAPL_20190624_093000.wav,296.176721,298.771781,gcki,gockin,295.0


In [40]:
# Export the labelled annotations to CSV without the DataFrame index column.
annotations.to_csv(path_or_buf='raven_annotations.csv', index=False)