In [1]:
%load_ext autoreload
%autoreload 2

In [35]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import torchaudio
import soundfile as sf
import seaborn as sns

from pathlib import Path
import plotly.express as px
import matplotlib.pyplot as plt
from IPython.display import Audio

from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryMultiProcessingAnalyzer

In [11]:
train_dir = Path('E:\data\BirdCLEF')

class CFG:
    random_seed = 42
    
    ROOT_FOLDER = train_dir
    AUDIO_FOLDER = train_dir / 'train_audio'
    DATA_DIR = train_dir / 'spectros'
    TRAIN_CSV = train_dir / 'train_metadata.csv'
    RESULTS_DIR = train_dir / 'results'
    CKPT_DIR = RESULTS_DIR / 'ckpt'
    bird20223 = train_dir / 'bird2023.csv'

In [36]:
meta_df = pd.read_csv(CFG.TRAIN_CSV)
df_23 = pd.read_csv(CFG.bird20223)
df_23.shape, df_23.shape

((16941, 12), (16941, 12))

In [78]:
directories = meta_df.primary_label.unique().tolist()
directories = [str(CFG.AUDIO_FOLDER / d) for d in directories]
len(directories), directories[0]

(182, 'E:\\data\\BirdCLEF\\train_audio\\asbfly')

In [42]:
meta_df['filename'] = f'{str(CFG.AUDIO_FOLDER)}\\' + meta_df['filename']

In [43]:
meta_df.head(2)

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,E:\data\BirdCLEF\train_audio\asbfly/XC134896.ogg
1,asbfly,[],['song'],51.403,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,E:\data\BirdCLEF\train_audio\asbfly/XC164848.ogg


In [44]:
prim_df = meta_df[meta_df['secondary_labels'] == '[]']
prim_df.shape

(22567, 12)

### Analyze

In [45]:
prim_df.iloc[0]

primary_label                                                  asbfly
secondary_labels                                                   []
type                                                         ['call']
latitude                                                      39.2297
longitude                                                    118.1987
scientific_name                                    Muscicapa dauurica
common_name                                    Asian Brown Flycatcher
author                                                 Matt Slaymaker
license             Creative Commons Attribution-NonCommercial-Sha...
rating                                                            5.0
url                                 https://www.xeno-canto.org/134896
filename             E:\data\BirdCLEF\train_audio\asbfly/XC134896.ogg
Name: 0, dtype: object

In [46]:
filename = prim_df.iloc[0].filename
filename

'E:\\data\\BirdCLEF\\train_audio\\asbfly/XC134896.ogg'

In [47]:
# Load and initialize the BirdNET-Analyzer models.
analyzer = Analyzer()

recording = Recording(
    analyzer,
    filename,
    # lat=35.4244,
    # lon=-120.7463,
    # date=datetime(year=2022, month=5, day=10), # use date or week_48
    min_conf=0.5,
)
recording.analyze()
len(recording.detections)

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
read_audio_data
read_audio_data: complete, read  9 chunks.
analyze_recording XC134896.ogg


6

In [54]:
recording.path, recording.path.split('/')[0].split('\\')[-1]

('E:\\data\\BirdCLEF\\train_audio\\asbfly/XC134896.ogg', 'asbfly')

In [48]:
recording.detections

[{'common_name': 'Asian Brown Flycatcher',
  'scientific_name': 'Muscicapa dauurica',
  'start_time': 0.0,
  'end_time': 3.0,
  'confidence': 0.9642523527145386,
  'label': 'Muscicapa dauurica_Asian Brown Flycatcher'},
 {'common_name': 'Asian Brown Flycatcher',
  'scientific_name': 'Muscicapa dauurica',
  'start_time': 6.0,
  'end_time': 9.0,
  'confidence': 0.9933910369873047,
  'label': 'Muscicapa dauurica_Asian Brown Flycatcher'},
 {'common_name': 'Asian Brown Flycatcher',
  'scientific_name': 'Muscicapa dauurica',
  'start_time': 9.0,
  'end_time': 12.0,
  'confidence': 0.9805001020431519,
  'label': 'Muscicapa dauurica_Asian Brown Flycatcher'},
 {'common_name': 'Asian Brown Flycatcher',
  'scientific_name': 'Muscicapa dauurica',
  'start_time': 15.0,
  'end_time': 18.0,
  'confidence': 0.9784349203109741,
  'label': 'Muscicapa dauurica_Asian Brown Flycatcher'},
 {'common_name': 'Asian Brown Flycatcher',
  'scientific_name': 'Muscicapa dauurica',
  'start_time': 18.0,
  'end_time':

In [59]:
label = recording.path.split('/')[0].split('\\')[-1]

# for det in recording.detections:
#     print(det['start_time'])
data = [(label, x['start_time'], x['end_time']) for x in recording.detections]

data[0]

('asbfly', 0.0, 3.0)

In [103]:
def on_analyze_directory_complete(recordings, file=train_dir / "bird_preds.csv"):
    detections = []
    
    for rec in recordings:
        if rec.error:
            print(f'{rec.error_message} in {rec.path}')
        else:
            filename= rec.path.split('\\')[-1]
            label = rec.path.split('\\')[-2]
            
            # print(filename, label)
            
            data = [(filename, label, x['start_time'], x['end_time']) for x in recording.detections]
            detections.append(pd.DataFrame(data, columns = ['filename', 'label', 'start', 'end']))

    print(len(detections))

    results_df = pd.concat(detections, axis=0)

    results_df.to_csv(file, index=False)    
    
    # return detections

In [104]:
directory = directories[1]
directory = CFG.AUDIO_FOLDER
print(directory)

batch = DirectoryMultiProcessingAnalyzer(
    directory,
    analyzers=[analyzer],
    patterns=["*/*.ogg"],
    # lon=-120.7463,
    # lat=35.4244,
    # # date=datetime(year=2022, month=5, day=10),
    min_conf=0.8,
)

batch.on_analyze_directory_complete = on_analyze_directory_complete

E:\data\BirdCLEF\train_audio


In [105]:
batch.process()

24459
