In [2]:
import csv
import os
from datetime import date, datetime, time
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from birdnet import (SpeciesPredictions,
                     predict_species_within_audio_file,
                     predict_species_within_audio_files_mp,
                     predict_species_at_location_and_time
)





In [3]:
# Silence every warning for the rest of this kernel session: the 'ignore'
# action is installed with no message, category or module restriction.
import warnings
warnings.simplefilter('ignore')

In [4]:
# Establish empty dictionary to write results to
# (audio file name -> SpeciesPredictions returned for that file)
result_dict = {}

# Establish directory to obtain audio files from
audio_data_path = '../data/raw/audio_data'

# Collect the recordings to analyse. Sub-directories and hidden files
# (e.g. .DS_Store, .gitkeep) are skipped so they are never handed to BirdNET,
# and the paths are sorted because directory listing order is arbitrary;
# sorting keeps the processing order (and the row order of anything built
# from result_dict) the same on every run.
audio_file_paths = sorted(
    entry for entry in Path(audio_data_path).iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)

for file_path in audio_file_paths:
    # Run BirdNET artificial neural network; min_confidence=0.50 is passed
    # through as the confidence threshold for reported species
    result = SpeciesPredictions(predict_species_within_audio_file(
        file_path, min_confidence=0.50))
    # File name for metadata
    file_name = file_path.name
    # Add to dictionary
    result_dict[file_name] = result

Predicting species: 100%|██████████| 3600/3600 [00:44<00:00, 80.42s/s] 
Predicting species: 100%|██████████| 3598/3598 [00:45<00:00, 79.36s/s] 


In [8]:
# Inspect the raw predictions. Structure, as shown in the output below:
#   file name -> OrderedDict of (start_s, end_s) -> OrderedDict of
#   'ScientificName_CommonName' -> confidence (empty when nothing was detected).
# NOTE(review): this displays every interval of every file, including the
# empty ones, so the output is very long — consider previewing a slice instead.
result_dict

{'CP1-007_20240701_193800.wav': OrderedDict([((0.0, 3.0), OrderedDict()),
              ((3.0, 6.0), OrderedDict()),
              ((6.0, 9.0), OrderedDict()),
              ((9.0, 12.0), OrderedDict()),
              ((12.0, 15.0), OrderedDict()),
              ((15.0, 18.0), OrderedDict()),
              ((18.0, 21.0), OrderedDict()),
              ((21.0, 24.0), OrderedDict()),
              ((24.0, 27.0), OrderedDict()),
              ((27.0, 30.0), OrderedDict()),
              ((30.0, 33.0), OrderedDict()),
              ((33.0, 36.0), OrderedDict()),
              ((36.0, 39.0), OrderedDict()),
              ((39.0, 42.0), OrderedDict()),
              ((42.0, 45.0),
               OrderedDict([('Setophaga magnolia_Magnolia Warbler',
                             0.67423725)])),
              ((45.0, 48.0), OrderedDict()),
              ((48.0, 51.0), OrderedDict()),
              ((51.0, 54.0), OrderedDict()),
              ((54.0, 57.0), OrderedDict()),
              ((57.0, 60

In [5]:
# Flatten the nested predictions in result_dict into one row per detection.
#
# Recording metadata is read from fixed positions in the file name, which is
# expected to look like 'CP1-007_20240701_193800.wav':
#   [0:2]   -> Burn_unit
#   [2]     -> Burn_Severity
#   [4:7]   -> Survey_Location (integer)
#   [8:16]  -> date as YYYYMMDD
#   [17:23] -> time as HHMMSS
results_temp = []

for file, result in result_dict.items():
    # Loop through first tier OrderedDict: (start, end) in seconds -> predictions
    for time_interval, pred_dict in result.items():
        # Loop through second tier OrderedDict: species label -> confidence
        for species, confidence in pred_dict.items():
            # Species labels look like 'ScientificName_CommonName'; maxsplit=1
            # keeps any further underscores as part of the common name.
            scientific_name, common_name = species.split('_', 1)
            # strptime with explicit formats is used because
            # date.fromisoformat / time.fromisoformat only accept the compact
            # 'YYYYMMDD' / 'HHMMSS' forms from Python 3.11 onwards.
            recording_date = datetime.strptime(file[8:16], '%Y%m%d').date()
            recording_time = datetime.strptime(file[17:23], '%H%M%S').time()
            species_dict = {'source': file,
                            'Burn_unit': file[:2],
                            'Burn_Severity': file[2],
                            'Survey_Location': int(file[4:7]),
                            'date': recording_date,
                            'time': recording_time,
                            'ScientificName': scientific_name,
                            'CommonName': common_name,
                            'Start(s)': int(time_interval[0]),
                            'End(s)': int(time_interval[1]),
                            'Confidence': round(confidence, 2)}
            results_temp.append(species_dict)

# The column list is passed explicitly so the frame keeps this schema and
# column order even when no detections were found (results_temp is empty).
result_species_df = pd.DataFrame(results_temp,
                                 columns=['source',
                                          'Burn_unit',
                                          'Burn_Severity',
                                          'Survey_Location',
                                          'date',
                                          'time',
                                          'ScientificName',
                                          'CommonName',
                                          'Start(s)',
                                          'End(s)',
                                          'Confidence'])

result_species_df

Unnamed: 0,source,Burn_unit,Burn_Severity,Survey_Location,date,time,ScientificName,CommonName,Start(s),End(s),Confidence
0,CP1-007_20240701_193800.wav,CP,1,7,2024-07-01,19:38:00,Setophaga magnolia,Magnolia Warbler,42,45,0.67
1,CP1-007_20240701_193800.wav,CP,1,7,2024-07-01,19:38:00,Melospiza lincolnii,Lincoln's Sparrow,153,156,0.96
2,CP1-007_20240701_193800.wav,CP,1,7,2024-07-01,19:38:00,Setophaga nigrescens,Black-throated Gray Warbler,186,189,0.95
3,CP1-007_20240701_193800.wav,CP,1,7,2024-07-01,19:38:00,Corthylio calendula,Ruby-crowned Kinglet,417,420,0.57
4,CP1-007_20240701_193800.wav,CP,1,7,2024-07-01,19:38:00,Setophaga coronata,Yellow-rumped Warbler,420,423,0.58
...,...,...,...,...,...,...,...,...,...,...,...
103,CP1-007_20240702_053702.wav,CP,1,7,2024-07-02,05:37:02,Melospiza lincolnii,Lincoln's Sparrow,1200,1203,0.55
104,CP1-007_20240702_053702.wav,CP,1,7,2024-07-02,05:37:02,Melospiza lincolnii,Lincoln's Sparrow,1239,1242,0.60
105,CP1-007_20240702_053702.wav,CP,1,7,2024-07-02,05:37:02,Melospiza lincolnii,Lincoln's Sparrow,1281,1284,0.63
106,CP1-007_20240702_053702.wav,CP,1,7,2024-07-02,05:37:02,Setophaga coronata,Yellow-rumped Warbler,1284,1287,0.57


In [7]:
# Write the flattened detections to the processed-data folder, without the
# DataFrame index column.
clean_bird_data_path = Path('../data/processed/clean_bird_data.csv')
# to_csv does not create missing folders, so make sure the target directory
# exists first (no-op when it is already there).
clean_bird_data_path.parent.mkdir(parents=True, exist_ok=True)
result_species_df.to_csv(clean_bird_data_path, index=False)