In [1]:
import os
import pathlib
from pathlib import Path
import pickle

import logging
logging.basicConfig(level=logging.ERROR)

from datetime import date, time
from birdnet import (SpeciesPredictions,
                     predict_species_within_audio_file,
                     predict_species_at_location_and_time
)
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Location of the CSV holding detections from previously analyzed recordings
clean_bird_path = Path('../data/processed/clean_bird_data.csv')

if clean_bird_path.exists():
    clean_bird_df = pd.read_csv(clean_bird_path)
else:
    # No processed data yet: fall back to an empty frame with the expected
    # columns so later cells can reference clean_bird_df without a NameError
    clean_bird_df = pd.DataFrame(columns=['source',
                                          'Burn_unit',
                                          'Burn_Severity',
                                          'Survey_Location',
                                          'date',
                                          'time',
                                          'ScientificName',
                                          'CommonName',
                                          'Start(s)',
                                          'End(s)',
                                          'Confidence'])

In [4]:
# Display the detections table read from clean_bird_path (when that CSV exists)
clean_bird_df

Unnamed: 0,source,Burn_unit,Burn_Severity,Survey_Location,date,time,ScientificName,CommonName,Start(s),End(s),Confidence
0,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,249,252,0.599413
1,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,381,384,0.708244
2,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Selasphorus platycercus,Broad-tailed Hummingbird,495,498,0.610796
3,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,1908,1911,0.506427
4,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2616,2619,0.537847
5,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2829,2832,0.75395
6,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,2841,2844,0.521106
7,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2871,2874,0.789575
8,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2886,2889,0.75074
9,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2889,2892,0.746448


In [5]:
# Latitude / longitude handed to BirdNET's location-based species prediction
SURVEY_LAT, SURVEY_LON = 40, -105

# The prediction is a mapping; only its keys (species labels) are kept,
# as a set, for use as the species filter during audio analysis
species_list = predict_species_at_location_and_time(SURVEY_LAT, SURVEY_LON)
species_set = set(species_list)
species_set

{'Acanthis flammea_Common Redpoll',
 "Accipiter cooperii_Cooper's Hawk",
 'Accipiter gentilis_Northern Goshawk',
 'Accipiter striatus_Sharp-shinned Hawk',
 'Actitis macularius_Spotted Sandpiper',
 "Aechmophorus clarkii_Clark's Grebe",
 'Aechmophorus occidentalis_Western Grebe',
 'Aegolius acadicus_Northern Saw-whet Owl',
 'Aeronautes saxatalis_White-throated Swift',
 'Agelaius phoeniceus_Red-winged Blackbird',
 'Aix sponsa_Wood Duck',
 'Ammodramus savannarum_Grasshopper Sparrow',
 'Anas acuta_Northern Pintail',
 'Anas crecca_Green-winged Teal',
 'Anas platyrhynchos_Mallard',
 'Anser albifrons_Greater White-fronted Goose',
 'Anser caerulescens_Snow Goose',
 "Anser rossii_Ross's Goose",
 'Anthus rubescens_American Pipit',
 "Anthus spragueii_Sprague's Pipit",
 'Antigone canadensis_Sandhill Crane',
 "Aphelocoma woodhouseii_Woodhouse's Scrub-Jay",
 'Aquila chrysaetos_Golden Eagle',
 'Archilochus alexandri_Black-chinned Hummingbird',
 'Ardea alba_Great Egret',
 'Ardea herodias_Great Blue Her

In [6]:
# Project root: <home>/earth-analytics/bird-automation
base_dir = Path.home() / 'earth-analytics' / 'bird-automation'

# Recursively collect .wav recordings, keeping only those stored under a
# "Data2" folder and not under a "Data" folder (Exclude Bat Audio Files)
wav_files = []
for wav_path in base_dir.rglob('*.wav'):
    folders = wav_path.parts
    if "Data2" in folders and "Data" not in folders:
        wav_files.append(wav_path)
wav_files

[WindowsPath('C:/Users/tjsto/earth-analytics/bird-automation/data/raw/Data2/CP1_045/CP1-045_20240613_053202.wav'),
 WindowsPath('C:/Users/tjsto/earth-analytics/bird-automation/data/raw/Data2/CP1_045/CP1-045_20240613_193600.wav')]

In [7]:
cache_file = "bird_results.pkl"

# Start with an empty results mapping and swap in the pickled cache if one
# exists. NOTE: pickle.load can execute arbitrary code from the file, so only
# load cache files that were produced by this notebook.
result_dict = {}
if os.path.exists(cache_file):
    with open(cache_file, "rb") as cache_handle:
        result_dict = pickle.load(cache_handle)

In [None]:
# Analyze every recording whose file name is not already a key in the cache
for file in tqdm(wav_files):
    if file.name not in result_dict:
        # Run BirdNET artificial neural network on this recording
        raw_predictions = predict_species_within_audio_file(
            file,
            min_confidence=0.50,
            species_filter=species_set,
        )
        result = SpeciesPredictions(raw_predictions)

        # Store under the file name, then rewrite the cache file right away
        # so recordings finished before an interruption are skipped next run
        result_dict[file.name] = result
        with open(cache_file, "wb") as cache_handle:
            pickle.dump(result_dict, cache_handle)

  0%|          | 0/2 [00:00<?, ?it/s]

False
False


In [11]:
# Inspect the predictions collected so far, keyed by .wav file name
result_dict

{'CP1-045_20240613_053202.wav': OrderedDict([((0.0, 3.0), OrderedDict()),
              ((3.0, 6.0), OrderedDict()),
              ((6.0, 9.0), OrderedDict()),
              ((9.0, 12.0), OrderedDict()),
              ((12.0, 15.0), OrderedDict()),
              ((15.0, 18.0), OrderedDict()),
              ((18.0, 21.0), OrderedDict()),
              ((21.0, 24.0), OrderedDict()),
              ((24.0, 27.0), OrderedDict()),
              ((27.0, 30.0), OrderedDict()),
              ((30.0, 33.0), OrderedDict()),
              ((33.0, 36.0), OrderedDict()),
              ((36.0, 39.0), OrderedDict()),
              ((39.0, 42.0), OrderedDict()),
              ((42.0, 45.0), OrderedDict()),
              ((45.0, 48.0), OrderedDict()),
              ((48.0, 51.0), OrderedDict()),
              ((51.0, 54.0), OrderedDict()),
              ((54.0, 57.0), OrderedDict()),
              ((57.0, 60.0), OrderedDict()),
              ((60.0, 63.0), OrderedDict()),
              ((63.0, 66.0), Or

In [13]:
# Column order of the tidy detections table
result_columns = ['source',
                  'Burn_unit',
                  'Burn_Severity',
                  'Survey_Location',
                  'date',
                  'time',
                  'ScientificName',
                  'CommonName',
                  'Start(s)',
                  'End(s)',
                  'Confidence']

results_temp = []

for file, result in result_dict.items():
    # Flatten the two-level mapping {(start, end): {species: confidence}}
    detections = [
        (time_interval, species, confidence)
        for time_interval, pred_dict in result.items()
        for species, confidence in pred_dict.items()
    ]
    # Files without detections contribute no rows; skip before parsing
    if not detections:
        continue

    # Metadata sits at fixed positions in the file name,
    # e.g. 'CP1-045_20240613_053202.wav'; parse it once per file.
    # date()/time() are built from the digits directly because
    # fromisoformat() only accepts the compact 'YYYYMMDD' / 'HHMMSS'
    # forms on Python 3.11+.
    date_str = file[8:16]
    time_str = file[17:23]
    file_metadata = {
        'source': file,
        'Burn_unit': file[:2],
        'Burn_Severity': file[2],
        'Survey_Location': int(file[4:7]),
        'date': date(int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8])),
        'time': time(int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])),
    }

    for time_interval, species, confidence in detections:
        # Labels look like 'Scientific name_Common name'; split only on the
        # first underscore so the common name is never truncated
        scientific_name, common_name = species.split('_', 1)
        results_temp.append({**file_metadata,
                             'ScientificName': scientific_name,
                             'CommonName': common_name,
                             'Start(s)': int(time_interval[0]),
                             'End(s)': int(time_interval[1]),
                             'Confidence': confidence})

# Build DataFrame from results
result_species_df = pd.DataFrame(results_temp, columns=result_columns)

result_species_df

Unnamed: 0,source,Burn_unit,Burn_Severity,Survey_Location,date,time,ScientificName,CommonName,Start(s),End(s),Confidence
0,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,249,252,0.599413
1,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,381,384,0.708244
2,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Selasphorus platycercus,Broad-tailed Hummingbird,495,498,0.610796
3,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,1908,1911,0.506427
4,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2616,2619,0.537847
5,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2829,2832,0.75395
6,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,2841,2844,0.521106
7,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2871,2874,0.789576
8,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2886,2889,0.75074
9,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2889,2892,0.746448


In [14]:
# Keep only the first occurrence of each fully identical row
result_species_df = result_species_df.drop_duplicates()

result_species_df

Unnamed: 0,source,Burn_unit,Burn_Severity,Survey_Location,date,time,ScientificName,CommonName,Start(s),End(s),Confidence
0,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,249,252,0.599413
1,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,381,384,0.708244
2,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Selasphorus platycercus,Broad-tailed Hummingbird,495,498,0.610796
3,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Piranga ludoviciana,Western Tanager,1908,1911,0.506427
4,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2616,2619,0.537847
5,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2829,2832,0.75395
6,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Poecile gambeli,Mountain Chickadee,2841,2844,0.521106
7,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2871,2874,0.789576
8,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2886,2889,0.75074
9,CP1-045_20240613_053202.wav,CP,1,45,2024-06-13,05:32:02,Empidonax occidentalis,Cordilleran Flycatcher,2889,2892,0.746448


### Save to CSV file

In [38]:
# Write to clean_bird_path, the same file the first load cell reads, so the
# save and load locations cannot drift apart
# Create the output folder first; to_csv does not create missing directories
clean_bird_path.parent.mkdir(parents=True, exist_ok=True)
result_species_df.to_csv(clean_bird_path, index=False)

## Rewrite with caching support

In [20]:
# Read the processed CSV from the location the save cell writes to
# ('../data/processed/'), not from the current working directory
clean_bird_path = Path('../data/processed/clean_bird_data.csv')

if clean_bird_path.exists():
    clean_bird_df = pd.read_csv(clean_bird_path)
else:
    # Nothing processed yet: empty frame with the expected columns so the
    # "already analyzed" lookup below still works
    clean_bird_df = pd.DataFrame(columns=['source',
                                          'Burn_unit',
                                          'Burn_Severity',
                                          'Survey_Location',
                                          'date',
                                          'time',
                                          'ScientificName',
                                          'CommonName',
                                          'Start(s)',
                                          'End(s)',
                                          'Confidence'])

True


In [12]:
def gen_species_set(lat=None, lon=None):
    """Return the set of species labels BirdNET predicts for a location.

    Parameters
    ----------
    lat : float or str, optional
        Latitude. When omitted, it is requested interactively via input().
    lon : float or str, optional
        Longitude. When omitted, it is requested interactively via input().

    Returns
    -------
    set
        The keys of the mapping returned by
        predict_species_at_location_and_time(lat, lon).

    Raises
    ------
    ValueError
        If a coordinate cannot be converted to float.
    """
    # float(), not int(): int("40.0") raises ValueError, and int would also
    # discard the decimal part of a coordinate
    if lat is None:
        lat = input("Enter the latitude to filter species (e.g. 40.0): ")
    lat = float(lat)

    if lon is None:
        lon = input("Enter the longitude to filter species (e.g. -105.0): ")
    lon = float(lon)

    species_list = predict_species_at_location_and_time(lat, lon)
    species_set = set(species_list.keys())

    return species_set

In [None]:
# Load existing cache if exists
# NOTE: pickle.load can execute arbitrary code from the file; only load a
# cache file that this notebook wrote
if os.path.exists(cache_file):
    with open(cache_file, "rb") as f:
        result_dict = pickle.load(f)
else: 
    # Establish empty dictionary to write results to
    result_dict = {}

# File names already present in the processed CSV. `name in series` tests the
# Series index labels, not its values, so membership is checked against a set
# of the values instead.
analyzed_sources = set(clean_bird_df['source'])

# Loop through .wav files
for file_path in tqdm(wav_files):
    # File name for metadata
    file_name = os.path.basename(file_path)
    # Check if file has been analyzed already (in the CSV or in the cache)
    if file_name not in analyzed_sources and file_name not in result_dict:
        # Run BirdNET artificial neural network on the current recording
        # (file_path, not a `file` variable left over from an earlier cell)
        result = SpeciesPredictions(predict_species_within_audio_file(
            file_path, min_confidence=0.50,
            species_filter=species_set))

        # Add to dictionary
        result_dict[file_name] = result
    