# Example Usage of the Code with Two Files

In [25]:
import os
from io import BytesIO
import pathlib
from pathlib import Path
import requests
from zipfile import ZipFile

import logging
logging.basicConfig(level=logging.ERROR)

from datetime import date, time
from birdnet import (SpeciesPredictions,
                     predict_species_within_audio_file,
                     predict_species_at_location_and_time
)
import pandas as pd
from tqdm.notebook import tqdm

In [26]:
# Project root, kept under the current user's home directory.
base_dir = Path.home() / 'earth-analytics' / 'bird-automation'

# Directory that receives the downloaded testing audio data.
test_data_dir = base_dir / "data" / "raw"
test_data_dir.mkdir(parents=True, exist_ok=True)

test_data_url = ("https://github.com/tjstogoski/bird-automation/"
                 "releases/download/v.1.0/audio_data.zip")

# The archive is unpacked into this folder; its existence is also the
# "already downloaded" marker checked below.
test_data_path = test_data_dir / 'Audio Data'

# Download test files once
if not test_data_path.exists():
    # The timeout bounds the connection attempt and each wait for data
    # (not the whole transfer), so a stalled server cannot hang the
    # notebook forever.
    req = requests.get(test_data_url, timeout=60)
    if req.status_code == 200:
        # Unpack straight from memory; the zip is never written to disk.
        with ZipFile(BytesIO(req.content)) as zip_file:
            zip_file.extractall(test_data_path)
        print("Files extracted successfully")
    else:
        print(f"Failed to download file: {req.status_code}")

In [27]:
def gen_species_set(lat=None, lon=None):
    """
    Generates a set of bird species for a latitude/longitude.

    Any coordinate that is not passed in is requested from the user with
    ``input``; calling the function with no arguments prompts for both.

    Parameters
    ----------
    lat : float, optional
        Latitude in decimal degrees, ranging from -90 to 90.
    lon : float, optional
        Longitude in decimal degrees, ranging from -180 to 180.

    Returns
    -------
    species_set : set
        Keys of the location-based species prediction, formatted as
        'Scientific name_Common name'.

    Raises
    ------
    ValueError
        If a coordinate cannot be converted to a float or lies outside
        its valid range.
    """
    if lat is None:
        lat = input("Enter the latitude to filter species (e.g. 40.0): ")
    lat = float(lat)
    # Reject bad coordinates before the model is invoked. The chained
    # comparison is also False for NaN.
    if not -90.0 <= lat <= 90.0:
        raise ValueError(f"Latitude must be between -90 and 90, got {lat}")

    if lon is None:
        lon = input("Enter the longitude to filter species (e.g. -105.0): ")
    lon = float(lon)
    if not -180.0 <= lon <= 180.0:
        raise ValueError(f"Longitude must be between -180 and 180, got {lon}")

    species_list = predict_species_at_location_and_time(lat, lon)
    # Only the species names are needed downstream, not their scores.
    species_set = set(species_list.keys())

    return species_set

In [None]:
# For example files use the coordinates (40, -105)
# gen_species_set() asks for the latitude and longitude with input(), so
# this cell waits for keyboard input when it runs. The resulting set is
# reused below as the species filter for the audio predictions.
species_set = gen_species_set()
species_set

{'Acanthis flammea_Common Redpoll',
 "Accipiter cooperii_Cooper's Hawk",
 'Accipiter gentilis_Northern Goshawk',
 'Accipiter striatus_Sharp-shinned Hawk',
 'Actitis macularius_Spotted Sandpiper',
 "Aechmophorus clarkii_Clark's Grebe",
 'Aechmophorus occidentalis_Western Grebe',
 'Aegolius acadicus_Northern Saw-whet Owl',
 'Aeronautes saxatalis_White-throated Swift',
 'Agelaius phoeniceus_Red-winged Blackbird',
 'Aix sponsa_Wood Duck',
 'Ammodramus savannarum_Grasshopper Sparrow',
 'Anas acuta_Northern Pintail',
 'Anas crecca_Green-winged Teal',
 'Anas platyrhynchos_Mallard',
 'Anser albifrons_Greater White-fronted Goose',
 'Anser caerulescens_Snow Goose',
 "Anser rossii_Ross's Goose",
 'Anthus rubescens_American Pipit',
 "Anthus spragueii_Sprague's Pipit",
 'Antigone canadensis_Sandhill Crane',
 "Aphelocoma woodhouseii_Woodhouse's Scrub-Jay",
 'Aquila chrysaetos_Golden Eagle',
 'Archilochus alexandri_Black-chinned Hummingbird',
 'Ardea alba_Great Egret',
 'Ardea herodias_Great Blue Her

In [None]:
# Check for existing result csv, otherwise create the header to append to.
clean_bird_path = base_dir / 'data' / 'processed' / 'clean_bird_data.csv'
columns = ['source', 'Burn_unit', 'Burn_Severity', 'Survey_Location',
           'date', 'time', 'ScientificName', 'CommonName', 'Start(s)',
           'End(s)', 'Confidence']

if clean_bird_path.exists():
    # Only the 'source' column is needed to know which recordings already
    # have rows in the csv.
    clean_bird_df = pd.read_csv(clean_bird_path, usecols=["source"])
    analyzed_files = set(clean_bird_df["source"])
else:
    analyzed_files = set()
    # Only data/raw is created earlier in the notebook, so make sure the
    # data/processed folder exists before writing the header row.
    clean_bird_path.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(columns=columns).to_csv(clean_bird_path, index=False)

In [30]:
# Recursively gather every .wav file found below the project directory
wav_files = list(base_dir.rglob('*.wav'))
wav_files

[WindowsPath('C:/Users/tjsto/earth-analytics/bird-automation/data/raw/Audio Data/CP1-007_20240701_193800.wav'),
 WindowsPath('C:/Users/tjsto/earth-analytics/bird-automation/data/raw/Audio Data/CP1-007_20240702_053702.wav')]

In [31]:
def _prediction_rows(file_name, predictions):
    """
    Flatten one recording's predictions into a list of csv row dicts.

    Parameters
    ----------
    file_name : str
        Recording name laid out like 'CP1-007_20240701_193800.wav'; the
        burn unit, burn severity, survey location, date and time are
        sliced from fixed character positions.
    predictions : mapping
        Maps (start, end) time intervals to {species: confidence}
        mappings, where species is 'Scientific name_Common name'.

    Returns
    -------
    rows : list of dict
        One dict per species per time interval, keyed by the csv
        column names.
    """
    date_str = file_name[8:16]
    time_str = file_name[17:23]
    # Fields that depend only on the file name are computed once.
    # date/time are built from their parts because fromisoformat only
    # accepts the compact 'YYYYMMDD' / 'HHMMSS' forms on Python 3.11+.
    file_fields = {
        'source': file_name,
        'Burn_unit': file_name[:2],
        'Burn_Severity': file_name[2],
        'Survey_Location': int(file_name[4:7]),
        'date': date(int(date_str[:4]), int(date_str[4:6]),
                     int(date_str[6:8])),
        'time': time(int(time_str[:2]), int(time_str[2:4]),
                     int(time_str[4:6])),
    }

    rows = []
    # Loop through first tier: time interval -> predictions
    for time_interval, pred_dict in predictions.items():
        # Loop through second tier: species -> confidence
        for species, confidence in pred_dict.items():
            scientific_name, common_name = species.split('_', 1)
            rows.append({**file_fields,
                         'ScientificName': scientific_name,
                         'CommonName': common_name,
                         'Start(s)': int(time_interval[0]),
                         'End(s)': int(time_interval[1]),
                         'Confidence': confidence})
    return rows


result_dict = {}

# Loop through .wav files
for file in tqdm(wav_files):
    # Check if file has been analyzed already
    # NOTE: a recording with no detections writes no rows, so it is not
    # remembered in the csv and will be analyzed again on the next run.
    if file.name in analyzed_files:
        continue
    # Run BirdNET artificial neural network
    result = SpeciesPredictions(predict_species_within_audio_file(
        file, min_confidence=0.50,
        species_filter=species_set))
    # Keep the raw predictions around for later inspection
    result_dict[file.name] = result

    # Unpack ONLY the current file. Walking the whole of result_dict here
    # would append every earlier file's rows to the csv again on each
    # iteration. `columns` is the header list defined with the csv path.
    result_species_df = pd.DataFrame(
        _prediction_rows(file.name, result), columns=columns)

    # Append results to csv incrementally
    result_species_df.to_csv(
        clean_bird_path, mode='a', header=False, index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

Predicting species: 100%|██████████| 3600/3600 [00:43<00:00, 83.17s/s] 
Predicting species: 100%|██████████| 3598/3598 [00:43<00:00, 82.34s/s] 


### Results are located in 'data/processed/clean_bird_data.csv' inside the project directory ('~/earth-analytics/bird-automation')