Explore the eBird API

- https://documenter.getpostman.com/view/664302/S1ENwy59
- https://pypi.org/project/ebird-api/
- https://github.com/ProjectBabbler/ebird-api

In [66]:
import csv
import json
import os
import shutil
from glob import glob
from pathlib import Path

import pandas
from dotenv import load_dotenv
from ebird.api import get_observations
from tqdm import tqdm

# Load variables from a local .env file into the process environment.
load_dotenv()  # take environment variables from .env.
# API key passed to every ebird.api call below; this lookup raises KeyError
# when EBIRD_API is not present in the environment.
api_key = os.environ['EBIRD_API']

In [47]:
# Observations for eBird location 'L227544', looking back 7 (back=7).
records = get_observations(api_key, 'L227544', back=7)
# Preview the first few records rather than dumping the full API response
# into the notebook output.
records[:3]

[{'speciesCode': 'wooduc',
  'comName': 'Wood Duck',
  'sciName': 'Aix sponsa',
  'locId': 'L227544',
  'locName': 'Woodman Pond',
  'obsDt': '2023-08-27 08:28',
  'howMany': 12,
  'lat': 42.8565252,
  'lng': -75.5714536,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'subId': 'S148217277'},
 {'speciesCode': 'mallar3',
  'comName': 'Mallard',
  'sciName': 'Anas platyrhynchos',
  'locId': 'L227544',
  'locName': 'Woodman Pond',
  'obsDt': '2023-08-27 08:28',
  'howMany': 4,
  'lat': 42.8565252,
  'lng': -75.5714536,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'subId': 'S148217277'},
 {'speciesCode': 'pibgre',
  'comName': 'Pied-billed Grebe',
  'sciName': 'Podilymbus podiceps',
  'locId': 'L227544',
  'locName': 'Woodman Pond',
  'obsDt': '2023-08-27 08:28',
  'howMany': 1,
  'lat': 42.8565252,
  'lng': -75.5714536,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'subId': 'S148217277'},
 {'speciesCode': '

In [48]:
from ebird.api import get_taxonomy, get_taxonomy_forms, get_taxonomy_versions

# Fetch every entry of the eBird taxonomy and hold it as a DataFrame
# (one row per taxon record returned by the API).
taxonomy = pandas.DataFrame.from_records(get_taxonomy(api_key))
taxonomy

Unnamed: 0,sciName,comName,speciesCode,category,taxonOrder,bandingCodes,comNameCodes,sciNameCodes,order,familyCode,familyComName,familySciName,reportAs,extinct,extinctYear
0,Struthio camelus,Common Ostrich,ostric2,species,2.0,[],[COOS],[STCA],Struthioniformes,struth1,Ostriches,Struthionidae,,,
1,Struthio molybdophanes,Somali Ostrich,ostric3,species,7.0,[],[SOOS],[STMO],Struthioniformes,struth1,Ostriches,Struthionidae,,,
2,Struthio camelus/molybdophanes,Common/Somali Ostrich,y00934,slash,8.0,[],"[SOOS, COOS]","[STCA, STMO]",Struthioniformes,struth1,Ostriches,Struthionidae,,,
3,Rhea americana,Greater Rhea,grerhe1,species,10.0,[],[GRRH],[RHAM],Rheiformes,rheida1,Rheas,Rheidae,,,
4,Rhea pennata,Lesser Rhea,lesrhe2,species,16.0,[],[LERH],[RHPE],Rheiformes,rheida1,Rheas,Rheidae,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16855,Saltator grossus,Slate-colored Grosbeak,slcgro1,species,35127.0,[SCOG],[SCGR],[SAGR],Passeriformes,thraup2,Tanagers and Allies,Thraupidae,,,
16856,Saltator fuliginosus,Black-throated Grosbeak,bltgro2,species,35130.0,[],[BTGR],[SAFU],Passeriformes,thraup2,Tanagers and Allies,Thraupidae,,,
16857,Saltator sp.,saltator sp.,saltat1,spuh,35131.0,[],[],[],Passeriformes,thraup2,Tanagers and Allies,Thraupidae,,,
16858,Passeriformes sp.,passerine sp.,passer1,spuh,35132.0,[],[],[],Passeriformes,,,,,,


In [60]:
def load_birdnet_log(path, confidence_threshold: float = 0.25) -> pandas.DataFrame:
    """Load a BirdNET JSON-lines log into one row per detection.

    Each non-blank line of ``path`` is parsed as a JSON object. Only entries
    whose ``msg`` equals ``'success'`` are kept; their ``results`` list is
    exploded so every ``[name, confidence]`` pair becomes its own row, and
    ``name`` is split at the first underscore into ``species`` and ``common``.

    Parameters
    ----------
    path
        Path of the log file to read.
    confidence_threshold
        Detections are returned only when their confidence is strictly
        greater than this value.

    Returns
    -------
    pandas.DataFrame
        The log's remaining columns (e.g. ``timestamp``, parsed to datetimes)
        followed by ``confidence``, ``species`` and ``common``. The index is
        the exploded entry index, so it may contain repeated labels.

    Raises
    ------
    KeyError
        If the log lacks one of the columns this function reads or drops.
    """
    # Context manager closes the file handle; blank lines are skipped instead
    # of crashing json.loads.
    with open(path) as fp:
        lines = [json.loads(s) for s in fp if s.strip()]
    raw = pandas.DataFrame.from_records(lines)

    raw = raw[raw['msg'] == 'success'].copy()
    # A success entry with an empty results list explodes to NaN; drop those
    # rows because they carry no detection to unpack.
    raw = raw.explode('results').dropna(subset=['results'])
    raw['timestamp'] = pandas.to_datetime(raw['timestamp'])

    # Naming the columns up front keeps this working when there are no
    # detections at all (an empty list would otherwise yield zero columns).
    pairs = pandas.DataFrame(
        raw['results'].tolist(), index=raw.index, columns=['name', 'confidence']
    )
    # Assign raw arrays so the repeated index labels produced by explode never
    # go through label alignment.
    raw['name'] = pairs['name'].to_numpy()
    raw['confidence'] = pairs['confidence'].astype(float).to_numpy()

    # Split only at the first underscore so a label with extra underscores
    # still yields exactly two parts.
    parts = raw['name'].str.split("_", n=1)
    raw['species'] = parts.str[0]
    raw['common'] = parts.str[1]

    tidy = raw.drop(
        columns=['msg', 'results', 'filename', 'oldest', 'name', 'skipped', 'hour_of_day']
    )
    return tidy[tidy['confidence'] > confidence_threshold].copy()


In [102]:
def generate_ebird_record(df, path, location, known_names=None):
    """Write per-minute BirdNET detections to ``path`` as headerless CSV rows.

    Detections are grouped by timestamp (rounded to the nearest minute) and
    common name; each group becomes one row in the 19-column layout annotated
    in the loop below. No file is created when ``df`` yields no rows.

    Parameters
    ----------
    df
        Detections with ``timestamp`` (datetime), ``common``, ``species`` and
        ``confidence`` columns.
    path
        Destination CSV file; missing parent directories are created.
    location
        Location name written into every row.
    known_names
        Optional iterable of accepted common names. When omitted, the
        ``comName`` column of the notebook-level ``taxonomy`` frame is used.

    Raises
    ------
    AssertionError
        If a detected common name is not among the accepted names.
    """
    if known_names is None:
        # Fall back to the taxonomy table loaded earlier in the notebook.
        known_names = taxonomy.comName.values
    # Build the lookup once; a set makes the per-row membership test cheap.
    known_names = set(known_names)

    species_by_minute = (
        df
        .assign(timestamp=lambda r: r['timestamp'].dt.round('min'))
        .groupby(by=['timestamp', 'common'])
        # Named aggregation: number of detections and best confidence per group.
        .agg(calls=('species', 'count'), confidence=('confidence', 'max'))
        .reset_index()
    )

    obs = []
    for row in species_by_minute.itertuples(index=False):
        # Validate the detected name (before the renames below) against the accepted names.
        assert row.common in known_names, (
            f"{row.common!r} is not a known eBird common name"
        )

        # Rename detected names to the spellings used in the exported rows.
        common = row.common
        if common == "Willie-wagtail":
            common = "Willie Wagtail"
        if common == "Eurasian Blackbird":
            common = "Common Blackbird"
        if "Gray" in common:
            common = common.replace("Gray", "Grey")

        obs.append([
            common,                                     # Common/scientific Name
            "",                                         # Genus
            "",                                         # Species
            "+",                                        # Species Count
            "",                                         # Species Comments
            location,                                   # Location Name
            "",                                         # latitude
            "",                                         # longitude
            row.timestamp.strftime('%m/%d/%Y'),         # Observation date
            row.timestamp.strftime('%H:%M'),            # Start time
            "",                                         # State
            "AU",                                       # Country
            "casual",                                   # Protocol
            "",                                         # Number of observers
            "1",                                        # Duration (minutes)
            "Y",                                        # All observations?
            "",                                         # Distance covered
            "",                                         # Area covered
            f"{row.calls} BirdNET calls; {row.confidence} confidence", # Comments
        ])

    if obs:
        out_path = Path(path)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # csv.writer quotes fields containing commas (e.g. "Town, State"), which a
        # plain ','.join would split into extra columns. lineterminator='\n'
        # keeps the same line endings the join-based writer produced.
        with open(out_path, 'wt', newline='') as fp:
            csv.writer(fp, lineterminator='\n').writerows(obs)


In [104]:
# Location name written into every exported row. Strip stray leading/trailing
# whitespace from the typed answer so it is not carried into the CSV files.
location = input("What is the observation location?").strip()

In [103]:
# Start from a clean output directory. ignore_errors=True lets a first run
# proceed when 'ebird' does not exist yet (plain rmtree raises FileNotFoundError).
shutil.rmtree('ebird', ignore_errors=True)
# sorted() makes the processing order, and the `raw` left behind after the
# loop, independent of the filesystem's listing order.
for logpath in tqdm(sorted(glob("logs/*"))):
    raw = load_birdnet_log(logpath, confidence_threshold=0.5)
    # Mirror each log file name under ebird/ without relying on the path separator.
    csvpath = Path("ebird") / (Path(logpath).name + ".csv")
    generate_ebird_record(raw, csvpath, location)
    

100%|██████████| 251/251 [02:36<00:00,  1.60it/s]


In [80]:
# Display the detections frame currently bound to `raw`.
# NOTE(review): `raw` is a leftover variable from an earlier cell — presumably
# the last log loaded in the export loop; confirm the execution order.
raw

Unnamed: 0,timestamp,confidence,species,common
3957,2023-08-22 05:29:50+10:00,0.6620,Rhipidura leucophrys,Willie-wagtail
4041,2023-08-22 05:36:50+10:00,0.6234,Rhipidura leucophrys,Willie-wagtail
4051,2023-08-22 05:37:40+10:00,0.6312,Rhipidura leucophrys,Willie-wagtail
4062,2023-08-22 05:38:35+10:00,0.6841,Rhipidura leucophrys,Willie-wagtail
4199,2023-08-22 05:50:00+10:00,0.6257,Rhipidura leucophrys,Willie-wagtail
...,...,...,...,...
12561,2023-08-22 17:27:37+10:00,0.7364,Cormobates leucophaea,White-throated Treecreeper
12564,2023-08-22 17:27:52+10:00,0.6372,Pachycephala rufiventris,Rufous Whistler
12591,2023-08-22 17:30:07+10:00,0.7004,Rhipidura leucophrys,Willie-wagtail
12660,2023-08-22 17:35:52+10:00,0.7843,Rhipidura leucophrys,Willie-wagtail
