In [3]:
from pygbif import species
import pandas as pd
import json

In [74]:
# read the JSON file "../resources/infraspecies_ebird.json"
with open("../resources/infraspecies_ebird.json") as f:
    infraspecies = json.load(f)

In [159]:
species.name_backbone("Habia fuscicauda salvini Group")

{'usageKey': 6172397,
 'scientificName': 'Habia fuscicauda salvini (von Berlepsch, 1883)',
 'canonicalName': 'Habia fuscicauda salvini',
 'rank': 'SUBSPECIES',
 'status': 'ACCEPTED',
 'confidence': 96,
 'matchType': 'EXACT',
 'kingdom': 'Animalia',
 'phylum': 'Chordata',
 'order': 'Passeriformes',
 'family': 'Cardinalidae',
 'genus': 'Habia',
 'species': 'Habia fuscicauda',
 'kingdomKey': 1,
 'phylumKey': 44,
 'classKey': 212,
 'orderKey': 729,
 'familyKey': 9285,
 'genusKey': 5230696,
 'speciesKey': 5230697,
 'synonym': False,
 'class': 'Aves'}

In [204]:
# def subspecies_name_formatter(name):
#     """
#     EXAMPLE: Junco hyemalis [oreganus Group] -> Junco hyemalis oreganus Group
#     """
#     return name.replace('[', '').replace(']', '')

# assert subspecies_name_formatter("Junco hyemalis [oreganus Group]") == "Junco hyemalis oreganus Group"

# def subspecies_name_splitter(name):
#     """
#     EXAMPLE: Junco hyemalis hyemalis/carolinensis -> ['Junco hyemalis hyemalis', 'Junco hyemalis carolinensis']
#     """
#     split_name = name.split(' ')
#     subspecies_names = split_name[-1].split('/')

#     base_name = ' '.join(split_name[:2])
#     return [' '.join([base_name, subspecies_name]) for subspecies_name in subspecies_names]

# assert subspecies_name_splitter("Junco hyemalis hyemalis/carolinensis/otherstuff") == ['Junco hyemalis hyemalis', 'Junco hyemalis carolinensis', 'Junco hyemalis otherstuff']

clements = pd.read_csv("../resources/eBird-Clements-v2024-integrated-checklist-October-2024-rev.csv")

def find_subspecies_groups(name, clements=clements):
    # Find the row with the group name in the "scientific_name" column
    row = clements[clements["scientific name"] == name]

    # Check if this is a polytypic group
    if row["category"].values[0] == "group (polytypic)":
        # Find all rows beneath this row that are subspecies, and stop listing them when something isn't a subspecies
        subspecies = []
        for i in range(row.index[0]+1, len(clements)):
            if clements.iloc[i]["category"] == "subspecies":
                subspecies.append(clements.iloc[i]["scientific name"])
            else:
                break
        
        return subspecies
    else:
        return [name]

assert find_subspecies_groups("Driophlox fuscicauda [salvini Group]") == ['Driophlox fuscicauda salvini','Driophlox fuscicauda insularis','Driophlox fuscicauda discolor']


# Function to query GBIF for taxonomic information
def get_gbif_info(name):
    # search_formatted_name = subspecies_name_formatter(name)
    if '/' in name or '[' in name:
        ssps = find_subspecies_groups(name)
        formatted = {'eBird_name' : name, 'group_members': ssps, 'GBIF_name' : [], 'GBIF_rank' : [], 'GBIF_taxonKey' : []}
    else:
        ssps = [name]
        formatted = {'eBird_name' : name, 'group_members':[name], 'GBIF_name' : [], 'GBIF_rank' : [], 'GBIF_taxonKey' : []}
    
    results = []
    for ssp in ssps:
        try:
            unformatted_result = species.name_backbone(name=ssp)
            if unformatted_result['matchType'] != 'EXACT':
                unformatted_result = {"scientificName":None, "rank":None, "usageKey":None}
        except Exception as e:
            print("Error querying GBIF for", name)
            print(e)
            unformatted_result = {"scientificName":None, "rank":None, "usageKey":None}
        results.append(unformatted_result)
    
    
    for result in results:
        formatted['GBIF_name'].append(result['scientificName'])
        formatted['GBIF_rank'].append(result['rank'])
        formatted['GBIF_taxonKey'].append(result['usageKey'])
    return formatted


example_ssp = "Junco hyemalis hyemalis/carolinensis"
example_ssp = "Driophlox fuscicauda [salvini Group]"
get_gbif_info(example_ssp)


{'eBird_name': 'Driophlox fuscicauda [salvini Group]',
 'group_members': ['Driophlox fuscicauda salvini',
  'Driophlox fuscicauda insularis',
  'Driophlox fuscicauda discolor'],
 'GBIF_name': [None, None, None],
 'GBIF_rank': [None, None, None],
 'GBIF_taxonKey': [None, None, None]}

In [115]:
gbif_results = []
completed_species = []

In [117]:

# Function to process the eBird dictionary
def process_ebird_dict(species_name, infraspecies=infraspecies):
    # Add the main species
    results = [get_gbif_info(species_name)]

    # Get results for all infraspecies
    taxonomy = infraspecies[species_name]
    if 'infraspecies' in taxonomy:
        # Process ISSF if there
        for group_type, group in taxonomy["infraspecies"].items():
            if group_type == "issf":
                for subspecies_name, subspecies_details in group.items():
                    result = get_gbif_info(subspecies_name)
                    results.append(result)
    return results


# Process the eBird taxonomy
#len(set(infraspecies.keys()) - set(completed_species))
for species_name in infraspecies.keys():
    if species_name not in completed_species:
        print(species_name)
        result = process_ebird_dict(species_name)
        gbif_results.append(result)
        completed_species.append(species_name)


Branta canadensis
Branta sandvicensis
Branta ruficollis
Cereopsis novaehollandiae
Stictonetta naevosa
Cyanochen cyanoptera
Cygnus olor
Cygnus atratus
Cygnus melancoryphus
Cygnus buccinator
Cygnus columbianus
Cygnus cygnus
Coscoroba coscoroba
Sarkidiornis melanotos
Sarkidiornis sylvicola
Pteronetta hartlaubii
Oressochen jubatus
Oressochen melanopterus
Chloephaga picta
Chloephaga hybrida
Chloephaga poliocephala
Chloephaga rubidiceps
Radjah radjah
Alopochen aegyptiaca
Alopochen mauritiana
Alopochen kervazoi
Tadorna ferruginea
Tadorna cana
Tadorna tadornoides
Tadorna variegata
Tadorna tadorna
Tadorna cristata
Plectropterus gambensis
Tachyeres patachonicus
Tachyeres pteneres
Tachyeres brachypterus
Tachyeres leucocephalus
Lophonetta specularioides
Speculanas specularis
Cairina moschata
Nettapus pulchellus
Nettapus coromandelianus
Nettapus auritus
Callonetta leucophrys
Aix sponsa
Aix galericulata
Chenonetta jubata
Chenonetta finschi
Amazonetta brasiliensis
Hymenolaimus malacorhynchos
Merganet

In [155]:
import itertools
# Convert to a DataFrame
translation_table = pd.DataFrame(itertools.chain.from_iterable(gbif_results))

# Save to a CSV file
translation_table.to_csv("../resources/ebird_gbif_taxonomy_translation.csv", index=False)

# Display the DataFrame
translation_table

Unnamed: 0,eBird_name,GBIF_name,GBIF_rank,GBIF_taxonKey
0,Struthio camelus,"Struthio camelus Linnaeus, 1758",SPECIES,2495150
1,Struthio molybdophanes,"Struthio molybdophanes Reichenow, 1883",SPECIES,2495149
2,Casuarius casuarius,"Casuarius casuarius (Linnaeus, 1758)",SPECIES,5231783
3,Casuarius bennetti,"Casuarius bennetti Gould, 1857",SPECIES,5231784
4,Casuarius unappendiculatus,"Casuarius unappendiculatus Blyth, 1860",SPECIES,5231785
...,...,...,...,...
14796,Saltator maxillosus,"Saltator maxillosus Cabanis, 1851",SPECIES,5230924
14797,Saltator aurantiirostris,"Saltator aurantiirostris Vieillot, 1817",SPECIES,5230903
14798,Saltator cinctus,"Saltator cinctus J.T.Zimmer, 1943",SPECIES,5230940
14799,Saltator grossus,"Saltator grossus (Linnaeus, 1767)",SPECIES,5230955


## Redo the function above with the fixed get_gbif_info function

In [214]:
translation_table["group_members"] = translation_table.apply(lambda x: [x['eBird_name']], axis=1)

In [216]:
to_redo = translation_table[['[' in name or '/' in name for name in translation_table.eBird_name.tolist()]]
for idx, row in to_redo.iterrows():
    new_row = get_gbif_info(row.eBird_name)
    translation_table.iloc[idx] = new_row

In [157]:
with open('../resources/GBIF_ebird_mismatches.csv', 'w') as f:
    f.write('eBird_name,\n')
    for x in translation_table[translation_table.GBIF_name.apply(lambda x: x is None)].eBird_name.values.tolist():
        f.write(f'"{x}",""\n')

In [None]:
Caprimulgus ritae
Aegotheles terborghi
Anthracothorax nigricollis nigricollis
Pampa curvipennis pampa
Ramosomyia viridifrons wagneri
Chrysuronia boucardi
No exact match found for Gallirallus woodfordi tertius
No exact match found for Gallirallus woodfordi immaculatus
No exact match found for Gallirallus woodfordi woodfordi
Aramides cajaneus
Amaurornis moluccana
Rufirallus schomburgkii
Rufirallus fasciatus
Rufirallus leucopyrrhus
Rufirallus xenopterus
Laterallus notatus
Laterallus spilonota
Laterallus spilopterus
Hesperoburhinus bistriatus
Hesperoburhinus superciliaris
No exact match found for Thinornis melanops
No exact match found for Thinornis forbesi
No exact match found for Thinornis tricollaris
No exact match found for Thinornis tricollaris tricollaris
No exact match found for Thinornis tricollaris bifrontatus
No exact match found for Thinornis dubius
No exact match found for Thinornis dubius curonicus
No exact match found for Thinornis dubius dubius
No exact match found for Thinornis dubius jerdoni
No exact match found for Thinornis placidus
No exact match found for Anarhynchus mongolus
No exact match found for Anarhynchus atrifrons
No exact match found for Anarhynchus bicinctus
No exact match found for Anarhynchus obscurus
No exact match found for Anarhynchus obscurus aquilonius
No exact match found for Anarhynchus obscurus obscurus
No exact match found for Anarhynchus wilsonia
No exact match found for Anarhynchus collaris
No exact match found for Anarhynchus montanus
No exact match found for Anarhynchus alticola
No exact match found for Anarhynchus falklandicus
No exact match found for Anarhynchus thoracicus
No exact match found for Anarhynchus pecuarius
No exact match found for Anarhynchus sanctaehelenae
No exact match found for Anarhynchus ruficapillus
No exact match found for Anarhynchus nivosus
No exact match found for Anarhynchus nivosus nivosus
No exact match found for Anarhynchus nivosus occidentalis
No exact match found for Anarhynchus pallidus
No exact match found for Anarhynchus peronii
No exact match found for Anarhynchus marginatus
No exact match found for Anarhynchus javanicus
No exact match found for Anarhynchus alexandrinus alexandrinus
No exact match found for Anarhynchus alexandrinus nihonensis
No exact match found for Anarhynchus alexandrinus seebohmi
No exact match found for Anarhynchus dealbatus
Larus mongolicus
Hydrobates pelagicus pelagicus
No exact match found for Sula brewsteri
No exact match found for Sula brewsteri brewsteri
No exact match found for Sula brewsteri etesiaca
Leucocarbo chalconotus stewarti
No exact match found for Botaurus involucris
No exact match found for Botaurus exilis
No exact match found for Botaurus exilis exilis
No exact match found for Botaurus exilis pullus
No exact match found for Botaurus exilis [erythromelas Group]
No exact match found for Botaurus flavicollis
No exact match found for Botaurus cinnamomeus
No exact match found for Botaurus eurhythmus
No exact match found for Botaurus sturmii
No exact match found for Botaurus minutus
No exact match found for Botaurus minutus minutus
No exact match found for Botaurus minutus payesii
No exact match found for Botaurus minutus podiceps
No exact match found for Botaurus sinensis
No exact match found for Botaurus dubius
No exact match found for Botaurus novaezelandiae
Egretta garzetta immaculata
Ardea coromanda

## List species without a match

In [None]:
no_match_species = []


## List GBIF subspecies that weren't listed in eBird

In [None]:
# Example: 
# Ketupa blakistoni (Seebohm, 1884)
# ==> Ketupa blakistoni subsp. blakistoni, Ketupa blakistoni subsp. doerriesi

In [None]:

# Add matches for the species that were unmatched
# (Found these manually)
ebd_to_gbif_manual = {
    'Apteryx maxima':'Apteryx haastii'
    'Ortalis guttata':'Ortalis guttata (Spix, 1825)'
    'Xenoperdix udzungwensis obscuratus':'Xenoperdix udzungwensis obscurata Fjeldså & Kiure, 2003',
    # No match for these in GBIF
    # Aplopelia larvata inornata
    # Aplopelia larvata principalis
    # Aplopelia larvata simplex
    # Aplopelia larvata larvata
    # Aplopelia larvata bronzina
    ' Centropus burchellii':'Centropus superciliosus subsp. burchellii Swainson, 1838',
    'Eudynamys melanorhynchus':'Eudynamys scolopaceus subsp. melanorhynchus S.Muller, 1843'
    # Chalcites lucidus harterti
    # Chalcites lucidus lucidus
    # Chalcites lucidus plagosus
    # Chalcites minutillus [minutillus Group]
    # Chalcites minutillus [poecilurus Group]
    # Chalcites minutillus rufomerus
    # Chalcites minutillus salvadorii
    'Lurocalis semitorquatus nattereri':'Lurocalis semitorquatus nattererii',
    'Tepuiornis whitelyi':'Setopagis whitelyi',
    'Quechuavis decussata':'Systellura decussata',
    'Antiurus maculicaudus':'Hydropsalis maculicaudus',
    
}

# Do a manual lookup for the species that were unmatched
# And replace their position in the dataframe
for ebd_name, gbif_name in ebd_to_gbif_manual.items():
    row = get_gbif_info(ebd_name)
    row['eBird_name'] = ebd_name
    row['GBIF_name'] = gbif_name
    row['GBIF_rank'] = 'SPECIES'
    row['GBIF_taxonKey'] = species.name_suggest(query=gbif_name)[0]['key']

    # Replace the current entry in the translation table
    translation_table.loc[translation_table['eBird_name'] == ebd_name] = row

In [3]:
from pygbif import occurrences as occ
import os
import requests

In [2]:

# Install necessary packages
# pip install pygbif pandas


In [22]:
import subprocess
subprocess.call("echo $BASH_USER")

FileNotFoundError: [Errno 2] No such file or directory: 'echo $BASH_USER'

In [14]:
# Taxon-specific info
ebd_key = "4fa7b334-ce0d-4e88-aaae-2e0c138d049e"
aves_key = 212

# Set up the API endpoint and headers
url = "https://api.gbif.org/v1/occurrence/download/request"
headers = {"Content-Type": "application/json"}
response_format = "SIMPLE_CSV"

# Login info 
email = "tessa.rhinehart@gmail.com"


occ.search(taxonKey = aves_key, hasCoordinate=True, limit=2)

[0;31mSignature:[0m
[0mocc[0m[0;34m.[0m[0msearch[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtaxonKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrepatriated[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkingdomKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mphylumKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclassKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0morderKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfamilyKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgenusKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msubgenusKey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mscientificName[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcountry[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m


In [6]:
# Taxon-specific info
ebd_key = "4fa7b334-ce0d-4e88-aaae-2e0c138d049e"
aves_key = 212

# Set up the API endpoint and headers
url = "https://api.gbif.org/v1/occurrence/download/request"
headers = {"Content-Type": "application/json"}
response_format = "SIMPLE_CSV"

# Login info 
email = "tessa.rhinehart@gmail.com"

payload = {
  "creator": email,
  "notification_address": [email],
  "format": response_format,
  "predicate": {
    "type": "and",
    "predicates": [
      {
        "type": "equals",
        "key": "TAXON_KEY",
        "value": aves_key
      },
      {
        "type": "not",
        "predicate": {
          "type": "equals",
          "key": "DATASET_ID",
          "value": ebd_key
        }
      }
    ]
  }
}


# Make the request
response = requests.post(url, json=payload, headers=headers)

# Check the response
if response.status_code == 201:
    print("Download request successful!")
    print("Download key:", response.json().get("key"))
else:
    print("Error:", response.status_code, response.text)

Error: 403 Access is denied
