In [27]:
import pandas as pd
import requests
import time
import os

In [28]:
# Define the endpoint and parameters
endpoint = "https://xeno-canto.org/api/2/recordings"
query = "box:38.403202,-84.820159,41.977523,-80.518693"  # Ohio bounding box
params = {
    "query": query,
    "page": 1
}

# Make the API request
response = requests.get(endpoint, params=params)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    
    # Print some information about the results
    print(f"Number of Recordings: {data['numRecordings']}")
    print(f"Number of Species: {data['numSpecies']}")
    
    # Check if there are any recordings
    if data["recordings"]:
        first_recording = data["recordings"][0]  # Get the first recording
        print("\nFirst Recording Details:")
        print(f"Species: {first_recording['gen']} {first_recording['sp']} - {first_recording['en']}")
        print(f"Location: {first_recording['loc']}")
        print(f"Country: {first_recording['cnt']}")
        print(f"Date: {first_recording['date']}")
        print(f"Recording URL: https://xeno-canto.org/{first_recording['id']}")
        print(f"Audio File: {first_recording['file']}")
    else:
        print("No recordings found for the specified area.")
else:
    print("Failed to retrieve data from the API")

Number of Recordings: 745
Number of Species: 147

First Recording Details:
Species: Branta canadensis - Canada Goose
Location: Whitewater Township (near  Harrison), Hamilton County, Ohio
Country: United States
Date: 2022-05-11
Recording URL: https://xeno-canto.org/726750
Audio File: https://xeno-canto.org/726750/download


In [29]:
# The audio file URL from the API response
audio_url = "https://xeno-canto.org/726750/download"

# Send a request to download the audio file
response = requests.get(audio_url)

# Check if the request was successful
if response.status_code == 200:
    # Define the filename and save the file
    filename = "Branta_canadensis_Canada_Goose.mp3"
    with open(filename, "wb") as audio_file:
        audio_file.write(response.content)
    print(f"Audio file downloaded and saved as {filename}")
else:
    print("Failed to download the audio file")

Audio file downloaded and saved as Branta_canadensis_Canada_Goose.mp3


In [30]:
import requests
import time
import os
import pandas as pd

# Define the endpoint and bounding box query for Ohio
endpoint = "https://xeno-canto.org/api/2/recordings"
query = "box:38.403202,-84.820159,41.977523,-80.518693"  # Ohio bounding box
page = 1  # Start on the first page

# Specify 'Original Recordings' directory
recordings_dir = "Original Recordings"


# Initialize a list to store metadata
metadata_list = []

# Function to clean up filename
def clean_filename(filename):
    return filename.replace(" ", "_").replace(",", "").replace("(", "").replace(")", "")

# Loop to request each page until all recordings are retrieved
while True:
    # Set up the parameters for the current page
    params = {
        "query": query,
        "page": page
    }
    
    # Make the API request
    response = requests.get(endpoint, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        
        # Loop over each recording in the current page
        for recording in data["recordings"]:
            # Define the filename using species, ID, and location details
            species = f"{recording['gen']}_{recording['sp']}"
            location = clean_filename(recording['loc'])
            recording_id = recording['id']
            filename = f"{species}_{location}_{recording_id}.mp3"
            file_path = os.path.join(recordings_dir, filename)
            
            # Get the audio file URL and download the file
            audio_url = recording['file']
            try:
                audio_response = requests.get(audio_url)
                audio_response.raise_for_status()
                
                # Save the file if the request was successful
                with open(file_path, "wb") as audio_file:
                    audio_file.write(audio_response.content)
                print(f"Downloaded: {filename}")
                download_status = "Success"
            except requests.RequestException as e:
                print(f"Failed to download audio for {species} from {location}: {str(e)}")
                download_status = f"Failed: {str(e)}"
            
            # Collect metadata
            metadata = {
                "id": recording["id"],
                "genus": recording["gen"],
                "species": recording["sp"],
                "common_name": recording["en"],
                "latitude": recording["lat"],
                "longitude": recording["lng"],
                "type": recording["type"],
                "date": recording["date"],
                "time": recording["time"],
                "length": recording["length"],
                "quality": recording["q"],
                "remarks": recording["rmk"],
                "sex": recording.get("sex", ""),
                "stage": recording.get("stage", ""),
                "also": ", ".join(recording.get("also", [])),
                "file_name": filename,
            }
            metadata_list.append(metadata)
            
            # Pause between requests to respect the rate limit of 1 request per second
            time.sleep(1)
        
        # Check if there are more pages to process
        if page >= data["numPages"]:
            break
        else:
            page += 1  # Move to the next page
    else:
        print(f"Failed to retrieve data from the API: {response.status_code}")
        break  # Stop the loop if we can't retrieve data

    # Pause between requests to respect the rate limit of 1 request per second
    time.sleep(1)

print("Download and metadata collection complete for all available recordings.")

# Convert the metadata list to a DataFrame
metadata_df = pd.DataFrame(metadata_list)

# Save the DataFrame to a CSV file
csv_filename = "ohio_bird_recordings_metadata.csv"
metadata_df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"Metadata has been successfully saved to '{csv_filename}'")
print(f"Number of recordings with local files: {metadata_df['local_file'].notna().sum()}")

Downloaded: Branta_canadensis_Whitewater_Township_near__Harrison_Hamilton_County_Ohio_726750.mp3
Downloaded: Branta_canadensis_Lawrence_Woods_SNP_418000.mp3
Downloaded: Branta_canadensis_Miami_Whitewater_Forest_Park_wetlands_Crosby_Township_near__Harrison_Hamilton_County_Ohio_691528.mp3
Downloaded: Cygnus_buccinator_Killdeer_Plains_Wildlife_Management_Area_Wyandot_County_Ohio_713788.mp3
Downloaded: Aix_sponsa_Magee_Marsh_-_boardwalk_Lucas_County_Ohio_815809.mp3
Downloaded: Anas_acuta_Cincinnati_Hamilton_County_Ohio_855312.mp3
Downloaded: Colinus_virginianus_New_Market_Township_near__Hoagland_Highland_County_Ohio_805814.mp3
Downloaded: Colinus_virginianus_Michigan_Monroe_County_17130.mp3
Downloaded: Colinus_virginianus_Michigan_Monroe_County_17061.mp3
Downloaded: Colinus_virginianus_Michigan_Monroe_County_17060.mp3
Downloaded: Phasianus_colchicus_Michigan_Monroe_County_16976.mp3
Downloaded: Chordeiles_minor_Green_Township_near__Cincinnati_Hamilton_County_Ohio_647199.mp3
Downloaded: Antr

KeyError: 'download_status'

In [31]:
# Check the CSV

data = pd.read_csv('ohio_bird_recordings_metadata.csv')
data.head()

Unnamed: 0,id,genus,species,common_name,latitude,longitude,type,date,time,length,quality,remarks,sex,stage,also,file_name,local_file
0,726750,Branta,canadensis,Canada Goose,39.2095,-84.7821,flight call,2022-05-11,11:22,0:14,A,Flock of 5 landing on lake.,uncertain,adult,"Agelaius phoeniceus, Melospiza melodia, Cardin...",Branta_canadensis_Whitewater_Township_near__Ha...,Original Recordings\Branta_canadensis_Whitewat...
1,418000,Branta,canadensis,Canada Goose,40.5652,-83.6255,flight call,2018-05-06,13:10,0:31,A,Natural vocalizations as the birds flew overhe...,,,"Myiarchus crinitus, Quiscalus quiscula",Branta_canadensis_Lawrence_Woods_SNP_418000.mp3,Original Recordings\Branta_canadensis_Lawrence...
2,691528,Branta,canadensis,Canada Goose,39.283,-84.7459,flight call,2021-12-14,07:48,0:19,B,Part of large flock (total count 124 birds) ta...,,,,Branta_canadensis_Miami_Whitewater_Forest_Park...,Original Recordings\Branta_canadensis_Miami_Wh...
3,713788,Cygnus,buccinator,Trumpeter Swan,40.7095,-83.3032,call,2021-12-03,09:00,2:12,B,About 100m away in marshy pond. Numerous indiv...,,,,Cygnus_buccinator_Killdeer_Plains_Wildlife_Man...,Original Recordings\Cygnus_buccinator_Killdeer...
4,815809,Aix,sponsa,Wood Duck,41.6275,-83.1897,call,2023-05-16,08:29,0:04,C,calls from two birds in flight;,uncertain,adult,,Aix_sponsa_Magee_Marsh_-_boardwalk_Lucas_Count...,Original Recordings\Aix_sponsa_Magee_Marsh_-_b...
