Auto-download sounds from Xeno-Canto

In [None]:
import requests
import json
import os
from urllib.parse import urlparse
import time
import platform

# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the notebook cell indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _parse_duration_seconds(length_str):
    """Convert a colon-separated duration ('m:ss' or 'h:mm:ss') to whole seconds.

    A string without any colon yields 0, so such recordings are never rejected
    by the duration filter. Raises ValueError for non-numeric fields and
    TypeError when `length_str` is not a string.
    """
    if ':' not in length_str:
        return 0
    total_seconds = 0
    # Fold left-to-right so any number of fields works: each step shifts the
    # running total up by one base-60 "digit".
    for field in length_str.split(':'):
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds


def _fetch_recordings_page(base_url, query, page):
    """Request one page of search results and return the decoded JSON payload."""
    response = requests.get(
        base_url,
        params={'query': query, 'page': page},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
    response.raise_for_status()
    return response.json()


def _iter_recordings(base_url, query, first_page):
    """Yield recordings from `first_page`, then lazily from any following pages.

    Later pages are only requested when the consumer asks for more items, so
    stopping early (e.g. when a download limit is hit) costs no extra requests.
    """
    page_number, payload = 1, first_page
    while True:
        yield from payload.get('recordings') or []

        try:
            num_pages = int(payload.get('numPages') or 1)
        except (TypeError, ValueError):
            num_pages = 1
        if page_number >= num_pages:
            return

        page_number += 1
        try:
            payload = _fetch_recordings_page(base_url, query, page_number)
        except (requests.RequestException, ValueError) as e:
            # Keep what was already downloaded; just stop paging.
            print(f"Failed to fetch results page {page_number}: {e}")
            return


def download_animal_sounds(species, limit=50, quality=None, max_duration_minutes=5, base_dir='xeno_canto', output_dir=None):
    """Search Xeno-Canto and download the matching recordings to disk.

    Files are saved as ``XC<id> - <English name> - <Genus species><ext>`` under
    ``base_dir/output_dir[/quality]``. Progress is printed for every recording.

    Args:
        species: Search term sent to the API. Also used as the folder name
            when `output_dir` is not given.
        limit: Maximum number of files to download; None means no limit
            (all result pages are walked).
        quality: Optional quality rating added to the query as ``q:<quality>``
            and appended to the download path as a subfolder.
        max_duration_minutes: Recordings longer than this are skipped;
            None disables the duration filter.
        base_dir: Root directory for all downloads.
        output_dir: Folder under `base_dir`; defaults to `species`.

    Returns:
        None.

    Raises:
        requests.RequestException: If the first search request fails or
            returns an HTTP error status. Failures of individual downloads
            are printed and skipped instead.
    """
    base_url = "https://xeno-canto.org/api/2/recordings"

    # Build query - use species as search term
    query_parts = [species]
    if quality is not None:
        query_parts.append(f'q:{quality}')
    query = ' '.join(query_parts)

    # Fetch the first page before touching the filesystem so a failed search
    # does not leave an empty directory behind.
    first_page = _fetch_recordings_page(base_url, query, 1)

    # Construct path: base_dir/species/[quality if specified]
    if output_dir is None:
        output_dir = species

    if quality is not None:
        download_dir = os.path.join(base_dir, output_dir, quality)
    else:
        download_dir = os.path.join(base_dir, output_dir)

    os.makedirs(download_dir, exist_ok=True)

    # Format path for OS-appropriate clickable link
    abs_path = os.path.abspath(download_dir)
    if platform.system() == "Windows":
        clickable_path = f"file:///{abs_path.replace(os.sep, '/')}"
    else:
        clickable_path = f"file://{abs_path}"

    print(f"Saving to: {clickable_path}")

    downloaded = 0
    # Handle None duration limit
    max_duration_seconds = max_duration_minutes * 60 if max_duration_minutes is not None else None

    for recording in _iter_recordings(base_url, query, first_page):
        # Handle None limit (no limit); this top check also covers limit <= 0.
        if limit is not None and downloaded >= limit:
            break

        # Get length_str for all recordings (needed for print statement)
        length_str = recording.get('length', '0:00')

        # Check duration only if max_duration_seconds is set
        if max_duration_seconds is not None:
            try:
                total_seconds = _parse_duration_seconds(length_str)
            except (ValueError, TypeError, AttributeError):
                print(f"Skipping {recording['id']}: invalid duration format")
                continue

            if total_seconds > max_duration_seconds:
                print(f"Skipping {recording['id']}: {length_str} exceeds {max_duration_minutes}min limit")
                continue

        file_url = recording.get('file')
        if not file_url:
            # Without this guard an empty value would turn into the bogus URL "https:".
            print(f"Skipping {recording['id']}: no download URL")
            continue
        if not file_url.startswith('http'):
            # Protocol-relative URL ("//host/path"): pin it to https.
            file_url = f"https:{file_url}"

        # Get the actual file extension from the URL
        parsed_url = urlparse(file_url)
        file_extension = os.path.splitext(parsed_url.path)[1] or '.mp3'

        # Construct filename: XC[id] - [English name] - [Genus species][extension]
        xc_id = recording['id']
        english_name = recording['en']
        full_scientific = f"{recording['gen']} {recording['sp']}"

        filename = f"XC{xc_id} - {english_name} - {full_scientific}{file_extension}"

        # Clean filename for filesystem compatibility
        filename = "".join(c for c in filename if c.isalnum() or c in (' ', '-', '_', '.')).rstrip()

        try:
            audio_response = requests.get(file_url, timeout=_REQUEST_TIMEOUT_SECONDS)
            # Check the status BEFORE opening the file so an HTTP error page is
            # never written to disk as if it were audio.
            audio_response.raise_for_status()
            with open(os.path.join(download_dir, filename), 'wb') as f:
                f.write(audio_response.content)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {filename}: {e}")
            continue

        print(f"Downloaded: {filename} ({length_str})")
        downloaded += 1
        # Be polite to the server between downloads.
        time.sleep(1)

        # Stop here rather than at the top of the next iteration, so reaching
        # the limit on the last item of a page does not fetch another page.
        if limit is not None and downloaded >= limit:
            break

    # Print summary with OS-appropriate path
    print(f"\nDownload complete! Saved {downloaded} files to: {clickable_path}")

# Usage examples:
# download_animal_sounds('kiwi', quality='A')

# Usage examples:
# download_animal_sounds('kiwi', quality='A')  # -> xeno_canto/kiwi/A/
# download_animal_sounds('kiwi')  # -> xeno_canto/kiwi/
# download_animal_sounds('Meleagris', quality='B')  # -> xeno_canto/Meleagris/B/
# download_animal_sounds('owl', output_dir='night_birds', quality='A')  # -> xeno_canto/night_birds/A/

# Usage examples (search by common name; the search term is the first positional argument, `species`):
# download_animal_sounds('kiwi')
# download_animal_sounds('wild turkey')
# download_animal_sounds('owl')

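# Usage examples (search by genus name, passed as `species`):
# download_animal_sounds('Apteryx')  # Kiwis only
# download_animal_sounds('Meleagris')  # Turkeys only
# NOTE: `species` is required; download_animal_sounds() with no search term raises TypeError, so there is no "all birds" wildcard call.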

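# Usage:
# NOTE: `species` is sent to the API as the search term, so keep folder paths out of it.
# Use `output_dir` to pick the folder; the quality subfolder is appended automatically.
# download_animal_sounds(limit=None, quality='A', max_duration_minutes=10, base_dir='xeno_canto', species='wild turkey', output_dir='wild_turkey')  # -> xeno_canto/wild_turkey/A/
# download_animal_sounds(limit=None, quality='B', max_duration_minutes=10, base_dir='xeno_canto', species='wild turkey', output_dir='wild_turkey')  # -> xeno_canto/wild_turkey/B/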

# Fetch quality-C kiwi recordings; limit=None and max_duration_minutes=None
# switch off the file-count cap and the duration filter respectively.
download_animal_sounds(
    species='kiwi',
    quality='C',
    limit=None,
    max_duration_minutes=None,
    base_dir='xeno_canto',
)

Saving to: file:///d:/anvo/xeno_canto/kiwi/C
Downloaded: XC956516 - North Island Brown Kiwi - Apteryx mantelli.mp3 (0:15)
Downloaded: XC955811 - North Island Brown Kiwi - Apteryx mantelli.mp3 (0:21)
Downloaded: XC525927 - North Island Brown Kiwi - Apteryx mantelli.mp3 (0:09)
Downloaded: XC525926 - North Island Brown Kiwi - Apteryx mantelli.mp3 (0:04)
Downloaded: XC444283 - North Island Brown Kiwi - Apteryx mantelli.mp3 (0:27)
Downloaded: XC236298 - Okarito Kiwi - Apteryx rowi.mp3 (0:05)
Downloaded: XC958222 - Little Spotted Kiwi - Apteryx owenii.mp3 (0:10)
Downloaded: XC952953 - Little Spotted Kiwi - Apteryx owenii.mp3 (0:10)
Downloaded: XC498079 - Little Spotted Kiwi - Apteryx owenii.mp3 (0:27)
Downloaded: XC579690 - Great Spotted Kiwi - Apteryx haastii.mp3 (1:08)
Downloaded: XC444298 - Great Spotted Kiwi - Apteryx haastii.mp3 (0:31)
Downloaded: XC345367 - Great Spotted Kiwi - Apteryx haastii.mp3 (0:21)
Downloaded: XC114351 - Great Spotted Kiwi - Apteryx haastii.mp3 (0:25)
Downloaded:

In [None]:
# `species` is the search term sent to the API, so pass the plain name and
# choose the folder with `output_dir`; the quality subfolder ('B') is appended
# automatically, giving xeno_canto/wild_turkey/B.
download_animal_sounds(
    species='wild turkey',
    output_dir='wild_turkey',
    quality='B',
    limit=None,
    max_duration_minutes=10,
    base_dir='xeno_canto',
)

TypeError: '>=' not supported between instances of 'int' and 'NoneType'