In [None]:
import requests
import os
from tqdm import tqdm

# Destination directory for every downloaded image; created up front if missing.
output_folder = "data_images"
os.makedirs(output_folder, exist_ok=True)

# Only media URLs ending in one of these suffixes are downloaded.
valid_extensions = (".jpg", ".jpeg")

# Scientific name -> number of images to collect for that species.
species_targets = {
    "Pinus pinaster": 5,
    "Quercus suber": 5,
    "Eucalyptus globulus": 5,
}

def get_occurrences(scientific_name, offset=0, limit=300):
    """Fetch one page of GBIF occurrence records for a species.

    The search is restricted to records from Portugal (``country=PT``) that
    have still-image media, coordinates, and a CC0 1.0 license.

    Args:
        scientific_name: Scientific name to search for.
        offset: Index of the first record to return (for paging).
        limit: Maximum number of records to return in this page.

    Returns:
        The decoded JSON response body as a dict.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.gbif.org/v1/occurrence/search"
    params = {
        "scientificName": scientific_name,
        "country": "PT",
        "mediaType": "StillImage",
        "hasCoordinate": "true",
        "license": "CC0_1_0",
        "limit": limit,
        "offset": offset,
    }
    # Without a timeout requests waits indefinitely on a stalled connection;
    # use the same 10 s budget as the image downloads.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.json()

def download_image(image_url, file_path):
    """Download ``image_url`` and write the response body to ``file_path``.

    The download is best-effort: problems are reported on stdout instead of
    being raised, so one bad URL does not abort a batch.

    Args:
        image_url: URL of the image to fetch.
        file_path: Destination path; it is overwritten if it already exists.

    Returns:
        True if the server answered 200 and the body was written completely,
        False otherwise.
    """
    started_writing = False
    try:
        # Using the response as a context manager releases the streamed
        # connection even when we bail out early or an error occurs.
        with requests.get(image_url, stream=True, timeout=10) as r:
            if r.status_code != 200:
                print(f"Skipping {image_url}: HTTP {r.status_code}")
                return False
            started_writing = True
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
        return True
    except Exception as e:
        print(f"Error downloading {image_url}: {e}")
        if started_writing:
            # Do not leave a truncated image behind.
            try:
                os.remove(file_path)
            except OSError:
                pass
        return False

# For each species, page through GBIF occurrences and download JPEG media
# until the requested number of images has been saved or the results run out.
for species_name, max_images in species_targets.items():
    print(f"\n🔍 Searching for images of: {species_name}...")

    downloaded = 0
    offset = 0

    while downloaded < max_images:
        # Pass the running offset so each iteration fetches the NEXT page
        # instead of requesting the first page over and over.
        data = get_occurrences(species_name, offset=offset)
        results = data.get("results", [])
        if not results:
            print("No more results found.")
            break

        for record in results:
            if downloaded >= max_images:
                break
            for m in record.get("media", []):
                # Keep the URL's original case for the request (paths can be
                # case-sensitive); lower-case only for the extension check.
                image_url = m.get("identifier") or ""
                if not image_url.lower().endswith(valid_extensions):
                    continue
                ext = os.path.splitext(image_url)[-1].lower()
                file_path = os.path.join(
                    output_folder,
                    f"{species_name.replace(' ', '_')}_{downloaded:04d}{ext}",
                )
                download_image(image_url, file_path)
                # Only count the image when a file is present at the target
                # path afterwards, so a skipped or failed download does not
                # consume one of the requested slots.
                if not os.path.isfile(file_path):
                    continue
                downloaded += 1
                if downloaded >= max_images:
                    break

        offset += len(results)
        # GBIF flags the last page; stop instead of issuing an empty request.
        if data.get("endOfRecords"):
            if downloaded < max_images:
                print("No more results found.")
            break

    print(f"✅ {downloaded} images saved to: {output_folder}")

print("\n📁 All images have been downloaded successfully.")


🔍 Searching for images of: Pinus pinaster...
✅ 5 images saved to: data_images

🔍 Searching for images of: Quercus suber...
✅ 5 images saved to: data_images

🔍 Searching for images of: Eucalyptus globulus...
✅ 5 images saved to: data_images

📁 All images have been downloaded successfully.
