In [23]:
import requests
import os
from tqdm import tqdm

# Species and target image counts
species_targets = {
    "Pinus pinaster": 200,
    "Quercus suber": 200,
    "Eucalyptus globulus": 200,
}

output_folder = "data_images"

# Acceptable image formats
valid_extensions = (".jpg", ".jpeg")

# Create main output folder
os.makedirs(output_folder, exist_ok=True)

def get_occurrences(scientific_name, offset=0, limit=300):
    url = "https://api.gbif.org/v1/occurrence/search"
    params = {
        "scientificName": scientific_name,
        "country": "PT",
        "mediaType": "StillImage",
        "hasCoordinate": "true",
        "license": "CC_BY_4_0",
        "limit": limit,
        "offset": offset
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

def download_image(image_url, file_path):
    try:
        r = requests.get(image_url, stream=True, timeout=10)
        if r.status_code == 200:
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
    except Exception as e:
        print(f"Error downloading {image_url}: {e}")

# Loop through each species
for species_name, target_count in species_targets.items():
    print(f"\n🔍 Searching for images of: {species_name}...")

    downloaded = 300
    offset = 300
    max_images = 500

    while downloaded < max_images:
        data = get_occurrences(species_name, offset=offset, limit=900)
        results = data.get("results", [])
        if not results:
            print("No more results found.")
            break

        for record in results:
            if downloaded >= max_images:
                break
            for m in record.get("media", []):
                image_url = m.get("identifier", "")
                if image_url and image_url.lower().endswith(valid_extensions):
                    ext = os.path.splitext(image_url)[-1]
                    file_path = os.path.join(
                        output_folder,
                        f"{species_name.replace(' ', '_')}_{downloaded:04d}{ext}"
                    )
                    download_image(image_url, file_path)
                    downloaded += 1
                    if downloaded >= max_images:
                        break

        offset += len(results)

    print(f"✅ {downloaded} images saved to: {output_folder}")

print("\n📁 All images have been downloaded successfully.")



🔍 Searching for images of: Pinus pinaster...
No more results found.
✅ 457 images saved to: data_images

🔍 Searching for images of: Quercus suber...
✅ 500 images saved to: data_images

🔍 Searching for images of: Eucalyptus globulus...
✅ 500 images saved to: data_images

📁 All images have been downloaded successfully.


In [13]:
import os
import re
from collections import defaultdict

# Caminho da pasta com as imagens
folder = 'data_images'

# Expressão regular para capturar o nome da espécie e ignorar o número
pattern = re.compile(r'^(.*?)_\d+\.(\w+)$')

# Dicionário para armazenar listas de arquivos por espécie
species_files = defaultdict(list)

# Organizar os arquivos por espécie
for filename in os.listdir(folder):
    match = pattern.match(filename)
    if match:
        species_name, ext = match.groups()
        species_files[species_name].append((filename, ext))

# Renomear os arquivos de forma sequencial por espécie
for species, files in species_files.items():
    files.sort()  # Ordena para consistência
    for idx, (old_name, ext) in enumerate(files):
        new_name = f"{species}_{idx:04d}.{ext}"
        src = os.path.join(folder, old_name)
        dst = os.path.join(folder, new_name)
        os.rename(src, dst)
        print(f"Renamed: {old_name} → {new_name}")


Renamed: Eucalyptus_globulus_0000.jpeg → Eucalyptus_globulus_0000.jpeg
Renamed: Eucalyptus_globulus_0001.jpg → Eucalyptus_globulus_0001.jpg
Renamed: Eucalyptus_globulus_0002.jpeg → Eucalyptus_globulus_0002.jpeg
Renamed: Eucalyptus_globulus_0003.jpeg → Eucalyptus_globulus_0003.jpeg
Renamed: Eucalyptus_globulus_0004.jpg → Eucalyptus_globulus_0004.jpg
Renamed: Eucalyptus_globulus_0005.jpeg → Eucalyptus_globulus_0005.jpeg
Renamed: Eucalyptus_globulus_0006.jpeg → Eucalyptus_globulus_0006.jpeg
Renamed: Eucalyptus_globulus_0007.jpeg → Eucalyptus_globulus_0007.jpeg
Renamed: Eucalyptus_globulus_0008.jpeg → Eucalyptus_globulus_0008.jpeg
Renamed: Eucalyptus_globulus_0009.jpeg → Eucalyptus_globulus_0009.jpeg
Renamed: Eucalyptus_globulus_0010.jpeg → Eucalyptus_globulus_0010.jpeg
Renamed: Eucalyptus_globulus_0011.jpeg → Eucalyptus_globulus_0011.jpeg
Renamed: Eucalyptus_globulus_0012.jpeg → Eucalyptus_globulus_0012.jpeg
Renamed: Eucalyptus_globulus_0013.jpeg → Eucalyptus_globulus_0013.jpeg
Renamed: E