In [23]:
import requests
import os
from tqdm import tqdm

# Species and target image counts
species_targets = {
    "Pinus pinaster": 200,
    "Quercus suber": 200,
    "Eucalyptus globulus": 200,
}

output_folder = "data_images"

# Acceptable image formats
valid_extensions = (".jpg", ".jpeg")

# Create main output folder
os.makedirs(output_folder, exist_ok=True)

def get_occurrences(scientific_name, offset=0, limit=300):
    url = "https://api.gbif.org/v1/occurrence/search"
    params = {
        "scientificName": scientific_name,
        "country": "PT",
        "mediaType": "StillImage",
        "hasCoordinate": "true",
        "license": "CC_BY_4_0",
        "limit": limit,
        "offset": offset
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

def download_image(image_url, file_path):
    try:
        r = requests.get(image_url, stream=True, timeout=10)
        if r.status_code == 200:
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
    except Exception as e:
        print(f"Error downloading {image_url}: {e}")

# Loop through each species
for species_name, target_count in species_targets.items():
    print(f"\n🔍 Searching for images of: {species_name}...")

    downloaded = 300
    offset = 300
    max_images = 500

    while downloaded < max_images:
        data = get_occurrences(species_name, offset=offset, limit=900)
        results = data.get("results", [])
        if not results:
            print("No more results found.")
            break

        for record in results:
            if downloaded >= max_images:
                break
            for m in record.get("media", []):
                image_url = m.get("identifier", "")
                if image_url and image_url.lower().endswith(valid_extensions):
                    ext = os.path.splitext(image_url)[-1]
                    file_path = os.path.join(
                        output_folder,
                        f"{species_name.replace(' ', '_')}_{downloaded:04d}{ext}"
                    )
                    download_image(image_url, file_path)
                    downloaded += 1
                    if downloaded >= max_images:
                        break

        offset += len(results)

    print(f"✅ {downloaded} images saved to: {output_folder}")

print("\n📁 All images have been downloaded successfully.")



🔍 Searching for images of: Pinus pinaster...
No more results found.
✅ 457 images saved to: data_images

🔍 Searching for images of: Quercus suber...
✅ 500 images saved to: data_images

🔍 Searching for images of: Eucalyptus globulus...
✅ 500 images saved to: data_images

📁 All images have been downloaded successfully.


In [2]:
import os
import re
from collections import defaultdict

# Caminho da pasta com as imagens
folder = 'data_images'

# Expressão regular para capturar o nome da espécie e ignorar o número
pattern = re.compile(r'^(.*?)_\d+\.(\w+)$')

# Dicionário para armazenar listas de arquivos por espécie
species_files = defaultdict(list)

# Organizar os arquivos por espécie
for filename in os.listdir(folder):
    match = pattern.match(filename)
    if match:
        species_name, ext = match.groups()
        species_files[species_name].append((filename, ext))

# Renomear os arquivos de forma sequencial por espécie
for species, files in species_files.items():
    files.sort()  # Ordena para consistência
    for idx, (old_name, ext) in enumerate(files):
        new_name = f"{species}_{idx:04d}.{ext}"
        src = os.path.join(folder, old_name)
        dst = os.path.join(folder, new_name)
        os.rename(src, dst)
        print(f"Renamed: {old_name} → {new_name}")


Renamed: Eucalyptus_globulus_0000.jpg → Eucalyptus_globulus_0000.jpg
Renamed: Eucalyptus_globulus_0001.jpg → Eucalyptus_globulus_0001.jpg
Renamed: Eucalyptus_globulus_0002.jpeg → Eucalyptus_globulus_0002.jpeg
Renamed: Eucalyptus_globulus_0003.jpeg → Eucalyptus_globulus_0003.jpeg
Renamed: Eucalyptus_globulus_0004.jpeg → Eucalyptus_globulus_0004.jpeg
Renamed: Eucalyptus_globulus_0005.jpeg → Eucalyptus_globulus_0005.jpeg
Renamed: Eucalyptus_globulus_0006.jpeg → Eucalyptus_globulus_0006.jpeg
Renamed: Eucalyptus_globulus_0007.jpeg → Eucalyptus_globulus_0007.jpeg
Renamed: Eucalyptus_globulus_0008.jpg → Eucalyptus_globulus_0008.jpg
Renamed: Eucalyptus_globulus_0009.jpg → Eucalyptus_globulus_0009.jpg
Renamed: Eucalyptus_globulus_0010.jpeg → Eucalyptus_globulus_0010.jpeg
Renamed: Eucalyptus_globulus_0011.jpeg → Eucalyptus_globulus_0011.jpeg
Renamed: Eucalyptus_globulus_0012.jpeg → Eucalyptus_globulus_0012.jpeg
Renamed: Eucalyptus_globulus_0013.jpg → Eucalyptus_globulus_0013.jpg
Renamed: Eucalyp

In [8]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from tqdm import tqdm

# --- Settings ---
data_dir = "data_images"
model_path = "Tree_Identifier/models/model.pth"  # or "models/model.pth"
class_names = ["Eucalyptus globulus", "Pinus pinaster", "Quercus suber"]
num_classes = len(class_names)

# --- Model Definition ---
class DeeperCNN(nn.Module):
    def __init__(self, num_classes):
        super(DeeperCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.25),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 32 * 32, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# --- Transforms ---
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# --- Load Model ---
device = torch.device("cpu")
model = torch.load(model_path, map_location=device, weights_only=False)
model.eval()

# --- Prediction Function ---
def predict_image(image_path):
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(image)
        probs = F.softmax(outputs, dim=1)[0]
    return probs.cpu().numpy()

# --- Store Probabilities ---
results = {cls: [] for cls in class_names}

for filename in tqdm(os.listdir(data_dir)):
    if filename.lower().endswith((".jpg", ".jpeg", ".png")):
        path = os.path.join(data_dir, filename)
        try:
            probs = predict_image(path)
            for i, cls in enumerate(class_names):
                results[cls].append((filename, float(probs[i])))
        except Exception as e:
            print(f"Error with {filename}: {e}")

# --- Show Top 10 per Class ---
print("\n=== Top 10 images per class (highest confidence) ===")
for cls in class_names:
    print(f"\nClass: {cls}")
    top10 = sorted(results[cls], key=lambda x: x[1], reverse=True)[:10]
    for filename, prob in top10:
        print(f"{filename}: {prob:.4f}")

100%|██████████| 593/593 [01:41<00:00,  5.87it/s]


=== Top 10 images per class (highest confidence) ===

Class: Eucalyptus globulus
Eucalyptus_globulus_0014.jpeg: 0.6492
Eucalyptus_globulus_0139.jpeg: 0.6338
Quercus_suber_0220.jpeg: 0.6336
Eucalyptus_globulus_0077.jpeg: 0.6228
Quercus_suber_0153.jpeg: 0.6188
Eucalyptus_globulus_0045.jpeg: 0.6162
Quercus_suber_0112.jpeg: 0.5782
Eucalyptus_globulus_0022.jpeg: 0.5779
Eucalyptus_globulus_0106.jpeg: 0.5754
Eucalyptus_globulus_0132.jpeg: 0.5746

Class: Pinus pinaster
Pinus_pinaster_0157.jpeg: 0.9878
Pinus_pinaster_0161.jpeg: 0.9817
Pinus_pinaster_0069.jpeg: 0.9778
Pinus_pinaster_0187.jpeg: 0.9725
Pinus_pinaster_0133.jpeg: 0.9716
Pinus_pinaster_0156.jpeg: 0.9709
Pinus_pinaster_0081.jpeg: 0.9703
Pinus_pinaster_0195.jpeg: 0.9689
Pinus_pinaster_0068.jpeg: 0.9685
Pinus_pinaster_0194.jpeg: 0.9667

Class: Quercus suber
Quercus_suber_0125.jpeg: 0.7444
Quercus_suber_0001.jpeg: 0.6879
Quercus_suber_0021.jpeg: 0.6879
Quercus_suber_0008.jpeg: 0.6757
Quercus_suber_0023.jpeg: 0.6510
Quercus_suber_0101.jp


