In [None]:
import os
import numpy as np
from PIL import Image
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def process_image(image_path, target_size=(32, 32)):
    """Load a single image file as an RGB pixel array.

    The image is opened with PIL, converted to RGB mode, resized to
    ``target_size`` and converted to a NumPy array.

    Args:
        image_path: Path of the image file to read.
        target_size: Size passed to ``Image.resize``; defaults to (32, 32).

    Returns:
        The NumPy array built from the resized RGB image, or ``None`` when
        the file cannot be read (the error is printed, not raised).
    """
    try:
        with Image.open(image_path) as source:
            resized = source.convert('RGB').resize(target_size)
            pixels = np.array(resized)
    except OSError as e:
        # IOError is an alias of OSError in Python 3, so this covers both.
        print(f"Erro ao processar {image_path}: {e}")
        return None
    return pixels

def load_dataset(dog_path, cat_path, target_size=(32, 32)):
    """Load and preprocess the full dog/cat dataset using a thread pool.

    Every entry of ``dog_path`` and ``cat_path`` is handed to
    ``process_image``; entries that fail to load (``process_image`` returns
    ``None``) are dropped together with their label.

    Args:
        dog_path: Directory containing the dog images.
        cat_path: Directory containing the cat images.
        target_size: Size every image is resized to; forwarded to
            ``process_image``. Defaults to (32, 32).

    Returns:
        A tuple ``(images, classes)`` of NumPy arrays. ``images`` stacks the
        per-image arrays; ``classes`` holds one two-element label per image,
        ``[0, 1]`` for dogs and ``[1, 0]`` for cats, in the same order.

    Raises:
        ValueError: If no image could be loaded from either directory.
    """
    # List every entry in both class directories
    dog_images = [os.path.join(dog_path, f) for f in os.listdir(dog_path)]
    cat_images = [os.path.join(cat_path, f) for f in os.listdir(cat_path)]

    images = dog_images + cat_images
    classes = [[0, 1]] * len(dog_images) + [[1, 0]] * len(cat_images)

    # Process the images in parallel. target_size must be forwarded
    # explicitly, otherwise process_image silently falls back to its own
    # default and the caller's requested size is ignored.
    with ThreadPoolExecutor() as executor:
        # tqdm wraps the lazy map iterator to show a progress bar
        results = list(tqdm(
            executor.map(lambda path: process_image(path, target_size), images),
            total=len(images),
            desc="Processando imagens"
        ))

    # Drop failed images (None) along with their labels so both stay aligned
    valid_results = [(img, cls) for img, cls in zip(results, classes) if img is not None]

    if not valid_results:
        raise ValueError("Nenhuma imagem válida encontrada")

    # Split the (image, label) pairs back into two parallel sequences
    train_images, train_classes = zip(*valid_results)

    return np.array(train_images), np.array(train_classes)

# Dataset directories, given as paths relative to the working directory.
dog_images_path = "kagglecatsanddogs_5340/PetImages/Dog/"
cat_images_path = "kagglecatsanddogs_5340/PetImages/Cat/"

# Load both classes. Any failure is printed rather than raised; in that case
# `train` and `classes` are not assigned by this block.
try:
    train, classes = load_dataset(dog_images_path, cat_images_path)
    print(f"Dataset carregado com sucesso. Shape: {train.shape}")
except Exception as e:
    print(f"Erro ao carregar dataset: {e}")

Processando imagens:  85%|████████▌ | 21330/24999 [00:32<00:08, 432.46it/s] 

Erro ao processar kagglecatsanddogs_5340/PetImages/Cat/666.jpg: cannot identify image file 'kagglecatsanddogs_5340/PetImages/Cat/666.jpg'


Processando imagens: 100%|██████████| 24999/24999 [00:40<00:00, 613.97it/s]


Dataset carregado com sucesso. Shape: (24998, 32, 32, 3)
