# Download dataset

In [1]:
import os
from zipfile import ZipFile
import gdown
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from transformers import AutoImageProcessor, ViTMAEForPreTraining
from PIL import Image
import os
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, roc_curve, auc, average_precision_score, precision_recall_fscore_support, precision_recall_curve
import sys
import matplotlib.pyplot as plt

In [2]:
# Google Drive file id of the dataset archive and the direct-download URL built from it.
file_id = '1Xvmn1BylM1cPdl21xAO06Xqp9SpoXHiJ'
url = f'https://drive.google.com/uc?id={file_id}'

# Directory where the downloaded archive is stored. `exist_ok=True` creates it
# when missing and is a no-op otherwise, avoiding the check-then-create race.
data_path = "data/"
os.makedirs(data_path, exist_ok=True)

# Destination of the downloaded archive; later cells read `output_path` and `data_path`.
output_path = os.path.join(data_path, 'data.zip')

# Last expression of the cell: the value returned by gdown is shown as the cell output.
gdown.download(url, output_path, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1Xvmn1BylM1cPdl21xAO06Xqp9SpoXHiJ
From (redirected): https://drive.google.com/uc?id=1Xvmn1BylM1cPdl21xAO06Xqp9SpoXHiJ&confirm=t&uuid=78ce61f1-4c40-4f3c-9f53-a3561c98f388
To: /content/data/data.zip
100%|██████████| 5.27G/5.27G [01:19<00:00, 66.4MB/s]


'data/data.zip'

In [3]:
print("Descomprimiendo...")

# Extract the archive member by member so that tqdm can report per-file progress.
with ZipFile(output_path, 'r') as archive:
    members = archive.namelist()
    for member in tqdm(members, total=len(members), unit='file'):
        archive.extract(member, data_path)

# The archive is no longer needed once its contents are on disk.
os.remove(output_path)

Descomprimiendo...


100%|██████████| 6878/6878 [00:58<00:00, 118.07file/s]


# EDA

In [4]:
# The exploratory data analysis (EDA) is already covered in a separate notebook.

# Dataset y dataloader

In [5]:
# Dataset y DataLoader para MVTec AD (sacada de 3.0.model-training.ipynb)
class MVTecDataset(Dataset):
    """Dataset over the images of a single MVTec AD category.

    Expected directory layout (as read by this class)::

        <root_path>/<category>/train/good/*.png
        <root_path>/<category>/test/<good | defect type>/*.png
        <root_path>/<category>/ground_truth/<defect type>/<name>_mask.png

    Every item is a ``(image, label, mask)`` triple. ``label`` is 0 for normal
    images and 1 for anomalous ones. ``mask`` is always a tensor: the ground
    truth mask when one exists for a test image, otherwise an all-zero tensor
    of shape ``(1, *mask_size)``.

    Note: training labels are stored as a ``float32`` NumPy array while test
    labels are stored as a list of Python ints.
    """

    def __init__(self, root_path, category, is_train=True, transform=None, mask_transform=None,
                 mask_size=(224, 224)):
        """
        Args:
            root_path: Path to the MVTec AD root directory.
            category: Object category ('bottle', 'cable', 'carpet', etc.).
            is_train: If True, load the training images (normal only).
                      If False, load the test images (normal and anomalous).
            transform: Optional transform applied to the images.
            mask_transform: Optional transform applied to the ground truth masks.
            mask_size: ``(height, width)`` used for the default mask pipeline
                       (when ``mask_transform`` is None) and for the empty mask.
        """
        self.root_path = root_path
        self.category = category
        self.is_train = is_train
        self.transform = transform
        self.mask_transform = mask_transform
        self.mask_size = tuple(mask_size)

        if self.is_train:
            self.image_dir = os.path.join(root_path, category, 'train', 'good')
            self.image_paths = self._list_pngs(self.image_dir)
            self.labels = np.zeros(len(self.image_paths), dtype=np.float32)  # 0 = normal
            self.mask_paths = None
        else:  # Test set
            self.image_dir = os.path.join(root_path, category, 'test')
            self.image_paths = []
            self.labels = []
            self.mask_paths = []

            # Normal (good) images: label 0, no mask.
            good_dir = os.path.join(self.image_dir, 'good')
            if os.path.exists(good_dir):
                good_images = self._list_pngs(good_dir)
                self.image_paths.extend(good_images)
                self.labels.extend([0] * len(good_images))
                self.mask_paths.extend([None] * len(good_images))

            # Anomalous images: every sub-directory other than 'good' is a defect type.
            # Sorted so that the sample order does not depend on the filesystem.
            defect_types = sorted(
                d for d in os.listdir(self.image_dir)
                if d != 'good' and os.path.isdir(os.path.join(self.image_dir, d))
            )

            for defect in defect_types:
                defect_dir = os.path.join(self.image_dir, defect)
                defect_images = self._list_pngs(defect_dir)
                self.image_paths.extend(defect_images)
                self.labels.extend([1] * len(defect_images))  # 1 = anomaly

                # Pair each image with '<stem>_mask.png' in the ground truth folder;
                # a missing folder or file yields None for that image.
                gt_dir = os.path.join(root_path, category, 'ground_truth', defect)
                for img_path in defect_images:
                    stem, _ = os.path.splitext(os.path.basename(img_path))
                    mask_path = os.path.join(gt_dir, stem + '_mask.png')
                    self.mask_paths.append(mask_path if os.path.exists(mask_path) else None)

    @staticmethod
    def _list_pngs(directory):
        """Return the sorted full paths of the '.png' files directly inside `directory`."""
        return [os.path.join(directory, f) for f in sorted(os.listdir(directory))
                if f.endswith('.png')]

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label, mask)`` triple for index `idx`."""
        # Load the image as RGB; the context manager closes the file handle promptly.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if not self.is_train and self.mask_paths[idx] is not None:
            # Ground truth mask (test anomalies only), loaded as grayscale.
            with Image.open(self.mask_paths[idx]) as raw_mask:
                mask = raw_mask.convert('L')
            if self.mask_transform:
                mask = self.mask_transform(mask)
            else:
                # Default pipeline, applied even when no image transform is set, so the
                # mask is always a tensor that the default DataLoader collate can batch.
                mask = transforms.Compose([
                    transforms.Resize(self.mask_size),
                    transforms.ToTensor(),
                ])(mask)
        else:
            # No mask available: return an empty one so the item always has three elements.
            mask = torch.zeros((1, *self.mask_size))

        if self.transform:
            image = self.transform(image)

        return image, label, mask

# ViT MAE

In [6]:
def run_vit_mae_anomaly_detection(data_path, category, reports_dir='reports/vit_mae/', num_epochs=10, batch_size_train=8, lr=2e-5, seed=None):
    """
    Fine-tune a ViT MAE model on one MVTec AD category and score its test set.

    The model is fine-tuned on the category's training images using the loss
    returned by `ViTMAEForPreTraining`; afterwards that same loss, computed one
    test image at a time, is used as the image-level anomaly score.

    Args:
        data_path: Root directory of the MVTec AD dataset.
        category: Category to process (e.g. 'bottle').
        reports_dir: Output directory; plots go to '<reports_dir>/plots' and the
            fine-tuned weights to '<reports_dir>/trained_models'.
        num_epochs: Number of fine-tuning epochs.
        batch_size_train: Batch size used for training.
        lr: Initial learning rate for AdamW (cosine-annealed down to lr / 100).
        seed: Optional integer passed to `torch.manual_seed` before anything
            else runs. When None (default) no seeding is done.

    Returns:
        The image-level ROC AUC as a float, or None when the model could not be
        loaded, a split is empty, or the AUC could not be computed. Note that
        the fine-tuned weights are only saved when the AUC was computed.
    """
    if seed is not None:
        # NOTE(review): ViT MAE appears to mask patches at random on every forward
        # pass, so scores may vary between runs unless seeded — confirm in the
        # transformers documentation.
        torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Usando dispositivo: {device}")
    sys.stdout.flush()

    # Sub-directory for the generated plots.
    plots_dir = os.path.join(reports_dir, "plots")
    os.makedirs(plots_dir, exist_ok=True)

    # 1. Load the image processor and the model
    model_checkpoint = "facebook/vit-mae-base"
    try:
        image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
        model = ViTMAEForPreTraining.from_pretrained(model_checkpoint)
        model.to(device)
    except Exception as e:
        print(f"Error cargando modelo o procesador: {e}", file=sys.stderr)
        return None

    # 2. Prepare the data
    img_size = 224

    train_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
    ])
    test_transform = transforms.Compose([  # No augmentation for the test split
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
    ])

    train_dataset = MVTecDataset(root_path=data_path, category=category, is_train=True, transform=train_transform)
    if len(train_dataset) == 0:
        print(f"No se encontraron imágenes de entrenamiento para la categoría {category}. Omitiendo.", file=sys.stderr)
        return None

    test_dataset = MVTecDataset(root_path=data_path, category=category, is_train=False, transform=test_transform)
    if len(test_dataset) == 0:
        print(f"No se encontraron imágenes de test para la categoría {category}. Omitiendo.", file=sys.stderr)
        return None

    # Pinned host memory is only requested when the target device is CUDA.
    pin_memory = device.type == "cuda"
    train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True, num_workers=2, pin_memory=pin_memory)
    # batch_size=1 at evaluation time so that the model's loss is a per-image score.
    test_loader_per_instance = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2, pin_memory=pin_memory)

    # 3. Fine-tuning (training)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=lr/100)

    train_losses_history = []  # Mean training loss of every epoch

    print(f"\nIniciando fine-tuning para la categoría: {category}...")
    sys.stdout.flush()
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        for images, _, _ in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Train)"):
            images = images.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        scheduler.step()
        avg_epoch_loss = epoch_loss / len(train_loader)
        train_losses_history.append(avg_epoch_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Pérdida Media de Entrenamiento: {avg_epoch_loss:.4f}")
        sys.stdout.flush()

    # Save the training-loss plot; the figure is closed even if saving fails.
    loss_plot_path = os.path.join(plots_dir, f'training_loss_{category}.png')
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(range(1, num_epochs + 1), train_losses_history, marker='o', linestyle='-')
        ax.set_title(f'Pérdida de Entrenamiento por Época - Categoría: {category}')
        ax.set_xlabel('Época')
        ax.set_ylabel('Pérdida Media de Entrenamiento')
        ax.grid(True)
        fig.savefig(loss_plot_path)
    finally:
        plt.close(fig)
    print(f"Gráfica de pérdida de entrenamiento guardada en: {loss_plot_path}")
    sys.stdout.flush()

    # 4. Evaluation (anomaly scoring)
    model.eval()
    all_labels = []
    all_scores = []

    print(f"\nEvaluando en el conjunto de test para la categoría: {category} (puntuación por instancia)...")
    sys.stdout.flush()
    with torch.no_grad():
        for images, labels_batch, _ in tqdm(test_loader_per_instance, desc="Evaluando Instancias"):
            images = images.to(device)
            outputs = model(images)
            # The loss of the single-image batch is the anomaly score of that image.
            all_scores.append(outputs.loss.item())
            all_labels.extend(torch.as_tensor(labels_batch).reshape(-1).tolist())

    if not all_labels or not all_scores:
        print(f"No se pudieron recolectar datos para la evaluación de {category}. Omitiendo AUC.", file=sys.stderr)
        return None

    all_labels_np = np.array(all_labels).astype(int)
    if len(np.unique(all_labels_np)) < 2:  # AUC needs both classes to be present
        print(f"No hay suficientes clases únicas en las etiquetas para calcular AUC para {category} (Etiquetas: {np.unique(all_labels_np)}). Omitiendo AUC.", file=sys.stderr)
        return None

    # Image-level ROC AUC and ROC curve, both computed from the same label array.
    try:
        roc_auc = roc_auc_score(all_labels_np, all_scores)
        print(f"\nResultados para la categoría: {category}")
        print(f"ROC AUC a Nivel de Imagen: {roc_auc:.4f}")
        sys.stdout.flush()

        fpr, tpr, _ = roc_curve(all_labels_np, all_scores)
        roc_plot_path = os.path.join(plots_dir, f'roc_curve_{category}.png')
        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'Curva ROC (AUC = {roc_auc:.2f})')
            ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
            ax.set_xlim([0.0, 1.0])
            ax.set_ylim([0.0, 1.05])
            ax.set_xlabel('Tasa de Falsos Positivos (FPR)')
            ax.set_ylabel('Tasa de Verdaderos Positivos (TPR)')
            ax.set_title(f'Curva ROC - Categoría: {category}')
            ax.legend(loc="lower right")
            fig.savefig(roc_plot_path)
        finally:
            plt.close(fig)
        print(f"Gráfica de curva ROC guardada en: {roc_plot_path}")

    except ValueError as e:
        print(f"No se pudo calcular ROC AUC para {category}: {e}. Etiquetas únicas: {np.unique(all_labels_np)}", file=sys.stderr)
        return None  # No AUC available

    # Save the fine-tuned weights
    model_save_dir = os.path.join(reports_dir, "trained_models")
    os.makedirs(model_save_dir, exist_ok=True)
    model_save_path = os.path.join(model_save_dir, f"vit_mae_finetuned_{category}.pth")
    torch.save(model.state_dict(), model_save_path)
    print(f"Modelo fine-tuned guardado en {model_save_path}")

    return roc_auc


# --- Ejecución Principal ---
if __name__ == "__main__":
    MVTEC_AD_PATH = 'data/'

    # Abort early when the dataset directory is missing or empty.
    if not os.path.exists(MVTEC_AD_PATH) or not os.listdir(MVTEC_AD_PATH):
        print(f"Error: El directorio del dataset MVTec AD '{MVTEC_AD_PATH}' no existe o está vacío.", file=sys.stderr)
        print("Por favor, descarga el dataset MVTec AD y actualiza la variable MVTEC_AD_PATH.", file=sys.stderr)
        sys.exit(1)

    # Categories to process (a subset, for a quick run)
    categories_to_run = ["bottle", "carpet","grid","wood","leather"]

    all_category_results = {}  # category name -> image-level ROC AUC
    num_epochs_main = 15
    learning_rate_main = 1e-5
    batch_size_training_main = 16

    for cat_name in categories_to_run:
        print(f"\n{'='*20} PROCESANDO CATEGORÍA: {cat_name.upper()} {'='*20}")
        sys.stdout.flush()

        # Skip categories whose directory is not present in the dataset.
        category_dir_check = os.path.join(MVTEC_AD_PATH, cat_name)
        if not os.path.exists(category_dir_check):
            print(f"Directorio para la categoría '{cat_name}' no encontrado en '{category_dir_check}'. Omitiendo.", file=sys.stderr)
            continue

        # Named `category_auc` so it does not shadow `auc` imported from sklearn.metrics.
        category_auc = run_vit_mae_anomaly_detection(
            data_path=MVTEC_AD_PATH,
            category=cat_name,
            num_epochs=num_epochs_main,
            batch_size_train=batch_size_training_main,
            lr=learning_rate_main
        )
        # Only categories that produced an AUC are recorded.
        if category_auc is not None:
            all_category_results[cat_name] = category_auc
        sys.stdout.flush()

    # f-string prefix added: without it the banner printed the literal "{'='*30}".
    print(f"\n\n{'='*30} RESUMEN FINAL DE ROC AUC POR CATEGORÍA {'='*30}")
    if all_category_results:
        for cat_name, auc_score in all_category_results.items():
            print(f"Categoría: {cat_name:<15} | ROC AUC: {auc_score:.4f}")

        # Every stored value is a valid AUC (None results are never stored above).
        avg_auc = np.mean(list(all_category_results.values()))
        print(f"\nROC AUC Promedio (sobre categorías exitosas): {avg_auc:.4f}")
    else:
        print("No se generaron resultados para ninguna categoría.")
    sys.stdout.flush()


Usando dispositivo: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/217 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


model.safetensors:   0%|          | 0.00/448M [00:00<?, ?B/s]


Iniciando fine-tuning para la categoría: bottle...


Epoch 1/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.71it/s]

Epoch 1/15, Pérdida Media de Entrenamiento: 0.0163



Epoch 2/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.71it/s]

Epoch 2/15, Pérdida Media de Entrenamiento: 0.0148



Epoch 3/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.77it/s]

Epoch 3/15, Pérdida Media de Entrenamiento: 0.0146



Epoch 4/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.96it/s]

Epoch 4/15, Pérdida Media de Entrenamiento: 0.0136



Epoch 5/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.72it/s]

Epoch 5/15, Pérdida Media de Entrenamiento: 0.0131



Epoch 6/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.95it/s]

Epoch 6/15, Pérdida Media de Entrenamiento: 0.0138



Epoch 7/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.73it/s]

Epoch 7/15, Pérdida Media de Entrenamiento: 0.0140



Epoch 8/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.80it/s]

Epoch 8/15, Pérdida Media de Entrenamiento: 0.0139



Epoch 9/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.95it/s]

Epoch 9/15, Pérdida Media de Entrenamiento: 0.0136



Epoch 10/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.70it/s]

Epoch 10/15, Pérdida Media de Entrenamiento: 0.0128



Epoch 11/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.94it/s]

Epoch 11/15, Pérdida Media de Entrenamiento: 0.0123



Epoch 12/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.72it/s]

Epoch 12/15, Pérdida Media de Entrenamiento: 0.0132



Epoch 13/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.72it/s]

Epoch 13/15, Pérdida Media de Entrenamiento: 0.0130



Epoch 14/15 (Train): 100%|██████████| 14/14 [00:07<00:00,  1.93it/s]

Epoch 14/15, Pérdida Media de Entrenamiento: 0.0125



Epoch 15/15 (Train): 100%|██████████| 14/14 [00:08<00:00,  1.67it/s]

Epoch 15/15, Pérdida Media de Entrenamiento: 0.0123





Gráfica de pérdida de entrenamiento guardada en: reports/vit_mae/plots/training_loss_bottle.png

Evaluando en el conjunto de test para la categoría: bottle (puntuación por instancia)...


Evaluando Instancias: 100%|██████████| 83/83 [00:03<00:00, 26.22it/s]


Resultados para la categoría: bottle
ROC AUC a Nivel de Imagen: 0.7913





Gráfica de curva ROC guardada en: reports/vit_mae/plots/roc_curve_bottle.png
Modelo fine-tuned guardado en reports/vit_mae/trained_models/vit_mae_finetuned_bottle.pth

Usando dispositivo: cuda


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.



Iniciando fine-tuning para la categoría: carpet...


Epoch 1/15 (Train): 100%|██████████| 18/18 [00:15<00:00,  1.17it/s]

Epoch 1/15, Pérdida Media de Entrenamiento: 0.1312



Epoch 2/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.27it/s]

Epoch 2/15, Pérdida Media de Entrenamiento: 0.1278



Epoch 3/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.27it/s]

Epoch 3/15, Pérdida Media de Entrenamiento: 0.1244



Epoch 4/15 (Train): 100%|██████████| 18/18 [00:13<00:00,  1.29it/s]

Epoch 4/15, Pérdida Media de Entrenamiento: 0.1228



Epoch 5/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.27it/s]

Epoch 5/15, Pérdida Media de Entrenamiento: 0.1251



Epoch 6/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.27it/s]

Epoch 6/15, Pérdida Media de Entrenamiento: 0.1212



Epoch 7/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.26it/s]

Epoch 7/15, Pérdida Media de Entrenamiento: 0.1223



Epoch 8/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.22it/s]

Epoch 8/15, Pérdida Media de Entrenamiento: 0.1219



Epoch 9/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.25it/s]

Epoch 9/15, Pérdida Media de Entrenamiento: 0.1234



Epoch 10/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.27it/s]

Epoch 10/15, Pérdida Media de Entrenamiento: 0.1195



Epoch 11/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.26it/s]

Epoch 11/15, Pérdida Media de Entrenamiento: 0.1182



Epoch 12/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.25it/s]

Epoch 12/15, Pérdida Media de Entrenamiento: 0.1190



Epoch 13/15 (Train): 100%|██████████| 18/18 [00:15<00:00,  1.19it/s]

Epoch 13/15, Pérdida Media de Entrenamiento: 0.1176



Epoch 14/15 (Train): 100%|██████████| 18/18 [00:14<00:00,  1.20it/s]

Epoch 14/15, Pérdida Media de Entrenamiento: 0.1206



Epoch 15/15 (Train): 100%|██████████| 18/18 [00:15<00:00,  1.19it/s]

Epoch 15/15, Pérdida Media de Entrenamiento: 0.1202





Gráfica de pérdida de entrenamiento guardada en: reports/vit_mae/plots/training_loss_carpet.png

Evaluando en el conjunto de test para la categoría: carpet (puntuación por instancia)...


Evaluando Instancias: 100%|██████████| 117/117 [00:06<00:00, 16.79it/s]


Resultados para la categoría: carpet
ROC AUC a Nivel de Imagen: 0.4872





Gráfica de curva ROC guardada en: reports/vit_mae/plots/roc_curve_carpet.png
Modelo fine-tuned guardado en reports/vit_mae/trained_models/vit_mae_finetuned_carpet.pth

Usando dispositivo: cuda


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.



Iniciando fine-tuning para la categoría: grid...


Epoch 1/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.90it/s]

Epoch 1/15, Pérdida Media de Entrenamiento: 0.0543



Epoch 2/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  2.01it/s]

Epoch 2/15, Pérdida Media de Entrenamiento: 0.0478



Epoch 3/15 (Train): 100%|██████████| 17/17 [00:07<00:00,  2.16it/s]

Epoch 3/15, Pérdida Media de Entrenamiento: 0.0469



Epoch 4/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.94it/s]

Epoch 4/15, Pérdida Media de Entrenamiento: 0.0450



Epoch 5/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  2.11it/s]

Epoch 5/15, Pérdida Media de Entrenamiento: 0.0441



Epoch 6/15 (Train): 100%|██████████| 17/17 [00:07<00:00,  2.18it/s]

Epoch 6/15, Pérdida Media de Entrenamiento: 0.0435



Epoch 7/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  2.03it/s]

Epoch 7/15, Pérdida Media de Entrenamiento: 0.0422



Epoch 8/15 (Train): 100%|██████████| 17/17 [00:07<00:00,  2.21it/s]

Epoch 8/15, Pérdida Media de Entrenamiento: 0.0418



Epoch 9/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.99it/s]

Epoch 9/15, Pérdida Media de Entrenamiento: 0.0412



Epoch 10/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  2.01it/s]

Epoch 10/15, Pérdida Media de Entrenamiento: 0.0407



Epoch 11/15 (Train): 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]

Epoch 11/15, Pérdida Media de Entrenamiento: 0.0397



Epoch 12/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.98it/s]

Epoch 12/15, Pérdida Media de Entrenamiento: 0.0399



Epoch 13/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.95it/s]

Epoch 13/15, Pérdida Media de Entrenamiento: 0.0406



Epoch 14/15 (Train): 100%|██████████| 17/17 [00:07<00:00,  2.22it/s]

Epoch 14/15, Pérdida Media de Entrenamiento: 0.0404



Epoch 15/15 (Train): 100%|██████████| 17/17 [00:08<00:00,  1.96it/s]

Epoch 15/15, Pérdida Media de Entrenamiento: 0.0399
Gráfica de pérdida de entrenamiento guardada en: reports/vit_mae/plots/training_loss_grid.png

Evaluando en el conjunto de test para la categoría: grid (puntuación por instancia)...



Evaluando Instancias: 100%|██████████| 78/78 [00:02<00:00, 27.51it/s]


Resultados para la categoría: grid
ROC AUC a Nivel de Imagen: 0.9081





Gráfica de curva ROC guardada en: reports/vit_mae/plots/roc_curve_grid.png
Modelo fine-tuned guardado en reports/vit_mae/trained_models/vit_mae_finetuned_grid.pth

Usando dispositivo: cuda


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.



Iniciando fine-tuning para la categoría: wood...


Epoch 1/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.14it/s]

Epoch 1/15, Pérdida Media de Entrenamiento: 0.0226



Epoch 2/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.18it/s]

Epoch 2/15, Pérdida Media de Entrenamiento: 0.0226



Epoch 3/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.21it/s]

Epoch 3/15, Pérdida Media de Entrenamiento: 0.0229



Epoch 4/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.23it/s]

Epoch 4/15, Pérdida Media de Entrenamiento: 0.0223



Epoch 5/15 (Train): 100%|██████████| 16/16 [00:12<00:00,  1.24it/s]

Epoch 5/15, Pérdida Media de Entrenamiento: 0.0216



Epoch 6/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.22it/s]

Epoch 6/15, Pérdida Media de Entrenamiento: 0.0224



Epoch 7/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.21it/s]

Epoch 7/15, Pérdida Media de Entrenamiento: 0.0222



Epoch 8/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.21it/s]

Epoch 8/15, Pérdida Media de Entrenamiento: 0.0224



Epoch 9/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.22it/s]

Epoch 9/15, Pérdida Media de Entrenamiento: 0.0227



Epoch 10/15 (Train): 100%|██████████| 16/16 [00:12<00:00,  1.24it/s]

Epoch 10/15, Pérdida Media de Entrenamiento: 0.0223



Epoch 11/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.22it/s]

Epoch 11/15, Pérdida Media de Entrenamiento: 0.0221



Epoch 12/15 (Train): 100%|██████████| 16/16 [00:12<00:00,  1.24it/s]

Epoch 12/15, Pérdida Media de Entrenamiento: 0.0220



Epoch 13/15 (Train): 100%|██████████| 16/16 [00:12<00:00,  1.24it/s]

Epoch 13/15, Pérdida Media de Entrenamiento: 0.0217



Epoch 14/15 (Train): 100%|██████████| 16/16 [00:13<00:00,  1.23it/s]

Epoch 14/15, Pérdida Media de Entrenamiento: 0.0223



Epoch 15/15 (Train): 100%|██████████| 16/16 [00:12<00:00,  1.23it/s]

Epoch 15/15, Pérdida Media de Entrenamiento: 0.0218
Gráfica de pérdida de entrenamiento guardada en: reports/vit_mae/plots/training_loss_wood.png

Evaluando en el conjunto de test para la categoría: wood (puntuación por instancia)...



Evaluando Instancias: 100%|██████████| 79/79 [00:04<00:00, 18.01it/s]


Resultados para la categoría: wood
ROC AUC a Nivel de Imagen: 0.8842





Gráfica de curva ROC guardada en: reports/vit_mae/plots/roc_curve_wood.png
Modelo fine-tuned guardado en reports/vit_mae/trained_models/vit_mae_finetuned_wood.pth

Usando dispositivo: cuda


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.



Iniciando fine-tuning para la categoría: leather...


Epoch 1/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.42it/s]

Epoch 1/15, Pérdida Media de Entrenamiento: 0.0099



Epoch 2/15 (Train): 100%|██████████| 16/16 [00:10<00:00,  1.46it/s]

Epoch 2/15, Pérdida Media de Entrenamiento: 0.0097



Epoch 3/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.40it/s]

Epoch 3/15, Pérdida Media de Entrenamiento: 0.0098



Epoch 4/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.41it/s]

Epoch 4/15, Pérdida Media de Entrenamiento: 0.0096



Epoch 5/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.36it/s]

Epoch 5/15, Pérdida Media de Entrenamiento: 0.0098



Epoch 6/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.41it/s]

Epoch 6/15, Pérdida Media de Entrenamiento: 0.0094



Epoch 7/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.40it/s]

Epoch 7/15, Pérdida Media de Entrenamiento: 0.0096



Epoch 8/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.41it/s]

Epoch 8/15, Pérdida Media de Entrenamiento: 0.0094



Epoch 9/15 (Train): 100%|██████████| 16/16 [00:10<00:00,  1.48it/s]

Epoch 9/15, Pérdida Media de Entrenamiento: 0.0095



Epoch 10/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.45it/s]

Epoch 10/15, Pérdida Media de Entrenamiento: 0.0096



Epoch 11/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.39it/s]

Epoch 11/15, Pérdida Media de Entrenamiento: 0.0094



Epoch 12/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.39it/s]

Epoch 12/15, Pérdida Media de Entrenamiento: 0.0097



Epoch 13/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.36it/s]

Epoch 13/15, Pérdida Media de Entrenamiento: 0.0094



Epoch 14/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.39it/s]

Epoch 14/15, Pérdida Media de Entrenamiento: 0.0095



Epoch 15/15 (Train): 100%|██████████| 16/16 [00:11<00:00,  1.38it/s]

Epoch 15/15, Pérdida Media de Entrenamiento: 0.0095





Gráfica de pérdida de entrenamiento guardada en: reports/vit_mae/plots/training_loss_leather.png

Evaluando en el conjunto de test para la categoría: leather (puntuación por instancia)...


Evaluando Instancias: 100%|██████████| 124/124 [00:06<00:00, 18.75it/s]


Resultados para la categoría: leather
ROC AUC a Nivel de Imagen: 0.4314





Gráfica de curva ROC guardada en: reports/vit_mae/plots/roc_curve_leather.png
Modelo fine-tuned guardado en reports/vit_mae/trained_models/vit_mae_finetuned_leather.pth


{'='*30} RESUMEN FINAL DE ROC AUC POR CATEGORÍA {'='*30}
Categoría: bottle          | ROC AUC: 0.7913
Categoría: carpet          | ROC AUC: 0.4872
Categoría: grid            | ROC AUC: 0.9081
Categoría: wood            | ROC AUC: 0.8842
Categoría: leather         | ROC AUC: 0.4314

ROC AUC Promedio (sobre categorías exitosas): 0.7004
