In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from PIL import Image
import timm
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, balanced_accuracy_score, confusion_matrix
import time
import numpy as np
from collections import Counter
from isic_class import ISICDataset
import traceback

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Paths and file names.
# Root folder that holds both the metadata CSVs and the image folders.
DATA_PATH = "winner_data/"
METADATA_PATH = os.path.join(DATA_PATH, "metadata/")

IMAGES_PATH = os.path.join(DATA_PATH, "images/")
# Synthetic data lives under the same root; reuse DATA_PATH instead of repeating
# the literal so the two values cannot drift apart.
SYNTHETIC_DATA_PATH = DATA_PATH

ORIGINAL_IMAGES_PATH = os.path.join(IMAGES_PATH, "original/")
SYNTHETIC_IMAGES_PATH = os.path.join(IMAGES_PATH, "synthetic/")

# File names of the metadata CSVs.
TRAIN_ORIGINAL_DF_FILENAME = "train_original.csv"
VAL_DF_FILENAME = "val.csv"
TEST_DF_FILENAME = "test.csv"
SYNTHETIC_DF_FILENAME = "synthetic.csv"

# Full paths of the metadata CSVs (one per file name above).
TRAIN_ORIGINAL_DF_PATH = os.path.join(METADATA_PATH, TRAIN_ORIGINAL_DF_FILENAME)
VAL_DF_PATH = os.path.join(METADATA_PATH, VAL_DF_FILENAME)
TEST_DF_PATH = os.path.join(METADATA_PATH, TEST_DF_FILENAME)
SYNTHETIC_DF_PATH = os.path.join(METADATA_PATH, SYNTHETIC_DF_FILENAME)

# Model checkpoints: one sub-folder per experiment.
MODELS_PATH = "models/"
EXPERIMENT_NAME = "edgenext_last_layer/"
MODEL_SAVE_PATH = os.path.join(MODELS_PATH, EXPERIMENT_NAME)

In [3]:
# Column names used in the metadata dataframes.
IMAGE_ID_COL, IMAGE_PATH_COL, TARGET_COL = "isic_id", "image_path", "target"

In [4]:
# Model settings: number of output classes and the timm checkpoint identifier.
NUM_CLASSES = 2
MODEL_NAME = 'edgenext_base.in21k_ft_in1k'

In [21]:
# Training hyperparameters.
BATCH_SIZE, MAX_EPOCHS = 32, 30
EARLY_STOPPING_PATIENCE = 5
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 1e-2

In [22]:
# Pick the compute device: CUDA when available, CPU otherwise, and report which one.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU" if DEVICE.type == "cuda" else "CPU")

GPU


In [23]:
# DataLoader worker processes: 14 workers on machines reporting more than 14
# CPUs, otherwise 0 (data is loaded in the main process).
# os.cpu_count() returns None when the count cannot be determined; treat that
# as 0 so the comparison does not raise TypeError.
NUM_WORKERS = 14 if (os.cpu_count() or 0) > 14 else 0

## 2. Transformaciones de imágenes

In [24]:
# Read the input size and normalisation statistics from the pretrained
# configuration that timm returns for MODEL_NAME.
model_cfg = timm.get_pretrained_cfg(MODEL_NAME)
# Element 1 of input_size is used as the side length of the square resize.
IMG_SIZE = model_cfg.input_size[1]
NORM_MEAN, NORM_STD = model_cfg.mean, model_cfg.std

In [25]:
# Training pipeline: resize, random augmentation to limit overfitting, then
# tensor conversion and normalisation with the pretrained statistics.
train_transforms = transforms.Compose(
    [transforms.Resize((IMG_SIZE, IMG_SIZE))]
    + [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    ]
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Validation pipeline: same resize and normalisation, no random augmentation.
val_transforms = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

## 3. Cargar datasets y dataloaders

In [26]:
# Load the three metadata tables: real training split, validation split and
# synthetic images.
# NOTE(review): the previews below show an "Unnamed: 0" column and backslashes
# inside the synthetic image paths - confirm both are intended.
train_original_df, val_df, synthetic_df = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_ORIGINAL_DF_PATH, VAL_DF_PATH, SYNTHETIC_DF_PATH)
)

In [27]:
# Preview the first five rows of the real training metadata.
train_original_df.head()

Unnamed: 0,isic_id,target,image_path
0,ISIC_4257482,0,winner_data/images/original/ISIC_4257482.jpg
1,ISIC_8604888,0,winner_data/images/original/ISIC_8604888.jpg
2,ISIC_0209050,0,winner_data/images/original/ISIC_0209050.jpg
3,ISIC_8099764,0,winner_data/images/original/ISIC_8099764.jpg
4,ISIC_6615693,0,winner_data/images/original/ISIC_6615693.jpg


In [28]:
# Preview the first five rows of the validation metadata.
val_df.head()

Unnamed: 0,isic_id,target,image_path
0,ISIC_4260773,0,winner_data/images/original/ISIC_4260773.jpg
1,ISIC_9697201,0,winner_data/images/original/ISIC_9697201.jpg
2,ISIC_8301065,0,winner_data/images/original/ISIC_8301065.jpg
3,ISIC_3753800,0,winner_data/images/original/ISIC_3753800.jpg
4,ISIC_0690097,0,winner_data/images/original/ISIC_0690097.jpg


In [29]:
# Preview the first five rows of the synthetic-image metadata.
synthetic_df.head()

Unnamed: 0,target,image_path
0,1,winner_data/images/synthetic/0/lr\00047c6d-ca1...
1,1,winner_data/images/synthetic/0/lr\00808b91-e14...
2,1,winner_data/images/synthetic/0/lr\008dcaf9-71b...
3,1,winner_data/images/synthetic/0/lr\009d9c71-b0d...
4,1,winner_data/images/synthetic/0/lr\00b846be-652...


## 4. Métrica custom

In [30]:
# Competition metric, written with a scikit-learn scorer signature
def custom_metric(estimator, X, y_true):
    """Partial AUC above 80% TPR computed from an estimator's probabilities.

    Args:
        estimator: Object exposing ``predict_proba(X)``; column 1 of its
            output is used as the positive-class score.
        X: Features forwarded unchanged to ``estimator.predict_proba``.
        y_true: Binary labels; must support ``y_true - 1`` and ``abs``.

    Returns:
        The un-normalised partial AUC.
    """
    positive_scores = estimator.predict_proba(X)[:, 1]
    min_tpr = 0.80
    max_fpr = abs(1 - min_tpr)

    # Flip labels and scores so the TPR >= min_tpr region of the original
    # problem becomes the FPR <= max_fpr region of the flipped one.
    flipped_truth = abs(y_true - 1)
    flipped_scores = np.array([1.0 - score for score in positive_scores])

    scaled_auc = roc_auc_score(flipped_truth, flipped_scores, max_fpr=max_fpr)

    # Map the standardised score returned by roc_auc_score (max_fpr given)
    # back to a raw area.
    baseline_area = 0.5 * max_fpr**2
    return baseline_area + (max_fpr - baseline_area) / (1.0 - 0.5) * (scaled_auc - 0.5)

In [31]:
def calculate_custom_partial_auc(y_true, y_pred_proba_positive_class):
    """Partial AUC above 80% TPR from labels and positive-class probabilities.

    Uses the same inverse standardisation as ``custom_metric`` so both
    functions report the same value for the same predictions.

    Args:
        y_true: Binary labels (0/1), array-like.
        y_pred_proba_positive_class: Predicted probability of class 1 for each
            sample, array-like with the same length as ``y_true``.

    Returns:
        The un-normalised partial AUC.

    Raises:
        ValueError: Propagated from ``roc_auc_score`` when ``y_true`` holds a
            single class.
    """
    min_tpr_threshold = 0.80
    max_fpr_threshold = abs(1 - min_tpr_threshold)

    # Flip labels and scores so the TPR >= threshold region of the original
    # problem becomes the FPR <= max_fpr region of the flipped one.
    # np.asarray lets callers pass plain lists as well as arrays.
    v_gt = np.abs(np.asarray(y_true) - 1)
    v_pred_proba_negative_class = 1.0 - np.asarray(y_pred_proba_positive_class)

    partial_auc_scaled = roc_auc_score(v_gt, v_pred_proba_negative_class, max_fpr=max_fpr_threshold)

    # roc_auc_score(max_fpr=...) returns a standardised score where 0.5 is the
    # random baseline; invert it as min + (max - min) * (scaled - 0.5) / 0.5.
    min_area = 0.5 * max_fpr_threshold**2
    max_area = max_fpr_threshold
    true_partial_auc = min_area + (max_area - min_area) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return true_partial_auc

## 4. Funciones para entrenamiento y métricas por época

In [32]:
def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch_num, num_epochs, num_classes):
    """Train ``model`` for one pass over ``train_loader`` and return epoch metrics.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to.
        epoch_num: Zero-based epoch index (printed as ``epoch_num + 1``).
        num_epochs: Total number of epochs, used only in log messages.
        num_classes: Unused; column 1 of the softmax output is always taken as
            the positive-class probability.

    Returns:
        dict with keys ``loss``, ``auc``, ``partial_auc``,
        ``balanced_accuracy``, ``f1_score``, ``recall`` and ``precision``.
    """
    model.train()

    running_loss = 0.0
    samples_seen = 0
    all_labels_list = []
    all_preds_proba_list = []

    start_time = time.time()

    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

        # Keep labels and positive-class probabilities for the epoch-level metrics
        all_labels_list.extend(labels.detach().cpu().numpy())
        probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()
        all_preds_proba_list.extend(probs)

        if (batch_idx + 1) % 50 == 0:
            print(f"  Epoch [{epoch_num+1}/{num_epochs}] Batch [{batch_idx+1}/{len(train_loader)}] Train Loss: {loss.item():.4f}")

    # Average over the samples actually processed: with a sampler or
    # drop_last=True the loader can yield fewer samples than len(dataset).
    epoch_loss = running_loss / max(samples_seen, 1)

    # Convert the accumulated lists to numpy arrays for scikit-learn
    all_labels_np = np.array(all_labels_list)
    all_preds_proba_np = np.array(all_preds_proba_list)

    # Epoch metrics; hard predictions use a 0.5 probability threshold
    predicted_classes_np = (all_preds_proba_np >= 0.5).astype(int)
    epoch_auc = roc_auc_score(all_labels_np, all_preds_proba_np)
    epoch_f1 = f1_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    epoch_recall = recall_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    epoch_precision = precision_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    epoch_custom_partial_auc = calculate_custom_partial_auc(all_labels_np, all_preds_proba_np)
    epoch_balanced_acc = balanced_accuracy_score(all_labels_np, predicted_classes_np)

    end_time = time.time()
    epoch_duration = end_time - start_time

    print(f"Epoch [{epoch_num+1}/{num_epochs}] Train Loss: {epoch_loss:.4f}, AUC: {epoch_auc:.4f}, PrAUC : {epoch_custom_partial_auc:.4f}, BalAcc: {epoch_balanced_acc:.4f}, F1: {epoch_f1:.4f}, Recall: {epoch_recall:.4f}, Precision: {epoch_precision:.4f}, Time: {epoch_duration:.2f}s")

    metrics = {
        'loss': epoch_loss,
        'auc': epoch_auc,
        'partial_auc': epoch_custom_partial_auc,
        'balanced_accuracy': epoch_balanced_acc,
        'f1_score': epoch_f1,
        'recall': epoch_recall,
        'precision': epoch_precision
    }
    return metrics

In [33]:
def validate_one_epoch(model, val_loader, criterion, device, epoch_num, num_epochs, num_classes):
    """Evaluate ``model`` on ``val_loader`` without gradients and return metrics.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        epoch_num: Zero-based epoch index (printed as ``epoch_num + 1``).
        num_epochs: Total number of epochs, used only in log messages.
        num_classes: Unused; column 1 of the softmax output is always taken as
            the positive-class probability.

    Returns:
        dict with keys ``loss``, ``auc``, ``partial_auc``,
        ``balanced_accuracy``, ``f1_score``, ``recall``, ``precision`` and
        ``conf_matrix``. When the labels contain fewer than two classes,
        ``auc`` is 0.5 and ``partial_auc`` is NaN.
    """
    model.eval()

    running_loss = 0.0
    samples_seen = 0
    all_labels_list = []
    all_preds_proba_list = []

    start_time = time.time()

    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(val_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

            all_labels_list.extend(labels.detach().cpu().numpy())
            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()
            all_preds_proba_list.extend(probs)

    # Average over the samples actually processed by the loader
    epoch_loss = running_loss / max(samples_seen, 1)

    all_labels_np = np.array(all_labels_list)
    all_preds_proba_np = np.array(all_preds_proba_list)

    # Both ranking metrics call roc_auc_score, which raises when a class is
    # missing, so they share one guard.
    if len(np.unique(all_labels_np)) < 2:
        print(f"Warning: Conjunto de validacion para epoca {epoch_num+1} le falta una clase!.")
        epoch_auc = 0.5
        # NaN never compares greater than a previous best score
        epoch_custom_partial_auc = float("nan")
    else:
        epoch_auc = roc_auc_score(all_labels_np, all_preds_proba_np)
        epoch_custom_partial_auc = calculate_custom_partial_auc(all_labels_np, all_preds_proba_np)

    # Hard predictions use a 0.5 probability threshold
    predicted_classes_np = (all_preds_proba_np >= 0.5).astype(int)
    epoch_f1 = f1_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    epoch_recall = recall_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    epoch_precision = precision_score(all_labels_np, predicted_classes_np, pos_label=1, zero_division=0)
    conf_matrix = confusion_matrix(all_labels_np, predicted_classes_np, labels=[0,1])
    epoch_balanced_acc = balanced_accuracy_score(all_labels_np, predicted_classes_np)

    end_time = time.time()
    epoch_duration = end_time - start_time

    print(f"Epoch [{epoch_num+1}/{num_epochs}] Val Loss: {epoch_loss:.4f}, AUC: {epoch_auc:.4f}, PrAUC: {epoch_custom_partial_auc:.4f}, BalAcc: {epoch_balanced_acc:.4f}, F1(pos): {epoch_f1:.4f}, Recall(pos): {epoch_recall:.4f}, Precision(pos): {epoch_precision:.4f}, Time: {epoch_duration:.2f}s")
    print(f"Matriz de confusion para epoca {epoch_num+1}:\n{conf_matrix}")

    metrics = {
        'loss': epoch_loss,
        'auc': epoch_auc,
        'partial_auc': epoch_custom_partial_auc,
        'balanced_accuracy': epoch_balanced_acc,
        'f1_score': epoch_f1,
        'recall': epoch_recall,
        'precision': epoch_precision,
        'conf_matrix': conf_matrix
    }
    return metrics

In [34]:
# Training is wrapped in a function because the DataLoaders run with NUM_WORKERS > 0
def start_training():
    """Fine-tune the last layer of the pretrained model and keep the best checkpoint.

    Builds the combined (real + synthetic) training set and the validation
    set, freezes every parameter except ``model.head.fc``, trains with AdamW
    for up to ``MAX_EPOCHS`` epochs with early stopping on the validation
    partial AUC, saves the best weights to ``MODEL_SAVE_PATH`` and writes the
    per-epoch metrics to ``training_history.csv`` in the same folder.

    Reads the module-level configuration constants and the dataframes
    ``train_original_df``, ``val_df`` and ``synthetic_df``. Returns ``None``.
    """
    # Combine synthetic and original images into one training table
    required_cols = [IMAGE_PATH_COL, TARGET_COL]
    train_df_combined = pd.concat(
        [train_original_df[required_cols], synthetic_df[required_cols]],
        ignore_index=True
    )

    # Training dataset (with augmentation)
    train_dataset = ISICDataset(
        dataframe=train_df_combined,
        image_path_col=IMAGE_PATH_COL,
        target_col=TARGET_COL,
        transforms=train_transforms
    )

    # Validation dataset (no augmentation)
    val_dataset = ISICDataset(
        dataframe=val_df,
        image_path_col=IMAGE_PATH_COL,
        target_col=TARGET_COL,
        transforms=val_transforms
    )

    # Load the pretrained model with a NUM_CLASSES-way head
    model = timm.create_model(
        MODEL_NAME,
        pretrained=True,
        num_classes=NUM_CLASSES
    )
    model = model.to(DEVICE)

    # Freeze every parameter, then unfreeze only the final layer of the head
    for param in model.parameters():
        param.requires_grad = False
    for param in model.head.fc.parameters():
        param.requires_grad = True

    # Inverse-frequency class weights, shared by the loss and the sampler.
    # NOTE(review): the weighted sampler and the weighted loss both compensate
    # for class imbalance - confirm the double correction is intended.
    class_counts = train_df_combined[TARGET_COL].value_counts().sort_index()
    weight_class_0 = 1.0 / (class_counts.get(0, 1e-9))
    weight_class_1 = 1.0 / (class_counts.get(1, 1e-9))
    weight_sum = weight_class_0 + weight_class_1

    # Loss with class weights normalised to sum to 1
    loss_weights = torch.tensor(
        [weight_class_0 / weight_sum, weight_class_1 / weight_sum],
        dtype=torch.float
    ).to(DEVICE)
    criterion = nn.CrossEntropyLoss(weight=loss_weights)
    optimizer = optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )

    # Oversample by drawing each sample with its class weight, with replacement
    print("--- Creando WeightedRandomSampler ---")
    is_class_0 = (train_df_combined[TARGET_COL] == 0).to_numpy()
    weights_train = torch.as_tensor(
        np.where(is_class_0, weight_class_0, weight_class_1),
        dtype=torch.double
    )
    train_sampler = WeightedRandomSampler(
        weights=weights_train,
        num_samples=len(weights_train),
        replacement=True
    )

    # DataLoaders
    print("--- Creando DataLoaders ---")
    use_pinned_memory = DEVICE.type == 'cuda'
    train_loader = DataLoader(
        train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler,
        num_workers=NUM_WORKERS, pin_memory=use_pinned_memory, drop_last=True
    )
    val_loader = DataLoader(
        val_dataset, batch_size=BATCH_SIZE * 2, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=use_pinned_memory
    )

    # History column suffix -> key in the per-epoch metrics dictionaries.
    # Insertion order fixes the column order of the history CSV.
    metric_columns = {
        'loss': 'loss',
        'auc': 'auc',
        'pr_auc': 'partial_auc',
        'bal_acc': 'balanced_accuracy',
        'f1': 'f1_score',
        'precision': 'precision',
        'recall': 'recall',
    }
    history = {
        f"{split}_{column}": []
        for split in ('train', 'val')
        for column in metric_columns
    }

    # The output folder must exist before torch.save / to_csv write into it
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
    best_model_path = os.path.join(MODEL_SAVE_PATH, "edgenext_best.pth")

    # Training loop with early stopping on the validation partial AUC
    best_val_pr_auc = 0.0
    epochs_no_improve = 0

    print(f"\n --- Comenzado loop entrenamiento por {MAX_EPOCHS} epochs ---")
    print(f"Mejor modelo se guardara como: {best_model_path}")

    for epoch in range(MAX_EPOCHS):
        epoch_start_time = time.time()
        print(f"\n===== Epoch {epoch+1}/{MAX_EPOCHS} =====")
        print(f"Learning rate: {optimizer.param_groups[0]['lr']:.6e}")

        # Train, then record the metrics of this epoch
        train_metrics = train_one_epoch(model, train_loader, criterion, optimizer, DEVICE, epoch, MAX_EPOCHS, NUM_CLASSES)
        for column, metric_key in metric_columns.items():
            history[f"train_{column}"].append(train_metrics[metric_key])

        val_metrics = validate_one_epoch(model, val_loader, criterion, DEVICE, epoch, MAX_EPOCHS, NUM_CLASSES)
        for column, metric_key in metric_columns.items():
            history[f"val_{column}"].append(val_metrics[metric_key])

        current_val_pr_auc = val_metrics['partial_auc']
        if current_val_pr_auc > best_val_pr_auc:
            print(f"Val PrAUC mejoro ({best_val_pr_auc:.4f} --> {current_val_pr_auc:.4f}). Guardando modelo")
            best_val_pr_auc = current_val_pr_auc
            torch.save(model.state_dict(), best_model_path)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            # The monitored metric is the partial AUC, so the message names it
            print(f"Val PrAUC ({current_val_pr_auc:.4f}) no mejoro desde la mejor ({best_val_pr_auc:.4f}). Ninguna mejora durante {epochs_no_improve} epochs.")

        if epochs_no_improve >= EARLY_STOPPING_PATIENCE:
            print(f"Early stopping {EARLY_STOPPING_PATIENCE} epochs sin mejora")
            break

        print(f"Duracion epoca {epoch+1}: {time.time() - epoch_start_time:.2f}s")

    print("\n --- Entrenamiento finalizado ---")
    print(f"Mejor Val PrAUC: {best_val_pr_auc:.4f}")
    print(f"Mejor modelo guardado en: {best_model_path}")

    # Persist the per-epoch metrics next to the checkpoint
    history_df = pd.DataFrame(history)
    history_csv_path = os.path.join(MODEL_SAVE_PATH, "training_history.csv")
    history_df.to_csv(history_csv_path, index=False)
    print(f"Historial de entrenamiento en: {history_csv_path}")

## 5. Entrenamiento

In [35]:
# Run the training only when this code executes as the main module.
# (traceback is already imported in the first cell, so it is not re-imported here.)
if __name__ == '__main__':
    print(f"Empezando entrenamiento con NUM_WORKERS = {NUM_WORKERS}")
    start_training()

Empezando entrenamiento con NUM_WORKERS = 14
--- Creando WeightedRandomSampler ---
--- Creando DataLoaders ---

 --- Comenzado loop entrenamiento por 30 epochs ---
Mejor modelo se guardara como: models/edgenext_last_layer/edgenext_best.pth

===== Epoch 1/30 =====
Learning rate: 1.000000e-04
  Epoch [1/30] Batch [50/9467] Train Loss: 0.3627
  Epoch [1/30] Batch [100/9467] Train Loss: 0.2684
  Epoch [1/30] Batch [150/9467] Train Loss: 0.1854
  Epoch [1/30] Batch [200/9467] Train Loss: 0.2518
  Epoch [1/30] Batch [250/9467] Train Loss: 0.2246
  Epoch [1/30] Batch [300/9467] Train Loss: 0.2097
  Epoch [1/30] Batch [350/9467] Train Loss: 0.1995
  Epoch [1/30] Batch [400/9467] Train Loss: 0.1429
  Epoch [1/30] Batch [450/9467] Train Loss: 0.3667
  Epoch [1/30] Batch [500/9467] Train Loss: 0.3118
  Epoch [1/30] Batch [550/9467] Train Loss: 0.2731
  Epoch [1/30] Batch [600/9467] Train Loss: 0.1279
  Epoch [1/30] Batch [650/9467] Train Loss: 0.1465
  Epoch [1/30] Batch [700/9467] Train Loss: 0.