## Carga de datos

In [20]:
from pathlib import Path                
import numpy as np                       
from sklearn.model_selection import train_test_split  
import torch
from time import time
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms         

# --- Configuration -----------------------------------------------------------
DATA_DIR = Path("data/fold1")
IMG_SIZE = 224      # side length (pixels) every image is resized to
BATCH_SIZE = 32
SEED = 42           # shared by both stratified splits so the partition is reproducible

# Per-channel normalisation statistics (the ImageNet values), defined once so the
# training and evaluation pipelines cannot drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Pinned host memory only helps host-to-GPU copies; requesting it without CUDA
# just makes the DataLoader emit a warning.
PIN_MEMORY = torch.cuda.is_available()

# --- Transforms --------------------------------------------------------------
# Training pipeline: resize -> tensor -> normalise. It currently contains no
# random augmentation, so it matches the evaluation pipeline below; the two are
# kept as separate objects so augmentation can be added to training only.
train_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation / test pipeline (deterministic).
eval_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# --- Dataset and class mapping -----------------------------------------------
full_train = datasets.ImageFolder(root=str(DATA_DIR), transform=train_tfms)
class_to_idx = full_train.class_to_idx
print("Clases -> idx:", class_to_idx)

# --- Stratified 70 / 15 / 15 split -------------------------------------------
N = len(full_train)
targets = np.array(full_train.targets)   # integer label of every sample, in dataset order

idx_all = np.arange(N)
# First carve off 30 % of the samples, then halve that 30 % into validation and test.
train_idx, tmp_idx = train_test_split(
    idx_all, test_size=0.30, random_state=SEED, stratify=targets
)
val_idx, test_idx = train_test_split(
    tmp_idx, test_size=0.50, random_state=SEED, stratify=targets[tmp_idx]
)
assert len(train_idx) + len(val_idx) + len(test_idx) == N, "split does not cover the dataset"

# --- Final datasets ----------------------------------------------------------
train_ds = Subset(full_train, train_idx)
# A second ImageFolder over the same directory carries the evaluation transforms;
# the index arrays computed above are reused to select its validation/test samples.
eval_base = datasets.ImageFolder(root=str(DATA_DIR), transform=eval_tfms)
val_ds = Subset(eval_base, val_idx)
test_ds = Subset(eval_base, test_idx)

# --- DataLoaders -------------------------------------------------------------
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=PIN_MEMORY)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=PIN_MEMORY)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=PIN_MEMORY)

print(f"Train: {len(train_ds)} | Val: {len(val_ds)} | Test: {len(test_ds)}")
print("Clases:", list(class_to_idx.keys()))




Clases -> idx: {'aca_bd': 0, 'aca_md': 1, 'aca_pd': 2, 'nor': 3, 'scc_bd': 4, 'scc_md': 5, 'scc_pd': 6}
Train: 242 | Val: 52 | Test: 52
Clases: ['aca_bd', 'aca_md', 'aca_pd', 'nor', 'scc_bd', 'scc_md', 'scc_pd']


## Entrenamiento y validación

In [21]:
def fit(model, train_loader, val_loader, optimizer, scheduler=None, epochs=10, patience=3, device="cpu"):
    """Train ``model`` with early stopping and restore the best weights seen.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``
    through ``run_epoch`` (a helper defined elsewhere in the notebook; it is
    called with ``optimizer=None`` for the validation pass, presumably to
    disable weight updates -- confirm against its definition).

    Args:
        model: network to train; its weights are modified in place.
        train_loader: iterable of training batches handed to ``run_epoch``.
        val_loader: iterable of validation batches handed to ``run_epoch``.
        optimizer: optimizer used for the training pass.
        scheduler: optional learning-rate scheduler, stepped once per epoch
            after validation. A ``ReduceLROnPlateau`` receives the validation
            loss; any other scheduler is stepped without arguments.
        epochs: maximum number of epochs.
        patience: number of consecutive epochs without a validation-loss
            improvement after which training stops early.
        device: device forwarded to ``run_epoch``.

    Returns:
        A list with one ``(train_loss, train_acc, val_loss, val_acc)`` tuple
        per completed epoch.
    """
    best_val = np.inf      # lowest validation loss so far
    best_state = None      # CPU copy of the weights that produced best_val
    no_improve = 0         # consecutive epochs without improvement
    history = []

    t0 = time()            # wall-clock start, reported at the end
    for epoch in range(1, epochs + 1):
        # One training pass, then one validation pass.
        tr_loss, tr_acc = run_epoch(model, train_loader, optimizer, device)
        val_loss, val_acc = run_epoch(model, val_loader, optimizer=None, device=device)

        history.append((tr_loss, tr_acc, val_loss, val_acc))
        print(f"[{epoch:02d}] train_loss={tr_loss:.4f} acc={tr_acc:.3f} | "
              f"val_loss={val_loss:.4f} acc={val_acc:.3f}")

        # Advance the LR schedule. ReduceLROnPlateau needs the monitored metric;
        # every other scheduler is stepped without arguments.
        if scheduler is not None:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

        # Keep a snapshot of the best model; the 1e-4 margin ignores negligible gains.
        if val_loss < best_val - 1e-4:
            best_val = val_loss
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print("Early stopping.")
                break

    print(f"Tiempo total: {time()-t0:.1f}s")
    # Roll back to the best snapshot so the caller gets the best model, not the last one.
    if best_state is not None:
        model.load_state_dict(best_state)
    return history

# Learning-rate scheduler: multiplies the learning rate by 0.5 when the
# monitored quantity (minimised; here the validation loss) stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=1)

# Train for at most 10 epochs, stopping early after 3 epochs without improvement.
history = fit(
    model,
    train_loader,
    val_loader,
    optimizer,
    scheduler=scheduler,
    epochs=10,
    patience=3,
    device=device,
)




[01] train_loss=0.1411 acc=0.950 | val_loss=1.1696 acc=0.673
[02] train_loss=0.0479 acc=0.992 | val_loss=1.1858 acc=0.692
[03] train_loss=0.0511 acc=0.992 | val_loss=1.1694 acc=0.673
[04] train_loss=0.0393 acc=0.996 | val_loss=1.1829 acc=0.673
[05] train_loss=0.0256 acc=0.996 | val_loss=1.1426 acc=0.712
[06] train_loss=0.0344 acc=0.992 | val_loss=1.1582 acc=0.712
[07] train_loss=0.0180 acc=1.000 | val_loss=1.3628 acc=0.712
[08] train_loss=0.0273 acc=0.996 | val_loss=1.2640 acc=0.712
Early stopping.
Tiempo total: 294.0s


## Evaluar el modelo

In [23]:
def evaluate_on_loader(model, loader, class_names, device="cpu"):
    """Run ``model`` over ``loader`` and print accuracy, per-class report and confusion matrix.

    Args:
        model: classifier whose output is a batch of per-class scores (the
            prediction is the argmax over dimension 1).
        loader: iterable yielding ``(inputs, labels)`` batches; labels must be
            CPU tensors because they are converted with ``.numpy()``.
        class_names: display names of the classes. Class ``i`` is assumed to
            be labelled with the integer ``i`` (this holds for the
            ``ImageFolder.classes`` list the notebook passes in).
        device: device the inputs are moved to before the forward pass.

    Returns:
        ``(y_true, y_pred)`` as 1-D NumPy arrays covering every sample in ``loader``.
    """
    model.eval()
    all_preds, all_true = [], []
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for xb, yb in loader:
            logits = model(xb.to(device))
            all_preds.append(logits.argmax(dim=1).cpu().numpy())
            all_true.append(yb.numpy())
    y_pred = np.concatenate(all_preds)
    y_true = np.concatenate(all_true)

    # Pass the label set explicitly so the report and the matrix always have one
    # row per class, even when a class is absent from this particular subset
    # (without it, classification_report raises on the target_names mismatch).
    labels = np.arange(len(class_names))

    print("Test Accuracy:", (y_pred == y_true).mean())  # fraction of correct predictions
    print("\nReporte:\n", classification_report(y_true, y_pred, labels=labels, target_names=class_names))
    print("\nMatriz de confusión:\n", confusion_matrix(y_true, y_pred, labels=labels))
    return y_true, y_pred

# Class names, in label-index order.
try:
    # The loader's dataset is expected to wrap a base dataset (reached through
    # `.dataset`) that exposes the class list as `.classes`.
    class_names = train_loader.dataset.dataset.classes
except AttributeError:
    # Only a missing attribute triggers the fallback; any other error (or a
    # keyboard interrupt) propagates instead of being silently swallowed.
    # Fall back to the dataset's own `.classes`, or to stringified indices.
    class_names = getattr(train_loader.dataset, 'classes', [str(i) for i in range(num_classes)])

# Run the evaluation on the held-out test split.
y_true_test, y_pred_test = evaluate_on_loader(model, test_loader, class_names, device=device)



Test Accuracy: 0.8269230769230769

Reporte:
               precision    recall  f1-score   support

      aca_bd       1.00      0.75      0.86         8
      aca_md       1.00      0.88      0.93         8
      aca_pd       1.00      0.50      0.67         6
         nor       0.80      1.00      0.89        12
      scc_bd       0.86      1.00      0.92         6
      scc_md       0.40      0.50      0.44         4
      scc_pd       0.78      0.88      0.82         8

    accuracy                           0.83        52
   macro avg       0.83      0.79      0.79        52
weighted avg       0.86      0.83      0.82        52


Matriz de confusión:
 [[ 6  0  0  2  0  0  0]
 [ 0  7  0  1  0  0  0]
 [ 0  0  3  0  0  3  0]
 [ 0  0  0 12  0  0  0]
 [ 0  0  0  0  6  0  0]
 [ 0  0  0  0  0  2  2]
 [ 0  0  0  0  1  0  7]]


El modelo alcanzó una exactitud general del 83 % en el conjunto de test (43 de 52 imágenes), mostrando un buen desempeño en clases como nor, scc_bd, aca_bd y aca_md, donde logró valores altos de precisión y recall (f1-score entre 0.86 y 0.93). Sin embargo, se observaron dificultades en aca_pd (f1-score de 0.67) y especialmente en scc_md (f1-score de 0.44), debido a confusiones frecuentes: la mitad de las muestras de aca_pd (3 de 6) se clasificaron como scc_md, y la mitad de las de scc_md (2 de 4) como scc_pd. Esto indica que el modelo distingue bien la mayoría de los tejidos, pero aún tiene problemas para separar subtipos con características muy similares, lo que sugiere la necesidad de aplicar más técnicas de data augmentation, ajuste fino del modelo y balanceo de clases para mejorar su rendimiento en estas categorías. Dado que el conjunto de test contiene solo entre 4 y 12 imágenes por clase, las métricas por clase deben interpretarse con cautela.

