In [None]:
import torch

# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


## **To train**

Training requires the label DataFrames (dfs) plus the train and validation dataloaders. Images must already be resized to 224×224 and normalized with `Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])`.



**dfs**

In [None]:
# prompt: mount the Google Drive disk

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
from google.colab import drive
import os

# Folder on the mounted Drive that holds the label CSVs, and the three split
# files expected inside it.
base_path = "/content/drive/MyDrive/Diagnovision/filtered_df"
csv_files = ["train_df.csv", "val_df.csv", "test_df.csv"]

# Report, one line per split file, whether it exists under base_path.
# A missing file is only reported; nothing is raised.
for file in csv_files:
    path = os.path.join(base_path, file)
    print(f"{file} found." if os.path.exists(path) else f"⚠ ERROR: {file} Not found.")

train_df.csv found.
val_df.csv found.
test_df.csv found.


**tensor folder files**


*Make sure the code that locates the tensors (`.pt` files) scans subfolders, since the `.pt` files will be in the last (deepest) subfolder.*

In [None]:
# Root folder of the preprocessed image tensors on the mounted Drive.
# NOTE(review): this variable is not referenced by the other cells shown here;
# the dataloader cells read from filtered_df/tensor_df instead — confirm it is still needed.
tensor_save_folder = "/content/drive/MyDrive/Diagnovision/preprocessed_tensors"

## **dataloaders**

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

# Load the serialized training dataset from Drive, placing any tensors on the CPU.
# NOTE(review): the original comment described this as a "memory-mapped tensor",
# but no mmap option is passed to torch.load here — confirm whether memory
# mapping was actually intended.
train_tensor_path = "/content/drive/MyDrive/Diagnovision/filtered_df/tensor_df/train_tensor_df.pt"
all_data = torch.load(train_tensor_path, map_location="cpu")

class MemoryMappedDataset(Dataset):
    """Thin ``Dataset`` adapter over an already-loaded, indexable collection.

    The class does no loading, copying or memory mapping of its own: it keeps a
    reference to ``data`` and forwards ``len()`` and integer indexing to it.

    Args:
        data: Any object supporting ``len()`` and ``data[idx]``.
    """

    def __init__(self, data):
        # Only a reference is stored; the samples are returned exactly as held.
        self.data = data

    def __len__(self):
        samples = self.data
        return len(samples)

    def __getitem__(self, idx):
        samples = self.data
        return samples[idx]

# Load the serialized validation dataset from Drive onto the CPU.
# NOTE(review): as with the training split, no mmap option is passed to
# torch.load — confirm whether memory mapping was intended.
val_tensor_path = "/content/drive/MyDrive/Diagnovision/filtered_df/tensor_df/val_tensor_df.pt"
all_val_data = torch.load(val_tensor_path, map_location="cpu")

# Wrap both splits so DataLoader can index them.
train_dataset = MemoryMappedDataset(all_data)
val_dataset = MemoryMappedDataset(all_val_data)

# Training batches are shuffled; validation keeps a fixed order and uses
# four worker processes.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=12,
    shuffle=True,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

print("✅ DataLoaders reconstruidos correctamente.")


  all_data = torch.load(train_tensor_path, map_location="cpu")
  all_val_data = torch.load(val_tensor_path, map_location="cpu")


✅ DataLoaders reconstruidos correctamente.


## **densenet 0303**

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset

# ✅ Select the compute device: CUDA when available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ✅ Augmentation pipeline for training images (PIL input).
# The geometric and colour transforms run on the image first, ToTensor and
# Normalize convert it, and RandomErasing comes last: it only operates on
# tensors, so placing it before ToTensor (as before) fails on PIL input
# whenever the erase is triggered.
# NOTE(review): neither pipeline is referenced by the other cells shown here —
# confirm whether they are still meant to be used.
train_transform = transforms.Compose([
    transforms.RandomRotation(degrees=20),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=15, scale=(0.7, 1.3), shear=15),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.2)),  # More aggressive; tensor-only op
])

# ✅ Deterministic pipeline for validation images: tensor conversion + the same normalization
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ✅ Load DenseNet121 pre-trained on ImageNet and replace its head with a
# 12-output linear layer (one logit per label).
model = models.densenet121(weights="IMAGENET1K_V1")
model.classifier = nn.Linear(model.classifier.in_features, 12, bias=True)

# ✅ Xavier initialization for the NEW classifier head only.
# The previous loop re-initialized every multi-dimensional weight in the
# network, which overwrote the ImageNet weights loaded just above and left the
# model training from a random backbone. Only the freshly created head has no
# pre-trained weights, so only it is initialized here.
nn.init.xavier_uniform_(model.classifier.weight)

model.to(device)

# ✅ Load the training labels
df_labels = pd.read_csv("/content/drive/MyDrive/Diagnovision/filtered_df/train_df.csv")

# ✅ Select only label columns (everything except the image path and the report text).
# NOTE(review): Index.difference returns the names sorted — confirm this order
# matches the label order stored in the tensor datasets.
label_columns = df_labels.columns.difference(['path_to_image', 'clean_impression'])

# ✅ Convert to numeric (unparseable values become NaN) and clip into [0, 1]
df_labels[label_columns] = df_labels[label_columns].apply(pd.to_numeric, errors='coerce')
df_labels[label_columns] = df_labels[label_columns].clip(0, 1)

# ✅ Balancing step (currently disabled)
#df_labels = balance_dataset(df_labels)

# ✅ Per-label `pos_weight` = (#negatives / #positives), bounded to [1, 10].
# The previous formula, 1 / #positives, is below 1 for any label with more than
# one positive example, so after clipping to [1, 10] every weight collapsed to
# 1 and the loss was effectively unweighted. NaN labels are skipped by both sums.
label_counts = df_labels[label_columns].sum(axis=0).values.astype(float)
negative_counts = (1.0 - df_labels[label_columns]).sum(axis=0).values.astype(float)
pos_weight = torch.tensor(
    negative_counts / (label_counts + 1e-6),  # epsilon guards labels with no positives
    dtype=torch.float32,
).clip(1, 10).to(device)

# ✅ Define loss function with `pos_weight`
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# ✅ AdamW over all model parameters, with a cosine-annealed learning rate.
# NOTE(review): T_max=10 is shorter than the 35-epoch budget set below, so the
# cosine schedule will presumably start rising again after epoch 10 — confirm
# this is intended.
optimizer = optim.AdamW(
    model.parameters(),
    lr=5e-5,
    weight_decay=0.01,
)
scheduler = CosineAnnealingLR(
    optimizer,
    T_max=10,
    eta_min=1e-6,
)

# ✅ Training parameters
num_epochs = 35                 # upper bound on epochs
best_val_loss = float("inf")    # lowest validation loss seen so far
best_threshold = 0.5            # decision threshold applied to sigmoid outputs
patience = 5                    # epochs without improvement tolerated before stopping
early_stop_counter = 0          # consecutive epochs without improvement

# ✅ Load Checkpoint if available.
# `epoch` ends up holding the index the training loop starts from: the value
# stored in the checkpoint when resuming, 0 otherwise.
checkpoint_path = "/content/drive/MyDrive/Diagnovision/modelo_img/best_models/checkpoint_densenet0303.pth"
if os.path.exists(checkpoint_path):
    # map_location remaps stored tensors onto the current device, so a
    # checkpoint written on a GPU runtime can also be resumed on a CPU-only one.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    epoch = checkpoint["epoch"]
    best_val_loss = checkpoint["best_val_loss"]
    best_threshold = checkpoint["best_threshold"]
    early_stop_counter = checkpoint["early_stop_counter"]
    print(f"Resuming training from epoch {epoch+1}...")
else:
    epoch = 0
    print("Starting training from scratch...")

# ✅ Training Loop
# Each epoch: one optimisation pass over train_dataloader, one evaluation pass
# over val_dataloader, a scheduler step, best-model / early-stopping
# bookkeeping, and a resumable checkpoint written to checkpoint_path.
threshold_grid = np.linspace(0.1, 0.9, 9)  # candidate decision thresholds

for epoch in range(epoch, num_epochs):
    model.train()
    train_loss = 0.0

    for images, labels in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} (Train)"):
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        # Cap the global gradient norm before the parameter update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        train_loss += loss.item()

    # ✅ Validation
    model.eval()
    val_loss = 0.0
    label_batches = []
    prob_batches = []

    with torch.no_grad():
        for images, labels in tqdm(val_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} (Val)"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            label_batches.append(labels.cpu().numpy())
            prob_batches.append(torch.sigmoid(outputs).cpu().numpy())

    all_labels = np.concatenate(label_batches)
    all_probs = np.concatenate(prob_batches)

    # ✅ Dynamic Thresholding (every 5th epoch), chosen on the WHOLE validation set.
    # Previously the search ran inside the batch loop, so the threshold was
    # overwritten per batch and only the last batch decided it.
    if epoch % 5 == 0:
        f1_per_threshold = [
            f1_score(all_labels, all_probs > t, average="micro", zero_division=0)
            for t in threshold_grid
        ]
        # Stored as a plain float so it serializes cleanly inside the checkpoint
        best_threshold = float(threshold_grid[int(np.argmax(f1_per_threshold))])

    all_preds = all_probs > best_threshold
    val_f1 = f1_score(all_labels, all_preds, average="micro", zero_division=0)

    # ✅ Print Metrics
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_dataloader):.6f}, Val Loss: {val_loss/len(val_dataloader):.6f}")
    print(f"Epoch {epoch+1}/{num_epochs}, Val F1 (micro): {val_f1:.4f} @ threshold {best_threshold:.1f}")

    scheduler.step()

    # ✅ Best-model tracking (val_loss is the summed batch loss over the same loader each epoch)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), "/content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_best0303.pth")
    else:
        early_stop_counter += 1

    # ✅ Resumable checkpoint. The resume code reads this file but nothing wrote
    # it before. "epoch" stores the NEXT epoch index, because the resume code
    # uses the stored value directly as the start of the loop range.
    torch.save(
        {
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
            "epoch": epoch + 1,
            "best_val_loss": best_val_loss,
            "best_threshold": best_threshold,
            "early_stop_counter": early_stop_counter,
        },
        checkpoint_path,
    )

    # ✅ Early Stopping
    if early_stop_counter >= patience:
        print("Early stopping!")
        break

# Save the weights as they are after the last executed epoch. The
# lowest-validation-loss weights are the ones in densenet_best0303.pth.
final_model_path = "/content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_final0303.pth"
torch.save(model.state_dict(), final_model_path)
print(f"✅ Final model saved at: {final_model_path}")

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 213MB/s]


Starting training from scratch...


Epoch 1/35 (Train): 100%|██████████| 2838/2838 [03:42<00:00, 12.77it/s]
Epoch 1/35 (Val): 100%|██████████| 228/228 [00:19<00:00, 11.43it/s]


Epoch 1/35, Train Loss: 0.328366, Val Loss: 0.320500


Epoch 2/35 (Train): 100%|██████████| 2838/2838 [03:40<00:00, 12.87it/s]
Epoch 2/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.07it/s]


Epoch 2/35, Train Loss: 0.320227, Val Loss: 0.320994


Epoch 3/35 (Train): 100%|██████████| 2838/2838 [03:42<00:00, 12.77it/s]
Epoch 3/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.12it/s]


Epoch 3/35, Train Loss: 0.319877, Val Loss: 0.320101


Epoch 4/35 (Train): 100%|██████████| 2838/2838 [03:40<00:00, 12.85it/s]
Epoch 4/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.03it/s]


Epoch 4/35, Train Loss: 0.319616, Val Loss: 0.320511


Epoch 5/35 (Train): 100%|██████████| 2838/2838 [03:40<00:00, 12.88it/s]
Epoch 5/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.11it/s]


Epoch 5/35, Train Loss: 0.319412, Val Loss: 0.319973


Epoch 6/35 (Train): 100%|██████████| 2838/2838 [03:40<00:00, 12.86it/s]
Epoch 6/35 (Val): 100%|██████████| 228/228 [00:19<00:00, 11.65it/s]


Epoch 6/35, Train Loss: 0.319107, Val Loss: 0.320070


Epoch 7/35 (Train): 100%|██████████| 2838/2838 [03:41<00:00, 12.81it/s]
Epoch 7/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.03it/s]


Epoch 7/35, Train Loss: 0.318794, Val Loss: 0.320036


Epoch 8/35 (Train): 100%|██████████| 2838/2838 [03:42<00:00, 12.74it/s]
Epoch 8/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.00it/s]


Epoch 8/35, Train Loss: 0.318233, Val Loss: 0.320155


Epoch 9/35 (Train): 100%|██████████| 2838/2838 [03:43<00:00, 12.69it/s]
Epoch 9/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 15.06it/s]


Epoch 9/35, Train Loss: 0.317660, Val Loss: 0.320168


Epoch 10/35 (Train): 100%|██████████| 2838/2838 [03:43<00:00, 12.69it/s]
Epoch 10/35 (Val): 100%|██████████| 228/228 [00:15<00:00, 14.94it/s]


Epoch 10/35, Train Loss: 0.317142, Val Loss: 0.320247
Early stopping!
✅ Final model saved at: /content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_final0303.pth


**evaluacion densenet0303**

In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Imported here so this cell does not depend on the training cell having been
# run first (it previously relied on `models` and `nn` leaking from that cell).
import torch.nn as nn
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Weights to evaluate: the state dict saved after the last training epoch.
# NOTE(review): the lowest-validation-loss weights are saved separately as
# densenet_best0303.pth — confirm which file is meant to be evaluated.
model_path = "/content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_final0303.pth"

# 📌 Rebuild the architecture used in training: DenseNet121 with a 12-output head.
# weights=None skips the ImageNet download, since every parameter is
# overwritten by load_state_dict just below.
model = models.densenet121(weights=None)
model.classifier = nn.Linear(model.classifier.in_features, 12)  # Adjust for 12 classes

# Load model weights
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()


# Evaluate on validation set
all_labels = []
all_preds = []
all_probs = []

with torch.no_grad():
    for images, labels in tqdm(val_dataloader, desc="Evaluating Model on Validation Set"):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        probs = torch.sigmoid(outputs)  # Convert logits to probabilities
        preds = (probs > 0.5).float()  # Binary predictions at a fixed 0.5 threshold

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

# Convert lists to NumPy arrays (one row per sample)
all_labels = np.array(all_labels)
all_preds = np.array(all_preds)
all_probs = np.array(all_probs)

# Compute evaluation metrics; precision/recall/F1 are micro-averaged over all labels
accuracy = accuracy_score(all_labels, all_preds)
precision_micro = precision_score(all_labels, all_preds, average="micro", zero_division=0)
recall_micro = recall_score(all_labels, all_preds, average="micro", zero_division=0)
f1_micro = f1_score(all_labels, all_preds, average="micro", zero_division=0)

# Print results
print("\n🔹 **FINAL VALIDATION METRICS** 🔹")
print(f"Validation Accuracy: {accuracy * 100:.2f}%")
print(f"Validation Precision (Micro): {precision_micro:.4f}")
print(f"Validation Recall (Micro): {recall_micro:.4f}")
print(f"Validation F1 Score (Micro): {f1_micro:.4f}")

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")))
Evaluating Model on Validation Set: 100%|██████████| 228/228 [00:15<00:00, 14.94it/s]


🔹 **FINAL VALIDATION METRICS** 🔹
Validation Accuracy: 21.41%
Validation Precision (Micro): 0.4586
Validation Recall (Micro): 0.0074
Validation F1 Score (Micro): 0.0147





**to eval in TEST**

**test dataloader**

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

# Load the serialized test dataset from Drive, placing any tensors on the CPU.
# NOTE(review): the original comment described this as a "mapped" tensor, but
# no mmap option is passed to torch.load here — confirm whether memory mapping
# was actually intended.
test_tensor_path = "/content/drive/MyDrive/Diagnovision/filtered_df/tensor_df/test_tensor_df.pt"
all_test_data = torch.load(test_tensor_path, map_location="cpu")

# Dataset wrapper used for the test split
class MemoryMappedDataset(Dataset):
    """Expose an already-loaded, indexable collection through the ``Dataset`` API.

    Nothing is loaded, copied or memory-mapped by this class itself; ``len()``
    and integer indexing are forwarded to the wrapped ``data`` object.

    Args:
        data: Any object supporting ``len()`` and ``data[idx]``.
    """

    def __init__(self, data):
        # Hold a reference to the samples; they are returned unchanged.
        self.data = data

    def __len__(self):
        wrapped = self.data
        return len(wrapped)

    def __getitem__(self, idx):
        wrapped = self.data
        return wrapped[idx]

# Build the test DataLoader: fixed order, batches of 32, four worker processes
test_dataset = MemoryMappedDataset(all_test_data)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

print("✅ DataLoader de test cargado correctamente")


  all_test_data = torch.load(test_tensor_path, map_location="cpu")


✅ DataLoader de test cargado correctamente


In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import torchvision.models as models
import torch.nn as nn

# ✅ State-dict files to evaluate; each entry is evaluated in turn.
# NOTE(review): this is the last-epoch file; the lowest-validation-loss weights
# are saved separately as densenet_best0303.pth — confirm which should be listed.
model_paths = ["/content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_final0303.pth"]

# 📌 Device (GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 📌 Evaluate each model on the test set
for model_path in model_paths:
    print(f"\n🔹 Evaluating Model: {model_path}")

    # Rebuild the architecture used in training: DenseNet121 with a 12-output head.
    # weights=None skips the ImageNet download, since every parameter is
    # overwritten by load_state_dict just below.
    model = models.densenet121(weights=None)
    model.classifier = nn.Linear(model.classifier.in_features, 12)  # 12 classes

    # Load the saved weights
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    # Per-sample accumulators for this model
    all_labels = []
    all_preds = []

    # Run the test set through the model without tracking gradients
    with torch.no_grad():
        for images, labels in tqdm(test_dataloader, desc="Evaluating Model on Test Set"):
            images, labels = images.to(device), labels.to(device)

            # Predictions
            outputs = model(images)
            probs = torch.sigmoid(outputs)  # Logits to probabilities
            preds = (probs > 0.5).float()  # Binary predictions at a fixed 0.5 threshold

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    # Convert to NumPy arrays (one row per sample)
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)

    # Compute metrics; precision/recall/F1 are micro-averaged over all labels
    accuracy = accuracy_score(all_labels, all_preds)
    precision_micro = precision_score(all_labels, all_preds, average="micro", zero_division=0)
    recall_micro = recall_score(all_labels, all_preds, average="micro", zero_division=0)
    f1_micro = f1_score(all_labels, all_preds, average="micro", zero_division=0)

    # Print results
    print("\n🔹 **FINAL TEST METRICS** 🔹")
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test Precision (Micro): {precision_micro:.4f}")
    print(f"Test Recall (Micro): {recall_micro:.4f}")
    print(f"Test F1 Score (Micro): {f1_micro:.4f}")


🔹 Evaluating Model: /content/drive/MyDrive/Diagnovision/modelo_img/best_models/densenet_final0303.pth


  model.load_state_dict(torch.load(model_path, map_location=device))
Evaluating Model on Test Set: 100%|██████████| 229/229 [00:15<00:00, 14.48it/s]


🔹 **FINAL TEST METRICS** 🔹
Test Accuracy: 21.30%
Test Precision (Micro): 0.4486
Test Recall (Micro): 0.0075
Test F1 Score (Micro): 0.0148



