In [None]:
# Google Drive folder that the dataset cell further down lists for images.
# NOTE(review): this runs before the Drive mount cell and the same value is
# assigned again in a later cell, so one of the two assignments is redundant.
data_path = "/content/drive/MyDrive/instances_images"

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; data_path
# points to a folder inside this mount, so this must run before any file access.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ============================================================
# Imports (Standard + Array/Image + PyTorch + TorchVision + Sklearn + Viz)
# ============================================================

# -------- Standard library --------
import os
import random

# -------- Array / image handling --------
import numpy as np
import pandas as pd
from PIL import Image

# -------- PyTorch --------
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split, Subset

# -------- TorchVision (transforms) --------
from torchvision import transforms

# -------- Scikit-learn --------
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
)

# -------- Visualization --------
import matplotlib.pyplot as plt



In [None]:
# Directory handed to GraphImagesDataset in the next cell.
# NOTE(review): repeats the assignment made in the notebook's first cell with
# the same value.
data_path = "/content/drive/MyDrive/instances_images"


In [None]:
# ============================================================
# Configuration: every value a reader might want to tune
# ============================================================
seed = 42
batch_size = 64
n_epochs = 29
learning_rate = 1e-2
train_ratio = 0.8  # fraction of the images used for training; the rest is validation

# ============================================================
# 1) Seeding: Python, NumPy and PyTorch each keep their own RNG
# ============================================================
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# ============================================================
# 2) Transforms: resize every image to 32x32, then convert it to a tensor
# ============================================================
transform = transforms.Compose(
    [transforms.Resize((32, 32)), transforms.ToTensor()]
)

# ============================================================
# 3) Dataset
# ============================================================
class GraphImagesDataset(Dataset):
    """Image dataset whose class label is encoded in the file-name prefix.

    Every entry directly inside ``root_dir`` whose name ends in ``.png``,
    ``.jpg`` or ``.jpeg`` (case-insensitive) is one sample. The text before
    the first underscore of the file name, lower-cased, must be one of
    ``class_names``; its position in that list is the integer label.

    Args:
        root_dir: Directory that is listed (non-recursively) for image files.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.

    Attributes:
        image_paths: Full paths of the samples, sorted by file name.
        class_names: ``['barabasi', 'watts', 'erdos']``.
        class_to_idx: Mapping from class name to integer label.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        valid_exts = (".png", ".jpg", ".jpeg")
        # os.listdir() returns entries in arbitrary, filesystem-dependent
        # order. Sorting pins the index -> file mapping so that a seeded
        # split of this dataset picks the same files on every run and machine.
        self.image_paths = [
            os.path.join(root_dir, f)
            for f in sorted(os.listdir(root_dir))
            if f.lower().endswith(valid_exts)
        ]

        self.class_names = ['barabasi', 'watts', 'erdos']
        self.class_to_idx = {name: idx for idx, name in enumerate(self.class_names)}

    def __len__(self):
        """Return the number of image files found in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Raises:
            ValueError: If the file-name prefix is not a known class name.
        """
        img_path = self.image_paths[idx]
        filename = os.path.basename(img_path)
        prefix = filename.split('_')[0].lower()

        # Resolve the label first so a badly named file is rejected before
        # any file handle is opened.
        if prefix not in self.class_to_idx:
            raise ValueError(f"Clase desconocida en archivo {filename}: {prefix}")
        label = self.class_to_idx[prefix]

        # convert() returns a new, fully loaded image, so the handle opened by
        # Image.open() can be closed right away instead of waiting for the GC.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

# Build the dataset from every image found under data_path.
dataset = GraphImagesDataset(root_dir=data_path, transform=transform)
print("Total de imágenes:", len(dataset))

# ============================================================
# 4) Train / validation split
# ============================================================
# The training share is truncated to an integer; validation takes the remainder,
# so the two sizes always add up to the dataset length.
train_size = int(train_ratio * len(dataset))
val_size = len(dataset) - train_size

# A dedicated, seeded generator keeps the split independent of the global RNG state.
train_ds, val_ds = random_split(
    dataset=dataset,
    lengths=[train_size, val_size],
    generator=torch.Generator().manual_seed(seed),
)

print("Train:", len(train_ds), "Val:", len(val_ds))

# Only the training loader reshuffles; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=False)

# ============================================================
# 5) CNN
# ============================================================
class Net(nn.Module):
    """Small CNN classifier: two conv + max-pool stages followed by two linear layers.

    With the defaults the network is the original 3-class model for
    3 x 32 x 32 inputs; the parameters only lift the previously hard-coded
    sizes so the same architecture can be reused for other inputs.

    Args:
        num_classes: Number of output logits.
        in_channels: Number of channels of the input images.
        input_size: Side length, in pixels, of the (square) input images. It
            fixes the number of input features of the first linear layer.

    Raises:
        ValueError: If ``input_size`` is smaller than 4, which would leave no
            spatial positions after the two 2x2 poolings.
    """

    def __init__(self, num_classes=3, in_channels=3, input_size=32):
        super().__init__()
        if input_size < 4:
            raise ValueError(
                f"input_size debe ser al menos 4 (recibido: {input_size})"
            )

        # Padding of 1 with a 3x3 kernel keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # Each 2x2 max-pool floor-halves the side length, and forward() applies
        # two of them: input_size -> input_size // 4 (32 -> 16 -> 8 by default).
        pooled_size = input_size // 4
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of images to a batch of unnormalised class scores (logits)."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)  # first halving of the spatial size

        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)  # second halving of the spatial size

        # Keep the batch dimension, flatten everything else for the linear layers.
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the network with its default arguments, move its parameters to
# the selected device, and print the layer summary.
model = Net().to(device)
print(model)

# ============================================================
# 6) Train loop + métricas
# ============================================================
def _run_epoch(model, loader, loss_fn, device, optimizer, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, metrics)``.

    Args:
        model: Network to run.
        loader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer stepped after every batch; only used when
            ``training`` is true.
        training: If true, the model is put in train mode and updated after
            every batch; otherwise it is put in eval mode and gradients are
            disabled.

    Returns:
        A tuple ``(mean_loss, metrics)``: the sample-weighted mean loss over
        the pass, and a dict with keys ``"accuracy"``, ``"precision"``,
        ``"recall"`` and ``"f1"`` (the last three macro-averaged).

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train(training)

    running_loss = 0.0
    n_samples = 0
    y_true_batches = []
    y_pred_batches = []

    with torch.set_grad_enabled(training):
        for imgs, labels in loader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # loss is a per-batch value; weighting by the batch size makes the
            # final average exact even when the last batch is smaller.
            running_loss += loss.item() * labels.size(0)
            n_samples += labels.size(0)

            y_true_batches.append(labels.detach().cpu().numpy())
            y_pred_batches.append(outputs.argmax(dim=1).detach().cpu().numpy())

    if n_samples == 0:
        raise ValueError(
            "El DataLoader no produjo muestras; no se pueden calcular la pérdida ni las métricas."
        )

    y_true = np.concatenate(y_true_batches)
    y_pred = np.concatenate(y_pred_batches)

    # zero_division=0 scores a class that was never predicted as 0 instead of
    # emitting a warning.
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average="macro", zero_division=0),
        "recall": recall_score(y_true, y_pred, average="macro", zero_division=0),
        "f1": f1_score(y_true, y_pred, average="macro", zero_division=0),
    }
    return running_loss / n_samples, metrics


def train_loop(n_epochs, model, optimizer, loss_fn, train_loader, val_loader, device):
    """Train ``model`` for ``n_epochs`` epochs, evaluating on ``val_loader`` after each one.

    Training metrics are accumulated from the predictions made during the
    training pass itself (while the weights are still being updated), not from
    a separate evaluation pass. One progress line is printed per epoch.

    Args:
        n_epochs: Number of epochs to run.
        model: Network to train; left in eval mode when at least one epoch ran.
        optimizer: Optimizer bound to the model's parameters.
        loss_fn: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        train_loader: Loader of ``(images, labels)`` training batches.
        val_loader: Loader of ``(images, labels)`` validation batches.
        device: Device the batches are moved to.

    Returns:
        A tuple ``(train_losses, val_losses, metricas_cnn_df, metricas_cnn_long)``:
        the per-epoch mean losses as lists, a wide DataFrame with one row per
        epoch, and the same data melted to long format with columns
        ``epoch``, ``metrica`` and ``valor``. The ``*_test`` columns hold the
        validation metrics.

    Raises:
        ValueError: If either loader yields no samples.
    """
    train_losses, val_losses = [], []

    metricas_cnn = {
        "epoch": [],
        "accuracy_train": [], "precision_train": [], "recall_train": [], "f1_train": [],
        "accuracy_test":  [], "precision_test":  [], "recall_test":  [], "f1_test":  []  # test = val
    }

    for epoch in range(1, n_epochs + 1):
        epoch_train_loss, train_metrics = _run_epoch(
            model, train_loader, loss_fn, device, optimizer, training=True
        )
        epoch_val_loss, val_metrics = _run_epoch(
            model, val_loader, loss_fn, device, optimizer, training=False
        )

        train_losses.append(epoch_train_loss)
        val_losses.append(epoch_val_loss)

        # Store this epoch's metrics.
        metricas_cnn["epoch"].append(epoch)
        for metric_name in ("accuracy", "precision", "recall", "f1"):
            metricas_cnn[f"{metric_name}_train"].append(train_metrics[metric_name])
            metricas_cnn[f"{metric_name}_test"].append(val_metrics[metric_name])

        acc_train, f1_train = train_metrics["accuracy"], train_metrics["f1"]
        acc_val, f1_val = val_metrics["accuracy"], val_metrics["f1"]

        print(
            f"Época {epoch}/{n_epochs} - "
            f"Train Loss: {epoch_train_loss:.4f} | Val Loss: {epoch_val_loss:.4f} | "
            f"Train Acc: {acc_train:.3f} | Val Acc: {acc_val:.3f} | "
            f"Train F1: {f1_train:.3f} | Val F1: {f1_val:.3f}"
        )

    metricas_cnn_df = pd.DataFrame(metricas_cnn)

    # Long format: one row per (epoch, metric) pair.
    metricas_cnn_long = metricas_cnn_df.melt(
        id_vars="epoch",
        var_name="metrica",
        value_name="valor"
    )

    return train_losses, val_losses, metricas_cnn_df, metricas_cnn_long

# ============================================================
# 7) Loss + optimizer (Adam), then the actual training run
# ============================================================
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Arguments are given in train_loop's declared order:
# (n_epochs, model, optimizer, loss_fn, train_loader, val_loader, device).
train_losses, val_losses, metricas_cnn_df, metricas_cnn_long = train_loop(
    n_epochs, model, optimizer, loss_fn, train_loader, val_loader, device
)

# Quick look at both result tables: the last epochs of the wide one and the
# first rows of the long one.
print("\nmetricas_cnn_df (tail):", metricas_cnn_df.tail(), sep="\n")
print("\nmetricas_cnn_long (head):", metricas_cnn_long.head(), sep="\n")

# ============================================================
# 8) Save the loss, accuracy and F1 curves (train vs. validation)
# ============================================================
def _save_curve_plot(epochs, train_values, val_values, ylabel, legend_name, out_path, ylim=None):
    """Plot a train and a validation curve against the epoch and save the figure.

    Args:
        epochs: X values shared by both curves.
        train_values: Per-epoch values of the training curve.
        val_values: Per-epoch values of the validation curve.
        ylabel: Label of the y axis.
        legend_name: Quantity name used in the legend ("Train <name>" / "Val <name>").
        out_path: File the figure is written to.
        ylim: Optional ``(low, high)`` y-axis limits; autoscaled when ``None``.
    """
    fig, ax = plt.subplots(figsize=(9, 5))
    ax.plot(epochs, train_values, label=f"Train {legend_name}")
    ax.plot(epochs, val_values, label=f"Val {legend_name}")
    ax.set_xlabel("Época")
    ax.set_ylabel(ylabel)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.legend()
    ax.grid(True)
    fig.savefig(out_path, dpi=300, bbox_inches="tight")
    # Close the figure so it is neither rendered inline nor kept in memory.
    plt.close(fig)


# The x axis comes from the epochs that were actually recorded, so the plots
# stay consistent with the metrics table even if n_epochs is edited afterwards.
_save_curve_plot(
    metricas_cnn_df["epoch"], train_losses, val_losses,
    ylabel="Loss", legend_name="Loss", out_path="loss_cnn.png",
)

_save_curve_plot(
    metricas_cnn_df["epoch"], metricas_cnn_df["accuracy_train"], metricas_cnn_df["accuracy_test"],
    ylabel="Accuracy", legend_name="Accuracy", out_path="accuracy_cnn.png", ylim=(0, 1),
)

# (Optional) F1 curve
_save_curve_plot(
    metricas_cnn_df["epoch"], metricas_cnn_df["f1_train"], metricas_cnn_df["f1_test"],
    ylabel="F1 (macro)", legend_name="F1", out_path="f1_cnn.png", ylim=(0, 1),
)

# ============================================================
# 9) Table ready to be concatenated with the other models' metrics
# ============================================================
# The pipeline keys its rows by "fold"; for the CNN, fold = epoch.
metricas_cnn_long = (
    metricas_cnn_long
    .rename(columns={"epoch": "fold"})
    .assign(modelo="CNN")  # tag every row with the model it belongs to
)

print("\nListo: metricas_cnn_long compatible para pd.concat con los otros modelos.")


Device: cpu
Total de imágenes: 1440
Train: 1152 Val: 288
Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=3, bias=True)
)
Época 1/29 - Train Loss: 1.1609 | Val Loss: 1.0530 | Train Acc: 0.355 | Val Acc: 0.372 | Train F1: 0.315 | Val F1: 0.249
Época 2/29 - Train Loss: 0.6489 | Val Loss: 0.3463 | Train Acc: 0.741 | Val Acc: 0.872 | Train F1: 0.733 | Val F1: 0.873
Época 3/29 - Train Loss: 0.3444 | Val Loss: 0.2773 | Train Acc: 0.859 | Val Acc: 0.878 | Train F1: 0.858 | Val F1: 0.878
Época 4/29 - Train Loss: 0.3079 | Val Loss: 0.2199 | Train Acc: 0.887 | Val Acc: 0.903 | Train F1: 0.887 | Val F1: 0.903
Época 5/29 - Train Loss: 0.1488 | Val Loss: 0.1588 | Train Acc: 0.953 | Val Acc: 0.951 | Train F1: 0.953 | Val F1: 0.952
Época 6/29 - Train Loss: 0.0979 | Val Loss: 0.0821 | Tr

In [None]:
# Export the per-epoch (wide) metrics table to an Excel file in the working
# directory; the DataFrame index is not written.
metricas_cnn_df.to_excel("metricas_cnn.xlsx", index=False)


In [None]:

def validate(model, train_loader, val_loader, device):
    """Print accuracy and macro precision/recall/F1 on the train and validation loaders.

    The model is switched to eval mode (and left in it) and every batch is
    evaluated with gradients disabled. Nothing is returned; the metrics are
    only printed.

    Args:
        model: Trained network to evaluate.
        train_loader: Loader of ``(images, labels)`` training batches.
        val_loader: Loader of ``(images, labels)`` validation batches.
        device: Device the image batches are moved to.
    """
    model.eval()
    with torch.no_grad():
        for name, loader in [("train", train_loader), ("val", val_loader)]:
            all_labels = []
            all_preds = []

            for imgs, labels in loader:
                imgs = imgs.to(device)

                outputs = model(imgs)
                predicted = outputs.argmax(dim=1)

                # Collect results for the metrics. Labels are only needed on
                # the CPU, so they are not copied to the device and back.
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(predicted.cpu().numpy())

            # Compute the metrics. zero_division=0 matches train_loop: a class
            # that is never predicted scores 0 instead of raising a warning.
            acc  = accuracy_score(all_labels, all_preds)
            prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
            rec  = recall_score(all_labels, all_preds, average='macro', zero_division=0)
            f1   = f1_score(all_labels, all_preds, average='macro', zero_division=0)

            print(f"{name.capitalize()} Accuracy : {acc:.4f}")
            print(f"{name.capitalize()} Precision: {prec:.4f}")
            print(f"{name.capitalize()} Recall   : {rec:.4f}")
            print(f"{name.capitalize()} F1-score : {f1:.4f}")
            print("-"*40)

# Run: print the final metrics of the trained model on both loaders.
validate(model, train_loader, val_loader, device)

Train Accuracy : 0.9991
Train Precision: 0.9991
Train Recall   : 0.9991
Train F1-score : 0.9991
----------------------------------------
Val Accuracy : 0.9931
Val Precision: 0.9933
Val Recall   : 0.9931
Val F1-score : 0.9931
----------------------------------------
