In [1]:
# Colab: asegurar versiones recientes
# !pip install -q scikit-posthocs==0.7 seaborn==0.13
# !pip install --upgrade "tensorflow==2.16.*" "keras==3.*"
# !pip install scikit-learn

from pathlib import Path
import json, math, numpy as np, pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from tqdm.auto import tqdm
import seaborn as sns
import matplotlib as mpl
from tensorflow import keras
# Root folder with the experiment outputs, resolved against the current
# working directory (so the notebook must be launched from the project root).
RESULTS_ROOT = Path().resolve() / "outputs"

  from .autonotebook import tqdm as notebook_tqdm
2025-06-20 11:34:22.978076: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def discover_keras(root: Path):
    """
    List every ``.keras`` checkpoint stored below ``root``.

    Only files laid out as ``ARQ_*/ESC_*/rep_*/checkpoints/<name>.keras``
    (relative to ``root``) are picked up.

    Parameters
    ----------
    root : Path
        Directory that contains the ``ARQ_*`` folders.

    Returns
    -------
    list[Path]
        Matching paths in ascending sorted order.
    """
    layout = "ARQ_*/ESC_*/rep_*/checkpoints/*.keras"
    checkpoints = list(root.glob(layout))
    checkpoints.sort()
    return checkpoints

# Discover all checkpoints once; the evaluation loop further down filters this
# list by architecture / scenario / repetition.
keras_paths = discover_keras(RESULTS_ROOT)
print(f"Encontrados {len(keras_paths)} modelos Keras")

Encontrados 229 modelos Keras


In [3]:
# data_module.py
import numpy as np
from typing import Literal, Optional
import os, h5py, numpy as np
from pathlib import Path
import tensorflow as tf

import yaml                          # Lectura y mezcla de archivos YAML
import sys               # Conversión IPython Notebook → .py
from pathlib import Path             # Manejo robusto de rutas
from importlib import import_module  # Import dinámico del modelo


# Base paths, both resolved against the current working directory
# (adjust them if your Google Drive layout differs).
CONFIG_ROOT = Path().resolve() / "configs"
MODELS_ROOT = Path().resolve() / "models"

def load_config(exp_name:str):
    """
    Load the YAML configuration of one experiment.

    The file ``CONFIG_ROOT / "experiments" / "<exp_name>.yaml"`` is parsed with
    ``yaml.safe_load``. If it contains a top-level ``"_base_"`` entry, that
    value is taken as a path relative to ``CONFIG_ROOT`` pointing at a base
    configuration, which is loaded and merged underneath the experiment.

    The merge is *shallow*: a top-level key present in the experiment file
    replaces the whole value from the base file (nested sections are not
    merged key by key). The ``"_base_"`` entry itself is kept in the result.

    Parameters
    ----------
    exp_name : str
        File stem of the experiment configuration.

    Returns
    -------
    dict
        The merged configuration; ``{}`` when the experiment file is empty.
    """
    exp_path = CONFIG_ROOT / "experiments" / f"{exp_name}.yaml"
    # Read as UTF-8 explicitly instead of the locale default, and turn the
    # ``None`` that safe_load returns for an empty document into an empty dict
    # so the "_base_" lookup below cannot fail with a TypeError.
    exp_cfg = yaml.safe_load(exp_path.read_text(encoding="utf-8")) or {}

    if "_base_" not in exp_cfg:
        return exp_cfg

    # Optional inheritance: experiment values win over the base ones.
    base_path = CONFIG_ROOT / exp_cfg["_base_"]
    base_cfg = yaml.safe_load(base_path.read_text(encoding="utf-8")) or {}
    return {**base_cfg, **exp_cfg}

# ────────────────────────────────────────────────────────────────────
class SingleHDF5:
    """
    In-memory wrapper around *one* ``.hdf5`` file.

    On construction the datasets named by ``keys`` are read from the file:
    ``X`` and ``Y`` always, ``Z`` only if a "Z" entry is configured and present
    in the file. If the file has an ``"Effects"`` group, its members are packed
    into a single structured array with one field per member and one row per
    sample of ``X``.

    Parameters
    ----------
    local_dir : str | Path
        Path of the ``.hdf5`` file. Despite the name, the value is handed
        directly to ``h5py.File``, so it must point at the file itself.
    keys : dict | None
        Maps the logical names ``"X"``, ``"Y"`` and (optionally) ``"Z"`` to the
        dataset names inside the file. A falsy value selects the identity
        mapping ``{"X": "X", "Y": "Y", "Z": "Z"}``.
    """

    def __init__(
        self,
        *,
        local_dir: str,
        keys: dict,
    ) -> None:

        file_path = local_dir

        # 1) Read everything into memory so the file can be closed right away.
        self.keys = keys or {"X": "X", "Y": "Y", "Z": "Z"}
        # "Z" is optional both in the mapping and in the file.
        z_key = self.keys.get("Z")
        with h5py.File(file_path, "r") as f:
            self.X = f[self.keys["X"]][:]
            self.Y = f[self.keys["Y"]][:]
            self.Z = f[z_key][:] if (z_key is not None and z_key in f) else None

            if "Effects" in f:
                grp = f["Effects"]
                # One structured-array field per member of the group.
                dtype = [(name, grp[name].dtype) for name in grp]
                eff = np.empty(len(self.X), dtype=dtype)
                for name in grp:
                    eff[name] = grp[name][:]
                self.Effects = eff
            else:
                self.Effects = None

        # Active indices: every sample counts as "train" and "val" is empty
        # until register_indices() is called.
        n = len(self.X)
        self.train_idx = np.arange(n, dtype=np.int64)
        self.val_idx   = np.empty(0, dtype=np.int64)

    # ── Internal helpers ─────────────────────────────────────────
    def _split_indices(self, split):
        """
        Resolve ``split`` to ``(normalised_split, index_array)``.

        ``None`` maps to every row of the file; "train"/"val" (matched
        case-insensitively) map to the registered index arrays.

        Raises
        ------
        ValueError
            If ``split`` is neither ``None``, "train" nor "val".
        """
        if split is None:
            return None, np.arange(len(self.X), dtype=np.int64)
        split = split.lower()
        if split == "train":
            return split, self.train_idx
        if split == "val":
            return split, self.val_idx
        raise ValueError("split debe ser 'train', 'val' o None")

    # ── Minimal API ──────────────────────────────────────────────
    def register_indices(self, train_idx, val_idx):
        """Store the train / validation row indices as ``int64`` arrays."""
        self.train_idx = np.asarray(train_idx, dtype=np.int64)
        self.val_idx   = np.asarray(val_idx,   dtype=np.int64)

    def get_arrays(self, split: str = None):
        """
        Return ``(X, Y)`` for the requested split.

        Parameters
        ----------
        split : "train" | "val" | None
            ``None`` returns the complete arrays as stored (no copy);
            "train"/"val" (case-insensitive) return the rows selected by the
            registered indices.

        Raises
        ------
        ValueError
            If ``split`` is a string other than "train" or "val".
        """
        if split is None:
            return self.X, self.Y
        split = split.lower()
        if split == "train":
            return self.X[self.train_idx], self.Y[self.train_idx]
        if split == "val":
            return self.X[self.val_idx], self.Y[self.val_idx]
        raise ValueError("split debe ser 'train' o 'val'")

    def get_effects(
        self,
        *,
        split: str = None,
        fields: list[str] = None,
    ):
        """
        Return the effects structured array aligned with ``split``.

        Parameters
        ----------
        split : "train" | "val" | None
            ``None`` returns one row per sample of the whole file, matching
            ``get_arrays(None)`` even after ``register_indices`` was called.
            "train"/"val" are matched case-insensitively.
        fields : list[str] | None
            Subset of fields to return. ``None`` returns all of them.

        Raises
        ------
        ValueError
            If the file had no "Effects" group, or ``split`` is invalid.
        """
        if self.Effects is None:
            raise ValueError("Este HDF5 no contiene grupo 'Effects'.")

        _, idx = self._split_indices(split)

        # Indexing with an integer array yields a new array, so callers never
        # receive a view of self.Effects.
        eff = self.Effects[idx]
        if fields is not None:
            eff = eff[fields].copy()
        return eff

    def to_tf_dataset(
        self,
        *,
        split: str = None,
        batch_size: int,
        shuffle: bool = True,
        seed: int,
        prefetch: bool = True,
        include_index: bool = False,
        buffer_size: int = None,
    ):
        """
        Build a batched ``tf.data.Dataset`` yielding ``(X, Y)`` or ``(X, Y, idx)``.

        Parameters
        ----------
        split : "train" | "val" | None
            ``None`` uses the complete file. "train"/"val" are matched
            case-insensitively.
        batch_size : int
            Batch size passed to ``Dataset.batch``.
        shuffle : bool
            When True, the dataset is shuffled for ``split`` "train" **and**
            for ``split=None``; "val" is never shuffled. Pass ``shuffle=False``
            to keep file order.
        seed : int
            Seed for the shuffle.
        prefetch : bool
            When True, appends ``prefetch(tf.data.AUTOTUNE)``.
        include_index : bool
            When True, each element also carries the absolute row index inside
            the HDF5 file (useful for per-sample metrics).
        buffer_size : int | None
            Shuffle buffer size. Defaults to the number of samples in the split.

        Raises
        ------
        ValueError
            If ``split`` is a string other than "train" or "val".
        """
        # Normalise once so the index selection and the shuffle decision agree
        # with get_arrays(), which is case-insensitive.
        if split is not None:
            split = split.lower()

        Xs, Ys = self.get_arrays(split)

        if include_index:
            _, idx = self._split_indices(split)
            ds = tf.data.Dataset.from_tensor_slices((Xs, Ys, idx))
        else:
            ds = tf.data.Dataset.from_tensor_slices((Xs, Ys))

        # Validation data is never shuffled.
        if shuffle and (split in (None, "train")):
            ds = ds.shuffle(
                buffer_size or len(Xs),
                seed=seed,
                reshuffle_each_iteration=True,
            )

        ds = ds.batch(batch_size)
        if prefetch:
            ds = ds.prefetch(tf.data.AUTOTUNE)
        return ds






class DataModule:
    """
    Facade that exposes one HDF5 file as the *test* set.

    The constructor loads a single file through ``SingleHDF5`` and stores it as
    ``self.testset``. No train/validation source is loaded here, so requests
    for the "train" or "val" splits fail with ``ValueError`` unless a caller
    has attached an object to ``self.trainset``.

    Parameters
    ----------
    local_download_dir : str | Path
        Path of the ``.hdf5`` file to load (forwarded as ``local_dir``).
    keys : dict | None
        Mapping of the logical names "X"/"Y"/"Z" to dataset names in the file.
        Missing entries (or ``None``) fall back to the identity mapping.
    seed : int
        Seed forwarded to every ``to_tf_dataset`` call.
    """

    def __init__(
        self,
        *,
        local_download_dir: str,
        keys: Optional[dict],
        seed: int,
    ):
        self.seed = seed

        # Explicit placeholder so train/val accessors can report a clear error.
        self.trainset = None

        # Honour the caller's mapping, filling any gap with the defaults.
        resolved_keys = {"X": "X", "Y": "Y", "Z": "Z", **(keys or {})}
        self.testset = SingleHDF5(
            local_dir=local_download_dir,
            keys=resolved_keys,
        )

    def _require_trainset(self, split):
        """Return the attached train/val source or raise ``ValueError``."""
        trainset = getattr(self, "trainset", None)
        if trainset is None:
            raise ValueError(
                f"Este DataModule sólo contiene el conjunto de test; "
                f"split='{split}' no está disponible."
            )
        return trainset

    # ————————————————— Public API —————————————————
    def get_arrays(self, split: Literal["train", "val", "test"]):
        """
        Return ``(X, Y)`` for ``split``.

        Raises
        ------
        ValueError
            If ``split`` is not "train", "val" or "test", or if "train"/"val"
            is requested while no train/val source is attached.
        """
        if split in ("train", "val"):
            return self._require_trainset(split).get_arrays(split)
        if split == "test":
            return self.testset.get_arrays()
        raise ValueError("split debe ser 'train', 'val' o 'test'")

    def get_effects(self, **kw):
        """Return the effects of the test set; ``kw`` is forwarded unchanged."""
        return self.testset.get_effects(**kw)

    def to_tf_dataset(
        self,
        *,
        split: Literal["train", "val", "test"],
        batch_size: int,
        shuffle: bool = True,
        prefetch: bool = True,
        **kw,
    ):
        """
        Build the dataset for ``split``.

        ``batch_size``, ``shuffle``, ``prefetch``, the module ``seed`` and any
        extra keyword (e.g. ``include_index``) are forwarded to the underlying
        ``to_tf_dataset``. For "test" the complete test file is used.

        Raises
        ------
        ValueError
            If ``split`` is not "train", "val" or "test", or if "train"/"val"
            is requested while no train/val source is attached. This replaces
            silently returning test data for any ``split`` value.
        """
        common_kw = dict(
            batch_size=batch_size,
            shuffle=shuffle,
            prefetch=prefetch,
            seed=self.seed,
            **kw,
        )
        if split == "test":
            return self.testset.to_tf_dataset(**common_kw)
        if split in ("train", "val"):
            return self._require_trainset(split).to_tf_dataset(split=split, **common_kw)
        raise ValueError("split debe ser 'train', 'val' o 'test'")


In [4]:
def parse_meta(p: Path):
    """
    Extract ``(architecture, scenario, repetition)`` from a checkpoint path.

    The path is expected to end in
    ``<ARQ_*>/<ESC_*>/<rep_N>/<folder>/<file>``; the three identifying folders
    are read by position counted from the end of the path.

    Returns
    -------
    tuple[str, str, int]
        The architecture folder name, the scenario folder name, and the
        integer found after the first underscore of the repetition folder.
    """
    segments = p.parts
    arch_name = segments[-5]                 # ARQ_*
    scenario_name = segments[-4]             # ESC_*
    rep_tokens = segments[-3].split("_")     # ["rep", "<N>", ...]
    return arch_name, scenario_name, int(rep_tokens[1])

def load_keras_model(p: Path):
    """
    Restore a *full* model from a ``.keras`` file.

    The model is loaded with ``compile=True``, i.e. Keras is asked to compile
    it from the configuration stored inside the checkpoint.
    """
    restored = keras.models.load_model(p, compile=True)
    return restored

# Project-local analyzer: imported once here instead of on every iteration of
# the innermost loop.
from utils.analysis.analysis import ExperimentAnalyzer

ARCHS = ['ARQ_2', 'ARQ_3']
ESCS = ["ESC_1", "ESC_2", "ESC_3", "ESC_4", "ESC_5", "ESC_6", "ESC_7", "ESC_8", "ESC_9", "ESC_10", "ESC_11"]


# Evaluate one checkpoint per (architecture, scenario, repetition) on the test
# file of that scenario and write the individual reports.
for arch_key in ARCHS:
    for esc in ESCS:
        # Checkpoints of this architecture/scenario, in keras_paths order.
        scenario_paths = [
            p for p in keras_paths
            if p.parts[-5] == arch_key and p.parts[-4] == esc
        ]

        test_data_dir = Path().resolve() / 'datasets' / 'test' / esc
        dataset_path = sorted(test_data_dir.glob("*.hdf5"))

        # The test file depends only on the scenario, so it is read at most
        # once per (architecture, scenario) — lazily, on the first repetition
        # that actually has a checkpoint — instead of once per repetition.
        datamodule = None

        for rep in range(10):
            matches = [p for p in scenario_paths if parse_meta(p)[2] == rep]
            if not matches:
                continue

            # NOTE(review): keras_paths is sorted as paths, so with several
            # checkpoints in one folder this takes the lexicographically first
            # file name (e.g. "epoch_12" sorts before "epoch_9") — confirm this
            # is the checkpoint that should be evaluated.
            model_path = matches[0]          # e.g. epoch_12.keras
            model = load_keras_model(model_path)

            if datamodule is None:
                if not dataset_path:
                    raise FileNotFoundError(
                        f"No se encontró ningún archivo .hdf5 en {test_data_dir}"
                    )
                datamodule = DataModule(
                    local_download_dir=dataset_path[0],
                    keys={"X": "X", "Y": "Y", "Z": "Z"},
                    seed=42
                )

            # Reloaded for every repetition because it is modified below.
            cfg = load_config(f'{arch_key}_{esc}')
            tr = cfg.get("training", {})
            test_ds_idx = datamodule.to_tf_dataset(
                split="test", batch_size=tr.get("batch_size", 32),
                shuffle=False, prefetch=False, include_index=True
            )

            # Point the output sub-directory at the current repetition.
            cfg["experiment"]["output_subdir"] = cfg["experiment"]["output_subdir"] + "/" + f"_rep_{rep}"

            print(f"Evaluando modelo {arch_key} {esc} rep_{rep}... con el {model_path.name}")
            # 4A.6) Individual result analysis
            analyzer = ExperimentAnalyzer(
                model=model,
                history=None,
                test_data=test_ds_idx,
                cfg=cfg,
                effects=datamodule.get_effects(),
                repeat_index=rep,
                show_plots=False,
                )

            analyzer.classification_report()
            analyzer.effect_report()
            analyzer.confusion_matrix(normalize="true")


Evaluando modelo ARQ_2 ESC_1 rep_0... con el epoch_33.keras


2025-06-20 11:34:32.441669: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
I0000 00:00:1750437273.675691  146316 service.cc:145] XLA service 0x7f81efc0a1f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1750437273.676462  146316 service.cc:153]   StreamExecutor device (0): Host, Default Version
2025-06-20 11:34:33.678430: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-20 11:34:33.814643: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1750437274.905693  146316 device_compiler.h:

🔖 JSON de métricas y evaluación guardado en: /Users/juanpabloperezvargas/Desktop/TESIS/AMC_ARQ/outputs/ARQ_2/ESC_1/_rep_0/reports/classification_report.json

📄 Classification Report Summary

              precision    recall  f1-score   support

        bpsk     1.0000    1.0000    1.0000      1024
        qpsk     0.9971    0.9990    0.9980      1024
       16qam     0.7688    0.8604    0.8120      1024
 32qam cross     0.6103    0.6729    0.6400      1024
       64qam     0.8082    0.7285    0.7663      1024
128qam cross     0.6484    0.5674    0.6052      1024

    accuracy                         0.8047      6144
   macro avg     0.8055    0.8047    0.8036      6144
weighted avg     0.8055    0.8047    0.8036      6144


Eval loss: 0.4513, Eval accuracy: 0.8047
🔖 Gráfico guardado en: /Users/juanpabloperezvargas/Desktop/TESIS/AMC_ARQ/outputs/ARQ_2/ESC_1/_rep_0/reports/report_num_taps.png
🔖 Gráfico guardado en: /Users/juanpabloperezvargas/Desktop/TESIS/AMC_ARQ/outputs/ARQ_2/ESC_1/_re

KeyboardInterrupt: 