In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import layers, models, Model, losses
from tensorflow.keras.applications import ResNet50V2, ResNet101V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input

2025-06-17 22:37:39.448122: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-17 22:37:39.449237: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-17 22:37:39.455973: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-17 22:37:39.481604: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750199859.517070    2803 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750199859.52

# Carga de Datos

In [3]:
# Training subset: 80 % of the images under the Caltech-101 root, resized to
# 224x224 and batched by 32. `tf.keras.utils.image_dataset_from_directory` is
# the supported entry point; the `tf.keras.preprocessing` alias is deprecated.
# NOTE(review): the validation cell must keep the same `seed` and
# `validation_split` so that the two subsets stay disjoint.
raw_train_ds = tf.keras.utils.image_dataset_from_directory(
    "../data/raw/calltech/caltech-101/caltech-101/101_ObjectCategories/101_ObjectCategories",
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(224, 224),
    batch_size=32,
)

Found 9144 files belonging to 102 classes.
Using 7316 files for training.


2025-06-17 22:39:32.096005: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
# Validation/test subset: the remaining 20 % of the images, loaded with the
# same seed, split fraction, image size and batch size as the training subset.
# `tf.keras.utils.image_dataset_from_directory` replaces the deprecated
# `tf.keras.preprocessing` alias.
raw_test_ds = tf.keras.utils.image_dataset_from_directory(
    "../data/raw/calltech/caltech-101/caltech-101/101_ObjectCategories/101_ObjectCategories",
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(224, 224),
    batch_size=32,
)

Found 9144 files belonging to 102 classes.
Using 1828 files for validation.


In [5]:
AUTOTUNE = tf.data.AUTOTUNE


def _apply_resnet_preprocessing(images, targets):
    """Run the ResNetV2 `preprocess_input` on the images; targets pass through."""
    return preprocess_input(images), targets


# Same preprocessing for both splits, mapped in parallel and prefetched.
train_ds = raw_train_ds.map(
    _apply_resnet_preprocessing, num_parallel_calls=AUTOTUNE
).prefetch(AUTOTUNE)
test_ds = raw_test_ds.map(
    _apply_resnet_preprocessing, num_parallel_calls=AUTOTUNE
).prefetch(AUTOTUNE)

In [6]:
# Class names reported by the training dataset. `DeepIB` reads this global to
# size its classification head, so this cell must run before the model cell.
class_names = raw_train_ds.class_names

In [7]:
# Print the full list of class names as a quick sanity check of the loaded data.
print(class_names)

['BACKGROUND_Google', 'Faces', 'Faces_easy', 'Leopards', 'Motorbikes', 'accordion', 'airplanes', 'anchor', 'ant', 'barrel', 'bass', 'beaver', 'binocular', 'bonsai', 'brain', 'brontosaurus', 'buddha', 'butterfly', 'camera', 'cannon', 'car_side', 'ceiling_fan', 'cellphone', 'chair', 'chandelier', 'cougar_body', 'cougar_face', 'crab', 'crayfish', 'crocodile', 'crocodile_head', 'cup', 'dalmatian', 'dollar_bill', 'dolphin', 'dragonfly', 'electric_guitar', 'elephant', 'emu', 'euphonium', 'ewer', 'ferry', 'flamingo', 'flamingo_head', 'garfield', 'gerenuk', 'gramophone', 'grand_piano', 'hawksbill', 'headphone', 'hedgehog', 'helicopter', 'ibis', 'inline_skate', 'joshua_tree', 'kangaroo', 'ketch', 'lamp', 'laptop', 'llama', 'lobster', 'lotus', 'mandolin', 'mayfly', 'menorah', 'metronome', 'minaret', 'nautilus', 'octopus', 'okapi', 'pagoda', 'panda', 'pigeon', 'pizza', 'platypus', 'pyramid', 'revolver', 'rhino', 'rooster', 'saxophone', 'schooner', 'scissors', 'scorpion', 'sea_horse', 'snoopy', 's

In [8]:
# Random geometric augmentations (horizontal flip, small rotation, small zoom).
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])

# Augmentation is applied in the tf.data mapping (training split only).
# The pipeline is rebuilt from `raw_train_ds` instead of re-mapping `train_ds`
# onto itself, so re-running this cell never stacks a second round of
# augmentation. Both maps run in parallel and prefetching happens last, so the
# augmentation work overlaps with training.
train_ds = (
    raw_train_ds
    .map(lambda x, y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)
    .map(
        lambda x, y: (data_augmentation(x, training=True), y),
        num_parallel_calls=AUTOTUNE,
    )
    .prefetch(AUTOTUNE)
)

# Modelo Neuronal

In [9]:
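# Base model (kept for reference only: the frozen ResNet101V2 backbone is now
# created inside DeepIB.__init__, so this cell is intentionally disabled)
#base_model = ResNet101V2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
#base_model.trainable = False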

In [10]:
class DeepIB(Model):
    """Deep Variational Information Bottleneck classifier on a frozen ResNet101V2.

    The encoder maps a 224x224x3 image to the mean and log-variance of a
    diagonal Gaussian over a `z_dim`-dimensional latent code. `sampling` codes
    are drawn with the reparameterisation trick and a single dense layer
    decodes each of them into class logits.

    The training objective is
    ``cross_entropy(mean logits over samples) + beta * mean KL(q(z|x) || N(0, I))``.

    Args:
        z_dim: Dimensionality of the latent bottleneck.
        sampling: Number of latent samples drawn per input.
        beta: Weight of the KL term in the loss.
        num_classes: Number of output classes. When ``None`` (default) it
            falls back to ``len(class_names)`` from the notebook namespace,
            which keeps existing ``DeepIB(z_dim=..., ...)`` calls working.
    """

    def __init__(self, z_dim, sampling=1, beta=1.0, num_classes=None):
        super().__init__()
        self.sampling = sampling
        self.beta = beta
        if num_classes is None:
            # Backward-compatible default: read the notebook-level global.
            num_classes = len(class_names)
        self.num_classes = num_classes

        # Frozen ImageNet backbone used purely as a feature extractor.
        self.base_model = ResNet101V2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
        self.base_model.trainable = False

        # Encoder trunk shared by the mean and log-variance heads.
        self.encoder_x = tf.keras.Sequential([
            tf.keras.Input(shape=(224, 224, 3)),
            self.base_model,
            layers.GlobalAveragePooling2D(),
            layers.BatchNormalization(),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.3)
        ])
        self.encoder_mu = layers.Dense(z_dim)
        self.encoder_logvar = layers.Dense(z_dim)

        # Decoder: linear classifier from the latent code to class logits.
        self.decode_z = layers.Dense(num_classes)

        # Running means, so the epoch-level `loss` / `val_loss` reported by
        # fit() are averages over all batches instead of the last batch's value.
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.accuracy_tracker = tf.keras.metrics.Mean(name="accuracy")

    @property
    def metrics(self):
        """Trackers that Keras resets at the start of every epoch / evaluation."""
        return [self.loss_tracker, self.accuracy_tracker]

    def encode(self, x, training=False):
        """Return ``(mu, logvar)`` of the latent Gaussian for a batch of images.

        `training` is forwarded explicitly to the encoder trunk so that
        Dropout and BatchNormalization switch between training and inference
        behaviour. The default (``False``) gives deterministic encodings.
        """
        features = self.encoder_x(x, training=training)
        mu = self.encoder_mu(features)
        # Clip the log-variance so exp() stays in a numerically safe range.
        logvar = tf.clip_by_value(self.encoder_logvar(features), -10.0, 10.0)
        return mu, logvar

    def reparametrize(self, mu, logvar):
        """Draw `self.sampling` latent samples per input: z = mu + sigma * eps.

        Returns a tensor of shape [batch, samples, z_dim].
        """
        mu = tf.expand_dims(mu, axis=1)                         # [batch, 1, z_dim]
        sigma = tf.expand_dims(tf.exp(0.5 * logvar), axis=1)    # [batch, 1, z_dim]
        dims = tf.shape(mu)
        eps = tf.random.normal(tf.stack([dims[0], self.sampling, dims[2]]))
        return mu + sigma * eps

    def call(self, x, training=False):
        """Forward pass. Returns ``(logits, mu, logvar)``.

        ``logits`` has shape [batch, samples, num_classes].
        """
        mu, logvar = self.encode(x, training=training)
        z = self.reparametrize(mu, logvar)
        y_pred = self.decode_z(z)  # [batch, samples, num_classes]
        return y_pred, mu, logvar

    def compute_loss(self, x, y_true, training=True):
        """Return ``(total_loss, accuracy)`` for one batch.

        NOTE: this intentionally shadows `keras.Model.compute_loss` with a
        different signature. It is only called from the custom `train_step` /
        `test_step` below.

        Args:
            x: Batch of preprocessed images.
            y_true: Integer class labels.
            training: Whether layers run in training mode. Defaults to
                ``True`` to match the previous behaviour of this method.
        """
        y_pred, mu, logvar = self(x, training=training)
        logits = tf.reduce_mean(y_pred, axis=1)  # average over latent samples
        ce_loss = losses.SparseCategoricalCrossentropy(from_logits=True)(y_true, logits)
        # KL(N(mu, sigma^2) || N(0, I)) per example, using logvar directly
        # rather than log(exp(logvar)).
        kl = -0.5 * tf.reduce_sum(1.0 + logvar - tf.square(mu) - tf.exp(logvar), axis=1)
        total_loss = ce_loss + self.beta * tf.reduce_mean(kl)
        # Accuracy of the sample-averaged prediction.
        hits = tf.equal(tf.argmax(logits, axis=1), tf.cast(y_true, tf.int64))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        return total_loss, accuracy

    def _update_metrics(self, loss, acc):
        """Fold one batch into the running means and return the current values."""
        # Batches are weighted equally, so a smaller final batch counts
        # slightly more per sample.
        self.loss_tracker.update_state(loss)
        self.accuracy_tracker.update_state(acc)
        return {m.name: m.result() for m in self.metrics}

    def train_step(self, data):
        """One optimisation step with Dropout and BatchNorm in training mode."""
        x, y = data
        with tf.GradientTape() as tape:
            loss, acc = self.compute_loss(x, y, training=True)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return self._update_metrics(loss, acc)

    def test_step(self, data):
        """One evaluation step with all layers in inference mode."""
        x, y = data
        loss, acc = self.compute_loss(x, y, training=False)
        return self._update_metrics(loss, acc)

In [11]:
# Instantiate the Deep IB model (3-D latent, one latent sample per input,
# KL weight 1e-4) and compile it with a default-configured Adam optimizer.
deep_ib_config = {"z_dim": 3, "sampling": 1, "beta": 1e-4}
model = DeepIB(**deep_ib_config)

adam_optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=adam_optimizer)

In [12]:
# Call the model once on a symbolic 224x224 RGB input, then print its
# layer / parameter summary.
input_shape = (224, 224, 3)
dummy_input = tf.keras.Input(shape=input_shape)
output = model(dummy_input)
model.summary()

In [None]:
# Weights-only checkpoint, written only when `val_loss` reaches a new minimum;
# the epoch number is embedded in the file name.
checkpoint_path = "../models/deep_ib/checkpoints/weights_epoch_{epoch:02d}.weights.h5"
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Entrenamiento

In [None]:
# Train for 10 epochs, validating on the held-out split after each one.
# `checkpoint_cb` is passed explicitly: it was defined above but never handed
# to fit(), so no checkpoint was ever written.
history = model.fit(
    train_ds,
    epochs=10,
    validation_data=test_ds,
    callbacks=[checkpoint_cb],
)

Epoch 1/10
[1m213/229[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1:26[0m 5s/step - accuracy: 0.3012 - loss: 2.9863

# Visualización de Resultados

In [None]:
# Collect the latent means and the labels of every validation batch.
mu_chunks, label_chunks = [], []

# Only `mu` is kept (the log-variance is discarded), so no random sampling is
# involved in the representation that gets plotted.
for images, targets in test_ds:
    batch_mu, _ = model.encode(images)
    mu_chunks.append(batch_mu.numpy())
    label_chunks.append(targets.numpy())

# Stack the per-batch pieces into single arrays.
latents = np.concatenate(mu_chunks, axis=0)    # (N, z_dim)
labels = np.concatenate(label_chunks, axis=0)  # (N,)

In [None]:
# Scatter plot of the first two latent coordinates, coloured by class label.
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(latents[:, 0], latents[:, 1], c=labels, cmap='tab20', alpha=0.7, s=15)
n_labels = len(np.unique(labels))
fig.colorbar(scatter, ax=ax, ticks=range(n_labels))
ax.set_xlabel("z₁")
ax.set_ylabel("z₂")
ax.set_title("Espacio latente (mu) - Deep Information Bottleneck")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Second training run: calls fit() again on the same `model` object for another
# 10 epochs and rebinds `history`, so the History returned by the earlier run
# is no longer reachable through that name.
history = model.fit(train_ds, epochs=10, validation_data=test_ds)

In [None]:
# Re-encode the validation split after the extra training. Only the latent
# mean is kept, so no random sampling enters the plotted representation.
encoded_batches = [
    (model.encode(x_batch)[0].numpy(), y_batch.numpy())
    for x_batch, y_batch in test_ds
]

# Stack the per-batch pieces into single arrays.
latents = np.concatenate([batch_mu for batch_mu, _ in encoded_batches], axis=0)  # (N, z_dim)
labels = np.concatenate([batch_y for _, batch_y in encoded_batches], axis=0)     # (N,)

In [None]:
# Plot the first two latent coordinates again, coloured by class label.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    latents[:, 0],
    latents[:, 1],
    c=labels,
    cmap='tab20',
    alpha=0.7,
    s=15,
)
fig.colorbar(points, ax=ax, ticks=range(len(np.unique(labels))))
ax.set(
    xlabel="z₁",
    ylabel="z₂",
    title="Espacio latente (mu) - Deep Information Bottleneck",
)
ax.grid(True)
fig.tight_layout()
plt.show()