<a href="https://colab.research.google.com/github/matbutom/maquina-de-contrapropaganda/blob/main/letras_generativas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%bash
# Create the base folder and one sub-folder per letter (A..Z) in a single call.
# -p creates missing parents and is a no-op for folders that already exist.
mkdir -p /content/recortes_letras /content/recortes_letras/{A..Z}

In [None]:
%%bash
# Fetch the letter crops from GitHub and copy them into the Colab working folder.
# -e stops at the first failing command and -u rejects unset variables, so a
# failed clone or copy can no longer end with the "load complete" message.
set -eu

# 1. Repository location and local destination
REPO_URL="https://github.com/matbutom/maquina-de-contrapropaganda.git"
REPO_NAME="maquina-de-contrapropaganda"
TARGET_DIR="/content/recortes_letras"

echo "Clonando el repositorio completo ($REPO_NAME) en el entorno de Colab..."
# Remove any clone left behind by an interrupted run: git refuses to clone
# into an existing, non-empty directory.
rm -rf "$REPO_NAME"
git clone "$REPO_URL"

# 2. Copy (the clone is deleted afterwards) the per-letter folders into the
#    working directory. Variables are quoted; only the glob is left unquoted
#    so it still expands to the A, B, C... sub-folders.
mkdir -p "$TARGET_DIR"
echo "Moviendo el contenido de las carpetas de letras (A, B, C...) a $TARGET_DIR..."
cp -r "$REPO_NAME"/recortes_letras/* "$TARGET_DIR"/

# 3. The clone is no longer needed
echo "Limpiando el repositorio clonado..."
rm -rf "$REPO_NAME"

# 4. Sanity check: list the first entries of folder 'A'
echo "‚úÖ ¬°Carga completa! Verificando la carpeta 'A':"
ls -l "$TARGET_DIR/A" | head -n 5

In [None]:
# Delete the cached TFDS directory for this dataset (rm -f: no error if it does not exist).
!rm -rf ~/tensorflow_datasets/maquina_contrapropaganda


In [None]:
# ============================================================
# Clean-up and on-disk resize of the dataset
# ============================================================

import os
from PIL import Image

base_dir = "/content/recortes_letras"
target_size = (64, 64)

# Walk every letter folder and overwrite each image with a 64x64 RGB version.
for root, dirs, files in os.walk(base_dir):
    for f in files:
        if not f.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        path = os.path.join(root, f)
        try:
            # The context manager closes the source file handle before the
            # result is written back to the same path.
            with Image.open(path) as src:
                # Already in the target format: skip, so re-running the cell
                # does not re-encode (and degrade) JPEGs over and over.
                if src.size == target_size and src.mode == "RGB":
                    continue
                im = src.convert("RGB").resize(target_size, Image.LANCZOS)
            im.save(path)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print("‚ö†Ô∏è Error con", path, "‚Üí", e)

print("‚úÖ Todas las im√°genes fueron redimensionadas f√≠sicamente a 64√ó64 px.")


In [None]:
# ============================================================
# üß© Verificador de dataset ‚Äî reconstruye solo si hay letras nuevas
# ============================================================

import os
import tensorflow_datasets as tfds

# Folder holding one sub-folder per letter (change it when reading from Drive)
data_dir = "/content/recortes_letras"
# Location checked below for a previously prepared copy of the dataset
builder_dir = os.path.expanduser("~/tensorflow_datasets/maquina_contrapropaganda")


def contar_carpetas(path):
    """Return the names of the immediate sub-directories of `path`, sorted.

    Plain files are ignored; only entries for which os.path.isdir is true
    are kept.
    """
    subdirs = []
    for entry in os.listdir(path):
        if os.path.isdir(os.path.join(path, entry)):
            subdirs.append(entry)
    subdirs.sort()
    return subdirs

import shutil  # used below to remove the cached dataset directory

# Letter folders currently present on disk
carpetas_actuales = contar_carpetas(data_dir)
num_actual = len(carpetas_actuales)

# Number of classes of the previously prepared dataset (0 when none is readable)
prev_num = 0
if os.path.exists(builder_dir):
    try:
        info = tfds.builder('maquina_contrapropaganda').info
        prev_num = info.features["label"].num_classes
    except Exception as err:
        # Report the reason instead of hiding it; prev_num stays 0, which
        # forces a rebuild below whenever at least one letter folder exists.
        # NOTE(review): presumably this fails when the builder class has not
        # been defined/registered yet in this session - confirm.
        print(f"No se pudo leer el dataset anterior ({type(err).__name__}: {err}); se asume 0 clases.")

print(f"üì¶ Letras actuales detectadas: {carpetas_actuales}")
print(f"üß† Dataset anterior: {prev_num} clases | Nuevo: {num_actual} clases")

# More letter folders than known classes: drop the cached dataset.
# NOTE(review): the builder in this notebook declares a fixed A-Z label list,
# so a readable previous dataset always reports 26 classes - confirm that
# comparing counts is the intended change detection.
if num_actual > prev_num:
    print("‚ö†Ô∏è Se detectaron nuevas letras. Regenerando dataset completo...")
    # Pure-Python equivalent of `rm -rf`: a missing directory is not an error.
    shutil.rmtree(builder_dir, ignore_errors=True)
else:
    print("‚úÖ No hay cambios en las clases, se mantiene el dataset anterior.")


In [None]:
# ============================================================
# On-disk check of the real image sizes
# ============================================================

from PIL import Image
import os

base_dir = "/content/recortes_letras"
# Collected as (path, size) for wrongly sized images, or (path, error marker)
# for files that could not be opened.
malas = []

for root, dirs, files in os.walk(base_dir):
    image_names = [f for f in files if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    for f in image_names:
        path = os.path.join(root, f)
        try:
            with Image.open(path) as im:
                actual_size = im.size
        except Exception:
            malas.append((path, "‚ùå error"))
            continue
        if actual_size != (64, 64):
            malas.append((path, actual_size))

print(f"Total de im√°genes fuera de tama√±o esperado: {len(malas)}")
# Only the first ten offenders are listed
for i, (p, s) in enumerate(malas[:10]):
    print(f"{i+1:02d}. {p} ‚Üí {s}")


In [None]:
# ============================================================
# üì¶ Custom Dataset ‚Äî M√°quina de Contrapropaganda
# ============================================================

import tensorflow_datasets as tfds
import tensorflow as tf
import os

# Free-text description handed to tfds.core.DatasetInfo(description=...) by the builder below.
_DESCRIPTION = """
Dataset visual para el proyecto 'M√°quina de Contrapropaganda'.
Contiene letras recortadas clasificadas por carpeta (A‚ÄìZ),
extra√≠das de carteles propagand√≠sticos.
"""

# BibTeX entry handed to tfds.core.DatasetInfo(citation=...) by the builder below.
_CITATION = """
@misc{rafita2025maquinacontrapropaganda,
  title={M√°quina de Contrapropaganda Dataset},
  author={Arce, Mateo},
  year={2025},
  howpublished={Rafita Studio / Universidad de Chile}
}
"""

class MaquinaContrapropaganda(tfds.core.GeneratorBasedBuilder):
    """TFDS builder that reads letter crops from one folder per class label.

    Each example is an RGB image plus a label taken from the name of the
    folder that contains it; the label set is fixed to the letters A-Z.
    """

    VERSION = tfds.core.Version('1.0.0')

    def _info(self):
        """Describe the features: a 3-channel image of any size and an A-Z label."""
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict({
                "image": tfds.features.Image(shape=(None, None, 3)),
                "label": tfds.features.ClassLabel(names=[chr(i) for i in range(65, 91)])  # A-Z
            }),
            supervised_keys=("image", "label"),
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Expose a single 'train' split read from the local letters folder.

        `dl_manager` is unused: nothing is downloaded.
        """
        data_dir = '/content/recortes_letras'
        return {"train": self._generate_examples(data_dir)}

    def _generate_examples(self, path):
        """Yield (key, example) pairs for every image under `path`/<label>/.

        Args:
            path: directory containing one sub-directory per label.

        Yields:
            ("<label>/<file name>", {"image": file path, "label": label name}).
        """
        valid_labels = set(self.info.features["label"].names)
        for label_name in sorted(os.listdir(path)):
            label_dir = os.path.join(path, label_name)
            # Skip plain files and folders that are not a declared label
            # (e.g. tooling folders); an unknown label cannot be encoded.
            if not os.path.isdir(label_dir) or label_name not in valid_labels:
                continue
            # Sorted so the generation order does not depend on the filesystem
            for img_name in sorted(os.listdir(label_dir)):
                if img_name.lower().endswith((".jpg", ".png", ".jpeg")):
                    # The key includes the label folder: the same file name may
                    # appear under several letters and keys must be unique.
                    yield f"{label_name}/{img_name}", {
                        "image": os.path.join(label_dir, img_name),
                        "label": label_name,
                    }

# === Build the dataset ===
# Instantiate the builder defined above and prepare its data.
builder = MaquinaContrapropaganda()
builder.download_and_prepare()

# as_supervised=True: later cells iterate this as (image, label) pairs.
ds = builder.as_dataset(split="train", as_supervised=True)

print("‚úÖ Dataset cargado correctamente.")
print("Clases detectadas:", builder.info.features["label"].names)



In [None]:
# ============================================================
# Visual preview of dataset examples
# ============================================================

import matplotlib.pyplot as plt

label_feature = builder.info.features["label"]
examples = list(ds.take(9))  # fewer than 9 when the dataset is smaller

# One 3x3 grid instead of nine separate figures opened inside the loop.
fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for ax, (image, label) in zip(axes.flat, examples):
    ax.imshow(image.numpy())
    ax.set_title(label_feature.int2str(label.numpy()))
# Hide the axes everywhere, including panels left empty by a short dataset
for ax in axes.flat:
    ax.axis("off")
fig.tight_layout()
plt.show()


In [None]:
# ============================================================
# Forced on-disk resize (only the images flagged in `malas`)
# ============================================================

from PIL import Image

no_reparadas = []  # paths that could not be rewritten

for path, size in malas:
    try:
        # Close the source handle before overwriting the same path
        with Image.open(path) as src:
            im = src.convert("RGB").resize((64, 64), Image.LANCZOS)
        im.save(path)
    except Exception as e:
        no_reparadas.append(path)
        print("‚ùå No se pudo reparar:", path, "->", e)

# Only claim success when every flagged image was actually rewritten
if no_reparadas:
    print(f"{len(no_reparadas)} de {len(malas)} imagenes no se pudieron reparar.")
else:
    print("‚úÖ Todas las im√°genes malas fueron corregidas.")


In [None]:
# ============================================================
# Automatic train / val / test split of the dataset
# ============================================================

import tensorflow as tf
import math

# Total number of examples. Ask tf.data for the cardinality first; a negative
# value means unknown/infinite, in which case fall back to one counting pass.
total = int(ds.cardinality().numpy())
if total < 0:
    total = sum(1 for _ in ds)
train_size = math.floor(total * 0.8)
val_size = math.floor(total * 0.1)
test_size = total - train_size - val_size  # remainder, so the three sizes add up to total

print(f"üìä Total de ejemplos: {total}")
print(f"üîπ Train: {train_size} | üî∏ Val: {val_size} | ‚ö™ Test: {test_size}")

# --- consecutive slices via take() / skip() ---
train_ds = ds.take(train_size)
val_ds = ds.skip(train_size).take(val_size)
test_ds = ds.skip(train_size + val_size)

# --- normalise images ---
AUTOTUNE = tf.data.AUTOTUNE

def preprocess(img, label):
    """Convert the image to float32 with tf.image.convert_image_dtype; the label is passed through."""
    img = tf.image.convert_image_dtype(img, tf.float32)
    return img, label

# Only the training pipeline is shuffled; all three are cached, batched by 32 and prefetched.
train_ds = train_ds.map(preprocess, num_parallel_calls=AUTOTUNE).cache().shuffle(1000).batch(32).prefetch(AUTOTUNE)
val_ds = val_ds.map(preprocess, num_parallel_calls=AUTOTUNE).cache().batch(32).prefetch(AUTOTUNE)
test_ds = test_ds.map(preprocess, num_parallel_calls=AUTOTUNE).cache().batch(32).prefetch(AUTOTUNE)

print("‚úÖ Datasets divididos y listos para entrenamiento.")


In [None]:
# ============================================================
# Check of the batch size and image shape
# ============================================================

import matplotlib.pyplot as plt

for imgs, labels in train_ds.take(1):
    lowest = tf.reduce_min(imgs).numpy()
    highest = tf.reduce_max(imgs).numpy()
    print("‚úÖ batch shape:", imgs.shape)
    print("üîπ dtype:", imgs.dtype)
    print("üîπ rango de valores:", lowest, "‚Üí", highest)

    # Show the first image of the batch as a visual confirmation
    plt.imshow(imgs[0])
    plt.title(f"Ejemplo de imagen ‚Äî tama√±o {imgs[0].shape}")
    plt.axis("off")
    plt.show()


In [None]:
# ============================================================
# üß© Configuraci√≥n general
# ============================================================

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os

IMG_SIZE = 64
EPOCHS = 40

# ============================================================
# Label-free datasets that repeat indefinitely
# ============================================================

def ensure_valid_image(img):
    """Return `img` as float32, resized to IMG_SIZE x IMG_SIZE, with a static 3-channel shape."""
    as_float = tf.image.convert_image_dtype(img, tf.float32)
    resized = tf.image.resize(as_float, [IMG_SIZE, IMG_SIZE])
    return tf.ensure_shape(resized, [IMG_SIZE, IMG_SIZE, 3])


def _images_only(batched_ds):
    """Unbatch an (image, label) dataset and keep only the validated image."""
    return batched_ds.unbatch().map(
        lambda x, y: ensure_valid_image(x),
        num_parallel_calls=tf.data.AUTOTUNE,
    )


# Training stream: shuffled before batching
train_ds_no_labels = (
    _images_only(train_ds).shuffle(512).batch(32).repeat().prefetch(tf.data.AUTOTUNE)
)

# Validation stream: same pipeline without the shuffle
val_ds_no_labels = (
    _images_only(val_ds).batch(32).repeat().prefetch(tf.data.AUTOTUNE)
)

print("‚úÖ Datasets verificados:")
for imgs in train_ds_no_labels.take(1):
    print("train batch:", imgs.shape)
for imgs in val_ds_no_labels.take(1):
    print("val batch:", imgs.shape)


# ============================================================
# üé® VisualCallback corregido (seguro y estable)
# ============================================================

class VisualCallback(tf.keras.callbacks.Callback):
    """Keras callback that saves and shows input/reconstruction grids.

    Every `interval` epochs the first (up to) 8 images of `sample_batch` are
    passed through `self.model.encoder` and `self.model.decoder`; the inputs
    (top row) and reconstructions (bottom row) are written to
    `<save_dir>/epoch_NNN.png` and displayed.

    Args:
        sample_batch: batch of images fed to the encoder.
        save_dir: output folder for the PNG files; created if missing.
        interval: number of epochs between two visualisations.

    Attributes:
        generated_images: one reconstruction array per visualised epoch,
            kept for later GIF creation.
    """

    def __init__(self, sample_batch, save_dir="/content/outputs", interval=5):
        super().__init__()
        self.sample_batch = sample_batch
        self.save_dir = save_dir
        self.interval = interval
        os.makedirs(save_dir, exist_ok=True)
        self.generated_images = []  # reconstructions stored for GIF creation

    @staticmethod
    def _draw(ax, img):
        """Draw one image on `ax` with the axes hidden.

        Single-channel images are shown in grey on a fixed 0-1 scale instead
        of going through the default colormap with per-image scaling.
        """
        arr = np.asarray(img)
        if arr.ndim == 3 and arr.shape[-1] == 1:
            ax.imshow(arr[..., 0], cmap="gray", vmin=0.0, vmax=1.0)
        else:
            ax.imshow(arr)
        ax.axis("off")

    def on_epoch_end(self, epoch, logs=None):
        """Save and display the comparison grid on every `interval`-th epoch."""
        if (epoch + 1) % self.interval != 0:
            return

        sample_imgs = self.sample_batch[:8]
        # Use the real number of samples so a short batch cannot cause an
        # out-of-range index.
        n = int(sample_imgs.shape[0])
        if n == 0:
            return

        z_mean, z_log_var, z = self.model.encoder(sample_imgs)
        reconstructed = self.model.decoder(z)

        # One figure serves both the saved file and the live display.
        # squeeze=False keeps `axes` two-dimensional even when n == 1.
        fig, axes = plt.subplots(2, n, figsize=(n * 1.5, 3), squeeze=False)
        for i in range(n):
            self._draw(axes[0, i], sample_imgs[i])
            self._draw(axes[1, i], reconstructed[i])
        fig.tight_layout()

        # Save before the title is added
        path = os.path.join(self.save_dir, f"epoch_{epoch+1:03d}.png")
        fig.savefig(path)
        print(f"üåÄ Letras alucinadas guardadas en: {path}")

        # Live display, with the epoch number as title
        fig.suptitle(f"Epoch {epoch+1}", fontsize=16)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()
        plt.close(fig)

        # Keep the reconstructed batch for GIF creation
        self.generated_images.append(reconstructed.numpy())


# ============================================================
# ‚öôÔ∏è Definici√≥n de p√©rdida del VAE
# ============================================================

# Not used by vae.fit: the VAE model defines its own train_step, which is
# where the KL term is added.
def vae_total_loss(y_true, y_pred):
    """Return the reconstruction term only: mean binary cross-entropy scaled by IMG_SIZE * IMG_SIZE * 3."""
    mean_bce = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
    reconstruction_loss = mean_bce * IMG_SIZE * IMG_SIZE * 3
    return reconstruction_loss


# # ============================================================
# # üß† Entrenamiento del VAE (versi√≥n estable) - DEPRECATED
# # ============================================================

# # obtenemos un batch de muestra para el callback
# sample_batch = next(iter(train_ds_no_labels))

# vae = VAE(encoder, decoder)
# vae.compile(optimizer=tf.keras.optimizers.Adam(), loss=vae_total_loss)

# vae.fit(
#     train_ds_no_labels,
#     validation_data=val_ds_no_labels,
#     epochs=EPOCHS,
#     steps_per_epoch=50,
#     validation_steps=10,
#     callbacks=[VisualCallback(sample_batch)],
#     verbose=1
# )


# # ============================================================
# # üíæ Guardado de modelos entrenados - DEPRECATED
# # ============================================================

# decoder.save("/content/drive/MyDrive/maquina-de-contrapropaganda/models/decoder_solo.keras")
# encoder.save("/content/drive/MyDrive/maquina-de-contrapropaganda/models/encoder_solo.keras")
# vae.save("/content/drive/MyDrive/maquina-de-contrapropaganda/models/vae_completo.keras")

# print("‚úÖ Modelos guardados correctamente en Drive.")

In [None]:
# ============================================================
# ‚öôÔ∏è Funci√≥n para preparar el Dataset por Letra (GRIS)
# ============================================================

def prepare_dataset_for_letter(base_ds, target_label_name, batch_size=32):
    """Build an endlessly repeating greyscale dataset for a single letter.

    Args:
        base_ds: dataset of (image, label) pairs with integer labels.
        target_label_name: label name (e.g. "A") of the letter to keep.
        batch_size: number of images per batch (default 32).

    Returns:
        A dataset of float32 image batches of shape
        [batch, IMG_SIZE, IMG_SIZE, 1], shuffled, repeated and prefetched.
        Because of repeat() it never ends, so callers must bound it
        (e.g. steps_per_epoch).
    """
    autotune = tf.data.AUTOTUNE
    target_label_int = builder.info.features["label"].str2int(target_label_name)

    def ensure_valid_image(img):
        # float32, then a single grey channel, then the fixed working size
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = tf.image.rgb_to_grayscale(img)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        # Pin the static shape to [IMG_SIZE, IMG_SIZE, 1]
        return tf.ensure_shape(img, [IMG_SIZE, IMG_SIZE, 1])

    ds_final = (
        base_ds
        # 1. keep only the requested letter and drop the label
        .filter(lambda x, y: tf.equal(y, target_label_int))
        .map(lambda x, y: ensure_valid_image(x), num_parallel_calls=autotune)
        # 2. cache the small per-letter subset so later epochs do not re-read
        #    and re-filter the whole base dataset
        .cache()
        .shuffle(512)
        .batch(batch_size)
        .repeat()
        .prefetch(autotune)
    )

    return ds_final

print("‚úÖ Funci√≥n prepare_dataset_for_letter actualizada a GRIS (1 canal).")

In [None]:
# ============================================================
# üß† Definici√≥n del Encoder (versi√≥n estable GRIS)
# ============================================================

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
# Se asumen LATENT_DIM=20, IMG_SIZE=64

def sampling(args):
    """Draw z = z_mean + exp(0.5 * z_log_var) * epsilon with epsilon ~ N(0, 1).

    `args` is the pair (z_mean, z_log_var); the noise has the same
    (batch, dim) shape as z_mean.
    """
    mean, log_var = args
    noise_shape = (tf.shape(mean)[0], tf.shape(mean)[1])
    noise = tf.random.normal(shape=noise_shape)
    return mean + tf.exp(0.5 * log_var) * noise

# Size of the latent vector. No cell of this notebook assigns LATENT_DIM, so
# keep an existing value if one is already defined and otherwise fall back to
# 20, the value this cell's notes assume.
LATENT_DIM = globals().get("LATENT_DIM", 20)


def make_encoder_model():
    """Build the convolutional encoder for greyscale images.

    Input: (IMG_SIZE, IMG_SIZE, 1). Three stride-2 convolutions, a 256-unit
    dense layer, then two LATENT_DIM-sized heads (z_mean, z_log_var) and a
    sampled latent vector z.

    Returns:
        keras.Model named "encoder" with outputs [z_mean, z_log_var, z].
    """
    encoder_inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 1))

    # Each stride-2 convolution halves the spatial resolution
    x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
    x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)

    x = layers.Flatten()(x)
    x = layers.Dense(256, activation="relu")(x)

    z_mean = layers.Dense(LATENT_DIM, name="z_mean")(x)
    z_log_var = layers.Dense(LATENT_DIM, name="z_log_var")(x)
    # `sampling` draws z from the two heads
    z = layers.Lambda(sampling, name="z")([z_mean, z_log_var])

    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
    return encoder

print("‚úÖ Funci√≥n make_encoder_model actualizada a GRIS (1 canal de entrada).")

In [None]:
# ============================================================
# üß† Definici√≥n del Decoder (versi√≥n estable GRIS)
# ============================================================

# Size of the latent vector. No cell of this notebook assigns LATENT_DIM, so
# keep an existing value if one is already defined and otherwise fall back to
# 20, the value assumed by the notes in the encoder cell.
LATENT_DIM = globals().get("LATENT_DIM", 20)


def make_decoder_model():
    """Build the decoder that maps a latent vector to a greyscale image.

    Input: (LATENT_DIM,). A dense layer reshaped to 8x8x64 is upsampled by
    three stride-2 transposed convolutions; the last one has a single
    sigmoid channel.

    Returns:
        keras.Model named "decoder".
    """
    latent_inputs = keras.Input(shape=(LATENT_DIM,))
    x = layers.Dense(8 * 8 * 64, activation="relu")(latent_inputs)
    x = layers.Reshape((8, 8, 64))(x)
    x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)  # 16x16
    x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)  # 32x32

    # Final layer: one output channel, sigmoid keeps values in (0, 1)
    decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", strides=2, padding="same")(x)  # 64x64x1

    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
    return decoder

print("‚úÖ Funci√≥n make_decoder_model actualizada a GRIS (1 canal de salida).")

In [None]:
# ============================================================
# ‚öôÔ∏è Definiciones adicionales para el entrenamiento
# ============================================================

# Minimum number of examples a letter needs in order to be trained
MIN_SAMPLES_FOR_TRAINING = 10

# Batch size used by the training cells to compute steps per epoch
BATCH_SIZE = 32

# Number of epochs per letter
EPOCHS_PER_LETTER = 50 # Adjust to your needs

# Folder where the trained models are saved.
# NOTE(review): no drive.mount(...) call is visible in this notebook - confirm
# Google Drive is mounted before this runs, otherwise makedirs presumably
# creates a plain local folder under /content/drive.
MODEL_DIR = "/content/drive/MyDrive/maquina-de-contrapropaganda/models_por_letra"
import os
os.makedirs(MODEL_DIR, exist_ok=True)


# Definici√≥n de la clase VAE (aseg√∫rate de que esta definici√≥n coincida con la que necesitas)
from tensorflow import keras
import tensorflow as tf

class VAE(keras.Model):
    """Variational auto-encoder that wraps an encoder and a decoder model.

    Args:
        encoder: model returning (z_mean, z_log_var, z) for a batch of images.
        decoder: model mapping z back to an image batch.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def train_step(self, data):
        """Run one optimisation step on a batch of images.

        Returns:
            dict with "loss" (reconstruction + KL), "reconstruction_loss"
            and "kl_loss" for this batch.
        """
        # Accept batches delivered as (x,) or (x, y) as well as a bare tensor
        if isinstance(data, (tuple, list)):
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # binary_crossentropy averages over the channel axis, leaving one
            # value per pixel. Summing over the two spatial axes and averaging
            # over the batch equals mean * H * W for single-channel images,
            # without relying on the global IMG_SIZE.
            per_pixel_bce = tf.keras.losses.binary_crossentropy(data, reconstruction)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(per_pixel_bce, axis=(1, 2))
            )
            # KL divergence between N(z_mean, exp(z_log_var)) and N(0, 1)
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {"loss": total_loss, "reconstruction_loss": reconstruction_loss, "kl_loss": kl_loss}

    def call(self, inputs):
        """Encode `inputs`, decode the sampled z and return the reconstruction."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)
        return reconstruction

print("‚úÖ Variables y clase VAE definidas.")

In [None]:
# ============================================================
# CELL 1/2: VAE TRAINING - FIRST HALF (A-M)
# ============================================================

# Relies on names defined by earlier cells: builder, ds, math, os, tf,
# prepare_dataset_for_letter, make_encoder_model, make_decoder_model, VAE,
# VisualCallback, MIN_SAMPLES_FOR_TRAINING, BATCH_SIZE, EPOCHS_PER_LETTER, MODEL_DIR.

def get_label_int(label_name):
    """Converts a letter label name (string) to its integer representation."""
    return builder.info.features["label"].str2int(label_name)

# 1. Letters handled by this cell: the first half of the label list
num_letras_total = len(builder.info.features["label"].names)
mitad = math.ceil(num_letras_total / 2)  # 13 for the 26 labels

LETRAS_A_ENTRENAR_PRIMERA_MITAD = builder.info.features["label"].names[:mitad]

# 2. One independent VAE per letter
for letter in LETRAS_A_ENTRENAR_PRIMERA_MITAD:
    print(f"\n====================== INICIANDO ENTRENAMIENTO: {letter} ======================")

    target_label_int = get_label_int(letter)

    # 2.1 Count the examples of this letter in the labelled dataset
    count = sum(1 for _ in ds.filter(lambda x, y: tf.equal(y, target_label_int)))

    if count < MIN_SAMPLES_FOR_TRAINING:
        print(f"‚ö†Ô∏è Saltando la letra '{letter}': {count} ejemplos. Se requieren al menos {MIN_SAMPLES_FOR_TRAINING}.")
        continue

    # 2.2 The per-letter dataset repeats forever, so bound each epoch explicitly
    STEPS_PER_EPOCH = math.ceil(count / BATCH_SIZE)

    # 2.3 Greyscale dataset restricted to this letter
    train_ds_letter = prepare_dataset_for_letter(ds, letter)

    # 2.4 Fixed sample of up to 8 images for the visual callback
    try:
        sample_batch = next(iter(train_ds_letter.take(1).unbatch().take(8).batch(8)))
        print(f"   (Total de im√°genes: {count} | Steps/Epoch: {STEPS_PER_EPOCH})")
    except Exception as e:
        print(f"‚ùå Fall√≥ la obtenci√≥n del sample_batch para '{letter}': {e}. Saltando.")
        continue

    # 2.5 Fresh models for every letter. Clearing the Keras session first
    #     releases the global state kept for the previous letter's models.
    tf.keras.backend.clear_session()
    try:
        encoder = make_encoder_model()
        decoder = make_decoder_model()
        vae = VAE(encoder, decoder)
    except NameError as err:
        print("‚ùå ERROR GRAVE DE ARQUITECTURA. Verifica que las celdas de arquitectura est√©n ejecutadas.")
        print(f"   ({err})")
        # A missing definition fails identically for every remaining letter:
        # stop here instead of repeating the error and then announcing success.
        break

    # 2.6 Compile and train
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))
    print(f"üîπ Iniciando entrenamiento VAE para la letra '{letter}'...")

    save_dir = f"/content/outputs/{letter}"

    vae.fit(
        train_ds_letter,
        epochs=EPOCHS_PER_LETTER,
        steps_per_epoch=STEPS_PER_EPOCH,
        callbacks=[VisualCallback(sample_batch, save_dir=save_dir)],
        verbose=1
    )

    # 2.7 Save encoder and decoder separately
    decoder.save(os.path.join(MODEL_DIR, f"decoder_{letter}.keras"))
    encoder.save(os.path.join(MODEL_DIR, f"encoder_{letter}.keras"))

    print(f"‚úÖ Modelos y salidas para la letra '{letter}' guardados en: {MODEL_DIR}")
else:
    # Reached only when the loop was not aborted by the architecture error
    print("\nüéâ PRIMERA MITAD (A-M) COMPLETADA. ¬°Guarda tu trabajo y pasa a la Celda 2!")

In [None]:
# ============================================================
# ‚öôÔ∏è CONVERSI√ìN MASIVA A TENSORFLOW.JS Y COMPRESI√ìN ZIP
# ============================================================

import os
import zipfile
import subprocess # Para ejecutar comandos del sistema (como la conversi√≥n)
import tensorflow as tf
from tensorflow import keras

# 1. Instalaci√≥n de la herramienta de conversi√≥n
print("1. Instalando tensorflowjs...")
!pip install tensorflowjs -q

# --- Rutas de Configuraci√≥n ---
# Directorio donde se guardaron tus modelos Keras (Decoders y Encoders)
MODEL_DIR = "/content/drive/MyDrive/maquina-de-contrapropaganda/models_por_letra"

# Directorio donde se guardar√°n los modelos convertidos a TF.js
TFJS_DIR = "/content/tfjs_models_final"
os.makedirs(TFJS_DIR, exist_ok=True)

# Nombre del archivo ZIP final
ZIP_FILE = "/content/tfjs_models_for_github.zip"

print(f"2. Convertiendo modelos de {MODEL_DIR} a {TFJS_DIR}...")
print("-" * 50)

# 2. Conversi√≥n Masiva (Iterar sobre cada archivo .keras)
for filename in os.listdir(MODEL_DIR):
    if filename.endswith(".keras"):

        # Ejemplo: 'decoder_A.keras' -> 'A_decoder'
        base_name = filename.replace(".keras", "").replace("_", "_")

        # Rutas de origen y destino
        keras_path = os.path.join(MODEL_DIR, filename)
        tfjs_output_path = os.path.join(TFJS_DIR, base_name)

        # Comando de conversi√≥n: usa subprocess para ejecutar la CLI de tfjs
        command = [
            "tensorflowjs_converter",
            "--input_format=keras",
            "--output_format=tfjs_graph_model",
            keras_path,
            tfjs_output_path
        ]

        try:
            # Ejecuta la conversi√≥n
            subprocess.run(command, check=True, capture_output=True, text=True)
            print(f"‚úÖ Convertido: {filename} -> {base_name}/")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå ERROR al convertir {filename}:")
            print(e.stderr)

print("-" * 50)

# 3. Compresi√≥n de los Modelos Convertidos
print(f"3. Comprimiendo la carpeta {TFJS_DIR} en {ZIP_FILE}...")

# Crear el archivo ZIP
with zipfile.ZipFile(ZIP_FILE, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Recorrer el directorio TFJS_DIR y a√±adir todos los archivos
    for root, dirs, files in os.walk(TFJS_DIR):
        for file in files:
            file_path = os.path.join(root, file)
            # El arcname es el nombre del archivo dentro del ZIP (ruta relativa)
            arcname = os.path.relpath(file_path, TFJS_DIR)
            zipf.write(file_path, os.path.join("tfjs_models", arcname))

print("‚úÖ Compresi√≥n completada.")
print("-" * 50)

# 4. Instrucciones para la Descarga
print("\n¬°FINALIZADO! Descarga el archivo ZIP:")
print(f"El archivo '{ZIP_FILE.split('/')[-1]}' est√° listo para descargar.")

# C√≥digo para forzar la descarga en Colab
from google.colab import files
files.download(ZIP_FILE)

In [None]:
# ============================================================
# CELL 2/2: VAE TRAINING - SECOND HALF (N-Z)
# ============================================================

# Relies on names defined by earlier cells: builder, ds, math, os, tf,
# prepare_dataset_for_letter, make_encoder_model, make_decoder_model, VAE,
# VisualCallback, MIN_SAMPLES_FOR_TRAINING, BATCH_SIZE, EPOCHS_PER_LETTER, MODEL_DIR.

def get_label_int(label_name):
    """Converts a letter label name (string) to its integer representation."""
    return builder.info.features["label"].str2int(label_name)

# 1. Letters handled by this cell: the second half of the label list
num_letras_total = len(builder.info.features["label"].names)
mitad = math.ceil(num_letras_total / 2)

LETRAS_A_ENTRENAR_SEGUNDA_MITAD = builder.info.features["label"].names[mitad:]

print(f"Iniciando el entrenamiento de la segunda mitad: {LETRAS_A_ENTRENAR_SEGUNDA_MITAD}")

# 2. One independent VAE per letter
for letter in LETRAS_A_ENTRENAR_SEGUNDA_MITAD:
    print(f"\n====================== INICIANDO ENTRENAMIENTO: {letter} ======================")

    target_label_int = get_label_int(letter)

    # 2.1 Count the examples of this letter in the labelled dataset
    count = sum(1 for _ in ds.filter(lambda x, y: tf.equal(y, target_label_int)))

    if count < MIN_SAMPLES_FOR_TRAINING:
        print(f"‚ö†Ô∏è Saltando la letra '{letter}': {count} ejemplos. Se requieren al menos {MIN_SAMPLES_FOR_TRAINING}.")
        continue

    # 2.2 The per-letter dataset repeats forever, so bound each epoch explicitly
    STEPS_PER_EPOCH = math.ceil(count / BATCH_SIZE)

    # 2.3 Greyscale dataset restricted to this letter
    train_ds_letter = prepare_dataset_for_letter(ds, letter)

    # 2.4 Fixed sample of up to 8 images for the visual callback
    try:
        sample_batch = next(iter(train_ds_letter.take(1).unbatch().take(8).batch(8)))
        print(f"   (Total de im√°genes: {count} | Steps/Epoch: {STEPS_PER_EPOCH})")
    except Exception as e:
        print(f"‚ùå Fall√≥ la obtenci√≥n del sample_batch para '{letter}': {e}. Saltando.")
        continue

    # 2.5 Fresh models for every letter. Clearing the Keras session first
    #     releases the global state kept for the previous letter's models.
    tf.keras.backend.clear_session()
    try:
        encoder = make_encoder_model()
        decoder = make_decoder_model()
        vae = VAE(encoder, decoder)
    except NameError as err:
        print("‚ùå ERROR GRAVE DE ARQUITECTURA. Verifica que las celdas de arquitectura est√©n ejecutadas.")
        print(f"   ({err})")
        # A missing definition fails identically for every remaining letter:
        # stop here instead of repeating the error and then announcing success.
        break

    # 2.6 Compile and train
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))
    print(f"üîπ Iniciando entrenamiento VAE para la letra '{letter}'...")

    save_dir = f"/content/outputs/{letter}"

    vae.fit(
        train_ds_letter,
        epochs=EPOCHS_PER_LETTER,
        steps_per_epoch=STEPS_PER_EPOCH,
        callbacks=[VisualCallback(sample_batch, save_dir=save_dir)],
        verbose=1
    )

    # 2.7 Save encoder and decoder separately
    decoder.save(os.path.join(MODEL_DIR, f"decoder_{letter}.keras"))
    encoder.save(os.path.join(MODEL_DIR, f"encoder_{letter}.keras"))

    print(f"‚úÖ Modelos y salidas para la letra '{letter}' guardados en: {MODEL_DIR}")
else:
    # Reached only when the loop was not aborted by the architecture error
    print("\nüéâ ENTRENAMIENTO COMPLETO DE TODAS LAS LETRAS (A-Z).")

In [None]:
# ============================================================
# ‚öôÔ∏è CONVERSI√ìN MASIVA A TENSORFLOW.JS Y COMPRESI√ìN ZIP
# ============================================================

import os
import zipfile
import subprocess # Para ejecutar comandos del sistema (como la conversi√≥n)
import tensorflow as tf
from tensorflow import keras

# 1. Instalaci√≥n de la herramienta de conversi√≥n
print("1. Instalando tensorflowjs...")
!pip install tensorflowjs -q

# --- Rutas de Configuraci√≥n ---
# Directorio donde se guardaron tus modelos Keras (Decoders y Encoders)
MODEL_DIR = "/content/drive/MyDrive/maquina-de-contrapropaganda/models_por_letra"

# Directorio donde se guardar√°n los modelos convertidos a TF.js
TFJS_DIR = "/content/tfjs_models_final"
os.makedirs(TFJS_DIR, exist_ok=True)

# Nombre del archivo ZIP final
ZIP_FILE = "/content/tfjs_models_for_github.zip"

print(f"2. Convertiendo modelos de {MODEL_DIR} a {TFJS_DIR}...")
print("-" * 50)

# 2. Conversi√≥n Masiva (Iterar sobre cada archivo .keras)
for filename in os.listdir(MODEL_DIR):
    if filename.endswith(".keras"):

        # Ejemplo: 'decoder_A.keras' -> 'A_decoder'
        base_name = filename.replace(".keras", "").replace("_", "_")

        # Rutas de origen y destino
        keras_path = os.path.join(MODEL_DIR, filename)
        tfjs_output_path = os.path.join(TFJS_DIR, base_name)

        # Comando de conversi√≥n: usa subprocess para ejecutar la CLI de tfjs
        command = [
            "tensorflowjs_converter",
            "--input_format=keras",
            "--output_format=tfjs_graph_model",
            keras_path,
            tfjs_output_path
        ]

        try:
            # Ejecuta la conversi√≥n
            subprocess.run(command, check=True, capture_output=True, text=True)
            print(f"‚úÖ Convertido: {filename} -> {base_name}/")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå ERROR al convertir {filename}:")
            print(e.stderr)

print("-" * 50)

# 3. Compresi√≥n de los Modelos Convertidos
print(f"3. Comprimiendo la carpeta {TFJS_DIR} en {ZIP_FILE}...")

# Crear el archivo ZIP
with zipfile.ZipFile(ZIP_FILE, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Recorrer el directorio TFJS_DIR y a√±adir todos los archivos
    for root, dirs, files in os.walk(TFJS_DIR):
        for file in files:
            file_path = os.path.join(root, file)
            # El arcname es el nombre del archivo dentro del ZIP (ruta relativa)
            arcname = os.path.relpath(file_path, TFJS_DIR)
            zipf.write(file_path, os.path.join("tfjs_models", arcname))

print("‚úÖ Compresi√≥n completada.")
print("-" * 50)

# 4. Instrucciones para la Descarga
print("\n¬°FINALIZADO! Descarga el archivo ZIP:")
print(f"El archivo '{ZIP_FILE.split('/')[-1]}' est√° listo para descargar.")

# C√≥digo para forzar la descarga en Colab
from google.colab import files
files.download(ZIP_FILE)