**Entorno (TF/Keras, seeds, GPU)**

In [None]:
# Celda 0: entorno
import os, random, json, math, sys
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow import keras

print("Python:", sys.version)
print("TensorFlow:", tf.__version__)
print("Keras (tf.keras):", keras.__version__)

# Semillas para reproducibilidad
SEED = 1337
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

# Limitar hilos CPU (opcional; ayuda en Windows a que no se ‚Äútrabe‚Äù)
tf.config.threading.set_intra_op_parallelism_threads(2)
tf.config.threading.set_inter_op_parallelism_threads(2)

# Habilitar growth de memoria GPU si existe
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU disponible:", gpus)
    except Exception as e:
        print("No se pudo activar memory_growth:", e)

AUTOTUNE = tf.data.AUTOTUNE



**Par√°metros, rutas y semilla**

In [None]:
# ==== Celda 2: par√°metros y rutas ====
from pathlib import Path

# Detecta autom√°ticamente la carpeta del proyecto.
# Si este cuaderno NO est√° dentro de "mansory", usa el fallback expl√≠cito.
PROJECT_ROOT = Path.cwd()
if not (PROJECT_ROOT / "train").exists():
    PROJECT_ROOT = Path(r"C:\Users\User\mansory")  # ‚Üê CAMBIA si tu ruta es distinta

# Directorios de datos
TRAIN_DIR = PROJECT_ROOT / "train"   # entrenamiento
VAL_DIR   = PROJECT_ROOT / "test"    # validaci√≥n/pruebas
REAL_DIR  = PROJECT_ROOT / "real"    # (opcional) uso posterior

# Directorio de modelos y logs
MODEL_DIR = PROJECT_ROOT / "modelos"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

LOGS_DIR = MODEL_DIR / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)

# Archivos de modelo y clases
MODEL_REGULAR = MODEL_DIR / "modelo_mansory.keras"       # mejor por val_accuracy
MODEL_BEST95  = MODEL_DIR / "modelo_mansory_95.keras"    # mejor ‚â•95% (si se alcanza)
CLASSES_JSON  = MODEL_DIR / "classes.json"               # nombres de clases

# Mostrar para verificaci√≥n
print("PROJECT_ROOT:", PROJECT_ROOT.resolve())
print("TRAIN_DIR   :", TRAIN_DIR)
print("VAL_DIR     :", VAL_DIR)
print("REAL_DIR    :", REAL_DIR)
print("MODEL_DIR   :", MODEL_DIR)
print("LOGS_DIR    :", LOGS_DIR)
print("MODEL_REGULAR:", MODEL_REGULAR)
print("MODEL_BEST95 :", MODEL_BEST95)
print("CLASSES_JSON :", CLASSES_JSON)




**Escaneo de carpetas, construcci√≥n de datasets y class weights**

In [None]:
# ==== Celda 3B: dataset balanceado + val ====
from pathlib import Path
import glob, os, json, random
import numpy as np
import tensorflow as tf
from tensorflow import keras

# --- usa lo que ya tengas definido ---
# TRAIN_DIR, VAL_DIR (o TEST_DIR), IMG_SIZE, BATCH_SIZE
# MODEL_DIR, CLASSES_JSON ya vienen de tu Celda 2

AUTOTUNE = tf.data.AUTOTUNE
random.seed(42)

IMG_SIZE = (160, 160)  # debe coincidir con tu modelo
BATCH_SIZE = 32

# -------- escaneo de carpetas -> nombres de clases + rutas ----------
def canonical_name(parent, child=None):
    norm = lambda s: s.strip().replace(" ", "_")
    p = norm(parent)
    if p.lower().startswith("sin"):
        return "sin_grietas"
    return f"{p}_{norm(child)}"

def scan_leaf_dirs(root: Path):
    # Devuelve: class_names (orden estable) y lista de (files, label)
    root = Path(root)
    class_to_files = {}
    for ptype in sorted([d for d in root.iterdir() if d.is_dir()]):
        if ptype.name.lower().startswith("sin"):
            files = sum([glob.glob(str(ptype / f"*{ext}")) for ext in [".jpg",".jpeg",".png",".bmp",".webp"]], [])
            if files:
                cname = "sin_grietas"
                class_to_files.setdefault(cname, []).extend(sorted(files))
        else:
            for sev in sorted([d for d in ptype.iterdir() if d.is_dir()]):
                files = sum([glob.glob(str(sev / f"*{ext}")) for ext in [".jpg",".jpeg",".png",".bmp",".webp"]], [])
                if files:
                    cname = canonical_name(ptype.name, sev.name)
                    class_to_files.setdefault(cname, []).extend(sorted(files))
    class_names = sorted(class_to_files.keys())
    files = []
    labels = []
    for i, cname in enumerate(class_names):
        fs = class_to_files[cname]
        files.extend(fs)
        labels.extend([i]*len(fs))
    return class_names, files, labels, class_to_files

# --- escanea train y val ---
class_names, train_files_all, train_labels_all, train_map = scan_leaf_dirs(Path(TRAIN_DIR))
_,          val_files_all,   val_labels_all,   _         = scan_leaf_dirs(Path(VAL_DIR))

NUM_CLASSES = len(class_names)
print("Clases:", NUM_CLASSES, class_names)

# ---- guarda clases (para inferencia) ----
CLASSES_JSON.write_text(json.dumps({"class_names": class_names}, ensure_ascii=False, indent=2), encoding="utf-8")
print("Clases guardadas en", CLASSES_JSON)

# ------------- muestreo balanceado para train ------------------
# objetivo por clase / √©poca
TARGET_PER_CLASS = 300
CAP_SIN_GRIETAS  = 600  # para sin_grietas permitimos m√°s si quieres
IMG_EXTS = [".jpg",".jpeg",".png",".bmp",".webp"]

def make_aug_layer():
    return keras.Sequential([
        keras.layers.RandomFlip("horizontal"),
        keras.layers.RandomRotation(0.08),
        keras.layers.RandomZoom(0.2),
        keras.layers.RandomContrast(0.2),
        keras.layers.RandomBrightness(0.2),
    ], name="aug")

AUG = make_aug_layer()

def load_preprocess(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    img = (img - 0.5) * 2.0
    y   = tf.one_hot(label, depth=NUM_CLASSES)
    return img, y

def load_preprocess_val(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    img = (img - 0.5) * 2.0
    y   = tf.one_hot(label, depth=NUM_CLASSES)
    return img, y

# construir datasets por clase con oversampling + augment
per_class_datasets = []
counts = []
for cid, cname in enumerate(class_names):
    paths = train_map[cname]
    n = len(paths)
    counts.append(n)
    tgt = CAP_SIN_GRIETAS if cname == "sin_grietas" else TARGET_PER_CLASS
    if n >= tgt:
        chosen = random.sample(paths, tgt)
    else:
        # oversampling con reposici√≥n
        chosen = [random.choice(paths) for _ in range(tgt)]
    ds = tf.data.Dataset.from_tensor_slices((chosen, [cid]*len(chosen)))
    ds = ds.shuffle(len(chosen), seed=42)
    ds = ds.map(lambda p,l: load_preprocess(p,l), num_parallel_calls=AUTOTUNE)
    ds = ds.map(lambda x,y: (AUG(x, training=True), y), num_parallel_calls=AUTOTUNE)
    per_class_datasets.append(ds.repeat())

print("Recuento original por clase:", dict(zip(class_names, counts)))

# mezcla balanceada (cada clase aporta ~1/NUM_CLASSES)
train_ds = tf.data.Dataset.sample_from_datasets(per_class_datasets, weights=[1.0/NUM_CLASSES]*NUM_CLASSES, seed=42)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# ----- val ds (sin augment, sin balance) -----
val_ds = tf.data.Dataset.from_tensor_slices((val_files_all, val_labels_all))
val_ds = val_ds.map(load_preprocess_val, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)



**Modelo (MobileNetV2 + cabeza densa)**

In [None]:
# ==== Celda 4B: Focal Loss + entrenamiento ====
import numpy as np
from tensorflow import keras
import tensorflow as tf

# calcula alpha (Cui et al. "effective number of samples")
beta = 0.9999
counts_arr = np.array([len(train_map[c]) for c in class_names], dtype=np.float32)
eff_num = (1 - np.power(beta, counts_arr)) / (1 - beta)
alpha = (np.sum(eff_num) / eff_num)
alpha = alpha / np.mean(alpha)  # normaliza
alpha_tf = tf.constant(alpha, dtype=tf.float32)
print("alpha:", np.round(alpha, 3))

def categorical_focal_loss(alpha_vec, gamma=2.0):
    def loss(y_true, y_pred):
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1. - eps)
        ce = -y_true * tf.math.log(y_pred)
        weight = tf.pow(1 - y_pred, gamma) * alpha_vec
        return tf.reduce_sum(weight * ce, axis=-1)
    return loss

# modelo (puedes mantener tu arquitectura actual si ya la tienes)
base = keras.applications.MobileNetV2(
    include_top=False, weights="imagenet", input_shape=(*IMG_SIZE,3)
)
base.trainable = False  # warmup
x = keras.layers.GlobalAveragePooling2D()(base.output)
x = keras.layers.Dropout(0.3)(x)
out = keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = keras.Model(base.input, out, name="mansory_mobilenetv2")

loss_fn = categorical_focal_loss(alpha_tf, gamma=2.0)
model.compile(optimizer=keras.optimizers.Adam(3e-4), loss=loss_fn, metrics=["accuracy"])

# callbacks
es  = keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True, monitor="val_accuracy")
rlr = keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.5, monitor="val_accuracy")
ck  = keras.callbacks.ModelCheckpoint(str(MODEL_REGULAR), monitor="val_accuracy",
                                      save_best_only=True, save_weights_only=False)

class Save95(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if (logs or {}).get("val_accuracy", 0) >= 0.95:
            self.model.save(str(MODEL_BEST95), include_optimizer=False)
            print(f"\n‚úÖ Guardado {MODEL_BEST95} (val_acc ‚â• 95%)")

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[es, rlr, ck, Save95()],
    verbose=1
)

# unfreeze fino (opcional): afina √∫ltimas capas si val_acc se estanca
base.trainable = True
for layer in base.layers[:-40]:
    layer.trainable = False
model.compile(optimizer=keras.optimizers.Adam(1e-4), loss=loss_fn, metrics=["accuracy"])
history2 = model.fit(
    train_ds, validation_data=val_ds, epochs=10, callbacks=[es, rlr, ck, Save95()], verbose=1
)

# guardamos clases de nuevo por si entrenas desde cero
CLASSES_JSON.write_text(json.dumps({"class_names": class_names}, ensure_ascii=False, indent=2), encoding="utf-8")
print("\nClases guardadas en", CLASSES_JSON)
print("Modelo mejor por val_acc en:", MODEL_REGULAR)
if Path(MODEL_BEST95).exists():
    print("Tambi√©n se guard√≥:", MODEL_BEST95)


**Callbacks (EarlyStopping, LR, Checkpoints, ‚â•95%)**

In [22]:
# Celda 5: callbacks
from datetime import datetime

# Guarda siempre el mejor por val_acc
ckpt_best = keras.callbacks.ModelCheckpoint(
    filepath=str(MODEL_REGULAR),
    monitor="val_acc",
    mode="max",
    save_best_only=True,
    save_weights_only=False,
    verbose=1
)

# EarlyStopping
early = keras.callbacks.EarlyStopping(
    monitor="val_acc",
    mode="max",
    patience=6,
    restore_best_weights=True,
    verbose=1
)

# Reduce LR si no mejora
rlrop = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.3,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

# Log a CSV (permite ver progreso y reanudar)
csvlog = keras.callbacks.CSVLogger(str(TRAIN_LOG), append=True)

# Guardar autom√°ticamente si llega a ‚â•95% val_acc
class SaveOn95(keras.callbacks.Callback):
    def __init__(self, path, monitor="val_acc", threshold=0.95):
        super().__init__()
        self.path = str(path)
        self.monitor = monitor
        self.threshold = threshold
        self.saved = False
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        val = logs.get(self.monitor)
        if val is not None and val >= self.threshold and not self.saved:
            self.model.save(self.path)
            self.saved = True
            print(f"\n‚úÖ Alcanzado {self.monitor} ‚â• {self.threshold:.2f}. Guardado: {self.path}\n")

class TrainingEndPrinter(keras.callbacks.Callback):
    def on_train_end(self, logs=None):
        if Path(MODEL_BEST95).exists():
            print(f"\nüéâ Entrenamiento finalizado: se alcanz√≥ ‚â•95% y se guard√≥ {MODEL_BEST95}")
        else:
            print("\n‚ÑπÔ∏è Entrenamiento finalizado: no se alcanz√≥ 95%. Revisa el historial y el mejor modelo por val_acc.")

save95 = SaveOn95(MODEL_BEST95, monitor="val_acc", threshold=0.95)
cbs = [ckpt_best, early, rlrop, csvlog, save95, TrainingEndPrinter()]



**Entrenamiento (con reanudaci√≥n si ya hay modelo)**

In [47]:
# Celda 6: entrenamiento

# Si ya existe un modelo mejor por val_acc, lo cargamos para reanudar desde ah√≠
if MODEL_REGULAR.exists():
    print("Cargando el mejor modelo previo por val_acc:", MODEL_REGULAR)
    model = keras.models.load_model(MODEL_REGULAR)

EPOCHS_TARGET = 30  # intenta llegar hasta aqu√≠ (EarlyStopping puede parar antes)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_TARGET,
    class_weight=class_weight,
    callbacks=cbs,
    verbose=1
)

# Guardar nombres de clases (para inferencia)
with open(CLASSES_JSON, "w", encoding="utf-8") as f:
    json.dump({"class_names": class_names}, f, ensure_ascii=False, indent=2)
print("\nClases guardadas en", CLASSES_JSON)

# Resumen final .3f}")
print("Modelo mejor por val_acc en:", MODEL_REGULAR)
if Path(MODEL_BEST95).exists():
    print("Tambi√©n se guard√≥ (‚â•95%):", MODEL_BEST95)


Cargando el mejor modelo previo por val_acc: C:\Users\User\mansory\modelo_mansory.keras
Epoch 1/30
[1m1356/1356[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 341ms/step - acc: 0.9437 - loss: 0.5380 - top3: 0.9938  
Epoch 1: val_acc improved from None to 0.94227, saving model to modelos\modelo_mansory.keras
[1m1356/1356[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m578s[0m 413ms/step - acc: 0.9432 - loss: 0.5091 - top3: 0.9938 - val_acc: 0.9423 - val_loss: 0.1905 - val_top3: 0.9884 - learning_rate: 3.0000e-05
Epoch 2/30
[1m1356/1356[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 334ms/step - acc: 0.9408 - loss: 0.5066 - top3: 0.9949  
Epoch 2: val_acc improved from 0.94227 to 0.94319, saving model to modelos\modelo_mansory.keras
[1m1356/1356[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m555s[0m 404ms/step -

In [48]:
from pathlib import Path
import json, numpy as np
import keras
from tensorflow.keras.utils import load_img, img_to_array

# Carpeta base donde est√°n tus archivos (aj√∫stala si fuera distinto)
BASE = Path(r"C:\Users\User\mansory")

# 1) Cargar clases (prioriza el classes.json de la ra√≠z)
CLASSES_JSON = BASE / "classes.json"
if not CLASSES_JSON.exists():
    CLASSES_JSON = BASE / "modelos" / "classes.json"
class_names = json.loads(CLASSES_JSON.read_text(encoding="utf-8"))["class_names"]
print("N¬∫ de clases en JSON:", len(class_names), "‚Üí", CLASSES_JSON)

# 2) Listar modelos .keras (en ra√≠z y en /modelos)
keras_files = [*BASE.glob("*.keras"), *(BASE/"modelos").glob("*.keras")]
print("Modelos encontrados:")
for kf in keras_files: print(" -", kf)

# 3) Cargar cada modelo y quedarnos con el que tenga el mismo n¬∫ de salidas
picked = None
picked_in_size = None
for kf in keras_files:
    try:
        m = keras.models.load_model(kf)
        num_out = m.output_shape[-1]
        in_shape = m.input_shape[1:4]  # (H, W, C)
        print(f"   {kf.name}: salidas={num_out}, input={in_shape}")
        if num_out == len(class_names):
            picked = kf
            picked_in_size = (int(in_shape[0]), int(in_shape[1]))
            break
    except Exception as e:
        print(f"   {kf.name}: no se pudo cargar -> {e}")

assert picked is not None, (
    "No encontr√© un .keras con el mismo n√∫mero de clases que tu JSON. "
    "Deja solo el modelo correcto (de 10 clases) o borra/renombra los viejos."
)

print("\n‚úÖ Usar√©:", picked)
print("   Tama√±o de entrada esperado:", picked_in_size)

# Deja el modelo y tama√±o listos para la siguiente celda
MODEL_PATH = picked
TARGET_SIZE = picked_in_size
model = keras.models.load_model(MODEL_PATH)



N¬∫ de clases en JSON: 10 ‚Üí C:\Users\User\mansory\classes.json
Modelos encontrados:
 - C:\Users\User\mansory\modelo_mansory.keras
 - C:\Users\User\mansory\modelo_mansory_95.keras
 - C:\Users\User\mansory\modelos\modelo_mansory.keras
   modelo_mansory.keras: salidas=4, input=(160, 160, 3)
   modelo_mansory_95.keras: salidas=10, input=(224, 224, 3)

‚úÖ Usar√©: C:\Users\User\mansory\modelo_mansory_95.keras
   Tama√±o de entrada esperado: (224, 224)


In [51]:
def split_type_severity(name: str):
    name = name.strip()
    if name.lower().replace("__", "_") == "sin_grietas":
        return "sin grietas", "-"
    parts = name.split("_")
    # La √∫ltima palabra es la severidad, lo dem√°s es el tipo
    severidad = parts[-1]
    tipo = " ".join(parts[:-1]).replace("_", " ")
    return tipo, severidad

def predict_image(img_path):
    img = load_img(img_path, target_size=TARGET_SIZE)
    x = img_to_array(img)
    x = x / 255.0
    x = (x - 0.5) * 2.0
    x = np.expand_dims(x, 0)

    pred = model.predict(x, verbose=0)
    pred_id = int(np.argmax(pred[0]))
    if pred_id >= len(class_names):
        raise RuntimeError(
            f"pred_id {pred_id} fuera de rango para {len(class_names)} clases. "
            "Aseg√∫rate de que el modelo y el JSON corresponden."
        )
    tipo, sev = split_type_severity(class_names[pred_id])
    print(f"Tipo de grieta: {tipo}\nSeveridad: {sev}")

# Prueba con tu imagen (aj√∫stala si quieres otra)
predict_image(BASE / "grieta-muro.jpg")
# Ejemplos:
# predict_image(BASE / "test" / "compresion_vertical_Grave" / "55.jpg")
# predict_image(BASE / "grieta-muro.jpg")


Tipo de grieta: sin grietas
Severidad: -
