**Entorno (TF/Keras, seeds, GPU)**

In [2]:
# Celda 0: entorno
import os, random, json, math, sys
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow import keras

print("Python:", sys.version)
print("TensorFlow:", tf.__version__)
print("Keras (tf.keras):", keras.__version__)

# Semillas para reproducibilidad
SEED = 1337
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

# Limitar hilos CPU (opcional; ayuda en Windows a que no se “trabe”)
tf.config.threading.set_intra_op_parallelism_threads(2)
tf.config.threading.set_inter_op_parallelism_threads(2)

# Habilitar growth de memoria GPU si existe
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU disponible:", gpus)
    except Exception as e:
        print("No se pudo activar memory_growth:", e)

AUTOTUNE = tf.data.AUTOTUNE



Python: 3.11.14 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 18:30:03) [MSC v.1929 64 bit (AMD64)]
TensorFlow: 2.20.0
Keras (tf.keras): 3.11.3


**Parámetros, rutas y semilla**

In [3]:
# ==== Celda 2: parámetros y rutas ====
from pathlib import Path

# Detecta automáticamente la carpeta del proyecto.
# Si este cuaderno NO está dentro de "mansory", usa el fallback explícito.
PROJECT_ROOT = Path.cwd()
if not (PROJECT_ROOT / "train").exists():
    PROJECT_ROOT = Path(r"C:\Users\User\mansory")  # ← CAMBIA si tu ruta es distinta

# Directorios de datos
TRAIN_DIR = PROJECT_ROOT / "train"   # entrenamiento
VAL_DIR   = PROJECT_ROOT / "test"    # validación/pruebas
REAL_DIR  = PROJECT_ROOT / "real"    # (opcional) uso posterior

# Directorio de modelos y logs
MODEL_DIR = PROJECT_ROOT / "modelos"
MODEL_DIR.mkdir(parents=True, exist_ok=True)

LOGS_DIR = MODEL_DIR / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)

# Archivos de modelo y clases
MODEL_REGULAR = MODEL_DIR / "modelo_mansory.keras"       # mejor por val_accuracy
MODEL_BEST95  = MODEL_DIR / "modelo_mansory_95.keras"    # mejor ≥95% (si se alcanza)
CLASSES_JSON  = MODEL_DIR / "classes.json"               # nombres de clases

# Mostrar para verificación
print("PROJECT_ROOT:", PROJECT_ROOT.resolve())
print("TRAIN_DIR   :", TRAIN_DIR)
print("VAL_DIR     :", VAL_DIR)
print("REAL_DIR    :", REAL_DIR)
print("MODEL_DIR   :", MODEL_DIR)
print("LOGS_DIR    :", LOGS_DIR)
print("MODEL_REGULAR:", MODEL_REGULAR)
print("MODEL_BEST95 :", MODEL_BEST95)
print("CLASSES_JSON :", CLASSES_JSON)




PROJECT_ROOT: C:\Users\User\mansory
TRAIN_DIR   : C:\Users\User\mansory\train
VAL_DIR     : C:\Users\User\mansory\test
REAL_DIR    : C:\Users\User\mansory\real
MODEL_DIR   : C:\Users\User\mansory\modelos
LOGS_DIR    : C:\Users\User\mansory\modelos\logs
MODEL_REGULAR: C:\Users\User\mansory\modelos\modelo_mansory.keras
MODEL_BEST95 : C:\Users\User\mansory\modelos\modelo_mansory_95.keras
CLASSES_JSON : C:\Users\User\mansory\modelos\classes.json


**Escaneo de carpetas, construcción de datasets y class weights**

In [4]:
# ==== Celda 3B: dataset balanceado + val ====
from pathlib import Path
import glob, os, json, random
import numpy as np
import tensorflow as tf
from tensorflow import keras

# --- usa lo que ya tengas definido ---
# TRAIN_DIR, VAL_DIR (o TEST_DIR), IMG_SIZE, BATCH_SIZE
# MODEL_DIR, CLASSES_JSON ya vienen de tu Celda 2

AUTOTUNE = tf.data.AUTOTUNE
random.seed(42)

IMG_SIZE = (160, 160)  # debe coincidir con tu modelo
BATCH_SIZE = 32

# -------- escaneo de carpetas -> nombres de clases + rutas ----------
def canonical_name(parent, child=None):
    norm = lambda s: s.strip().replace(" ", "_")
    p = norm(parent)
    if p.lower().startswith("sin"):
        return "sin_grietas"
    return f"{p}_{norm(child)}"

def scan_leaf_dirs(root: Path):
    # Devuelve: class_names (orden estable) y lista de (files, label)
    root = Path(root)
    class_to_files = {}
    for ptype in sorted([d for d in root.iterdir() if d.is_dir()]):
        if ptype.name.lower().startswith("sin"):
            files = sum([glob.glob(str(ptype / f"*{ext}")) for ext in [".jpg",".jpeg",".png",".bmp",".webp"]], [])
            if files:
                cname = "sin_grietas"
                class_to_files.setdefault(cname, []).extend(sorted(files))
        else:
            for sev in sorted([d for d in ptype.iterdir() if d.is_dir()]):
                files = sum([glob.glob(str(sev / f"*{ext}")) for ext in [".jpg",".jpeg",".png",".bmp",".webp"]], [])
                if files:
                    cname = canonical_name(ptype.name, sev.name)
                    class_to_files.setdefault(cname, []).extend(sorted(files))
    class_names = sorted(class_to_files.keys())
    files = []
    labels = []
    for i, cname in enumerate(class_names):
        fs = class_to_files[cname]
        files.extend(fs)
        labels.extend([i]*len(fs))
    return class_names, files, labels, class_to_files

# --- escanea train y val ---
class_names, train_files_all, train_labels_all, train_map = scan_leaf_dirs(Path(TRAIN_DIR))
_,          val_files_all,   val_labels_all,   _         = scan_leaf_dirs(Path(VAL_DIR))

NUM_CLASSES = len(class_names)
print("Clases:", NUM_CLASSES, class_names)

# ---- guarda clases (para inferencia) ----
CLASSES_JSON.write_text(json.dumps({"class_names": class_names}, ensure_ascii=False, indent=2), encoding="utf-8")
print("Clases guardadas en", CLASSES_JSON)

# ------------- muestreo balanceado para train ------------------
# objetivo por clase / época
TARGET_PER_CLASS = 300
CAP_SIN_GRIETAS  = 600  # para sin_grietas permitimos más si quieres
IMG_EXTS = [".jpg",".jpeg",".png",".bmp",".webp"]

def make_aug_layer():
    return keras.Sequential([
        keras.layers.RandomFlip("horizontal"),
        keras.layers.RandomRotation(0.08),
        keras.layers.RandomZoom(0.2),
        keras.layers.RandomContrast(0.2),
        keras.layers.RandomBrightness(0.2),
    ], name="aug")

AUG = make_aug_layer()

def load_preprocess(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    img = (img - 0.5) * 2.0
    y   = tf.one_hot(label, depth=NUM_CLASSES)
    return img, y

def load_preprocess_val(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    img = (img - 0.5) * 2.0
    y   = tf.one_hot(label, depth=NUM_CLASSES)
    return img, y

# construir datasets por clase con oversampling + augment
per_class_datasets = []
counts = []
for cid, cname in enumerate(class_names):
    paths = train_map[cname]
    n = len(paths)
    counts.append(n)
    tgt = CAP_SIN_GRIETAS if cname == "sin_grietas" else TARGET_PER_CLASS
    if n >= tgt:
        chosen = random.sample(paths, tgt)
    else:
        # oversampling con reposición
        chosen = [random.choice(paths) for _ in range(tgt)]
    ds = tf.data.Dataset.from_tensor_slices((chosen, [cid]*len(chosen)))
    ds = ds.shuffle(len(chosen), seed=42)
    ds = ds.map(lambda p,l: load_preprocess(p,l), num_parallel_calls=AUTOTUNE)
    ds = ds.map(lambda x,y: (AUG(x, training=True), y), num_parallel_calls=AUTOTUNE)
    per_class_datasets.append(ds.repeat())

print("Recuento original por clase:", dict(zip(class_names, counts)))

# mezcla balanceada (cada clase aporta ~1/NUM_CLASSES)
train_ds = tf.data.Dataset.sample_from_datasets(per_class_datasets, weights=[1.0/NUM_CLASSES]*NUM_CLASSES, seed=42)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# ----- val ds (sin augment, sin balance) -----
val_ds = tf.data.Dataset.from_tensor_slices((val_files_all, val_labels_all))
val_ds = val_ds.map(load_preprocess_val, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)



Clases: 10 ['compresion_vertical_Grave', 'compresion_vertical_Leve', 'compresion_vertical_Moderada', 'friccion_cortante_escalonada_Grave', 'friccion_cortante_escalonada_Leve', 'friccion_cortante_escalonada_Moderada', 'sin_grietas', 'tension_diagonal_inclinadas_Grave', 'tension_diagonal_inclinadas_Leve', 'tension_diagonal_inclinadas_Moderada']
Clases guardadas en C:\Users\User\mansory\modelos\classes.json
Recuento original por clase: {'compresion_vertical_Grave': 279, 'compresion_vertical_Leve': 22, 'compresion_vertical_Moderada': 688, 'friccion_cortante_escalonada_Grave': 330, 'friccion_cortante_escalonada_Leve': 28, 'friccion_cortante_escalonada_Moderada': 1959, 'sin_grietas': 23553, 'tension_diagonal_inclinadas_Grave': 18, 'tension_diagonal_inclinadas_Leve': 3, 'tension_diagonal_inclinadas_Moderada': 231}


**Modelo (MobileNetV2 + cabeza densa)**

In [None]:
# ==== Celda 4B: Focal Loss + entrenamiento ====
import numpy as np
from tensorflow import keras
import tensorflow as tf

# calcula alpha (Cui et al. "effective number of samples")
beta = 0.9999
counts_arr = np.array([len(train_map[c]) for c in class_names], dtype=np.float32)
eff_num = (1 - np.power(beta, counts_arr)) / (1 - beta)
alpha = (np.sum(eff_num) / eff_num)
alpha = alpha / np.mean(alpha)  # normaliza
alpha_tf = tf.constant(alpha, dtype=tf.float32)
print("alpha:", np.round(alpha, 3))

def categorical_focal_loss(alpha_vec, gamma=2.0):
    def loss(y_true, y_pred):
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1. - eps)
        ce = -y_true * tf.math.log(y_pred)
        weight = tf.pow(1 - y_pred, gamma) * alpha_vec
        return tf.reduce_sum(weight * ce, axis=-1)
    return loss

# modelo (puedes mantener tu arquitectura actual si ya la tienes)
base = keras.applications.MobileNetV2(
    include_top=False, weights="imagenet", input_shape=(*IMG_SIZE,3)
)
base.trainable = False  # warmup
x = keras.layers.GlobalAveragePooling2D()(base.output)
x = keras.layers.Dropout(0.3)(x)
out = keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = keras.Model(base.input, out, name="mansory_mobilenetv2")

loss_fn = categorical_focal_loss(alpha_tf, gamma=2.0)
model.compile(optimizer=keras.optimizers.Adam(3e-4), loss=loss_fn, metrics=["accuracy"])

# callbacks
es  = keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True, monitor="val_accuracy")
rlr = keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.5, monitor="val_accuracy")
ck  = keras.callbacks.ModelCheckpoint(str(MODEL_REGULAR), monitor="val_accuracy",
                                      save_best_only=True, save_weights_only=False)

class Save95(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if (logs or {}).get("val_accuracy", 0) >= 0.95:
            self.model.save(str(MODEL_BEST95), include_optimizer=False)
            print(f"\n✅ Guardado {MODEL_BEST95} (val_acc ≥ 95%)")

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[es, rlr, ck, Save95()],
    verbose=1
)

# unfreeze fino (opcional): afina últimas capas si val_acc se estanca
base.trainable = True
for layer in base.layers[:-40]:
    layer.trainable = False
model.compile(optimizer=keras.optimizers.Adam(1e-4), loss=loss_fn, metrics=["accuracy"])
history2 = model.fit(
    train_ds, validation_data=val_ds, epochs=10, callbacks=[es, rlr, ck, Save95()], verbose=1
)

# guardamos clases de nuevo por si entrenas desde cero
CLASSES_JSON.write_text(json.dumps({"class_names": class_names}, ensure_ascii=False, indent=2), encoding="utf-8")
print("\nClases guardadas en", CLASSES_JSON)
print("Modelo mejor por val_acc en:", MODEL_REGULAR)
if Path(MODEL_BEST95).exists():
    print("También se guardó:", MODEL_BEST95)


alpha: [7.500e-02 9.410e-01 3.100e-02 6.400e-02 7.400e-01 1.200e-02 2.000e-03
 1.150e+00 6.895e+00 9.100e-02]
Epoch 1/30
  37373/Unknown [1m31064s[0m 831ms/step - accuracy: 0.1664 - loss: 0.5181

**Callbacks (EarlyStopping, LR, Checkpoints, ≥95%)**

In [48]:
from pathlib import Path
import json, numpy as np
import keras
from tensorflow.keras.utils import load_img, img_to_array

# Carpeta base donde están tus archivos (ajústala si fuera distinto)
BASE = Path(r"C:\Users\User\mansory")

# 1) Cargar clases (prioriza el classes.json de la raíz)
CLASSES_JSON = BASE / "classes.json"
if not CLASSES_JSON.exists():
    CLASSES_JSON = BASE / "modelos" / "classes.json"
class_names = json.loads(CLASSES_JSON.read_text(encoding="utf-8"))["class_names"]
print("Nº de clases en JSON:", len(class_names), "→", CLASSES_JSON)

# 2) Listar modelos .keras (en raíz y en /modelos)
keras_files = [*BASE.glob("*.keras"), *(BASE/"modelos").glob("*.keras")]
print("Modelos encontrados:")
for kf in keras_files: print(" -", kf)

# 3) Cargar cada modelo y quedarnos con el que tenga el mismo nº de salidas
picked = None
picked_in_size = None
for kf in keras_files:
    try:
        m = keras.models.load_model(kf)
        num_out = m.output_shape[-1]
        in_shape = m.input_shape[1:4]  # (H, W, C)
        print(f"   {kf.name}: salidas={num_out}, input={in_shape}")
        if num_out == len(class_names):
            picked = kf
            picked_in_size = (int(in_shape[0]), int(in_shape[1]))
            break
    except Exception as e:
        print(f"   {kf.name}: no se pudo cargar -> {e}")

assert picked is not None, (
    "No encontré un .keras con el mismo número de clases que tu JSON. "
    "Deja solo el modelo correcto (de 10 clases) o borra/renombra los viejos."
)

print("\n✅ Usaré:", picked)
print("   Tamaño de entrada esperado:", picked_in_size)

# Deja el modelo y tamaño listos para la siguiente celda
MODEL_PATH = picked
TARGET_SIZE = picked_in_size
model = keras.models.load_model(MODEL_PATH)



Nº de clases en JSON: 10 → C:\Users\User\mansory\classes.json
Modelos encontrados:
 - C:\Users\User\mansory\modelo_mansory.keras
 - C:\Users\User\mansory\modelo_mansory_95.keras
 - C:\Users\User\mansory\modelos\modelo_mansory.keras
   modelo_mansory.keras: salidas=4, input=(160, 160, 3)
   modelo_mansory_95.keras: salidas=10, input=(224, 224, 3)

✅ Usaré: C:\Users\User\mansory\modelo_mansory_95.keras
   Tamaño de entrada esperado: (224, 224)


In [51]:
def split_type_severity(name: str):
    name = name.strip()
    if name.lower().replace("__", "_") == "sin_grietas":
        return "sin grietas", "-"
    parts = name.split("_")
    # La última palabra es la severidad, lo demás es el tipo
    severidad = parts[-1]
    tipo = " ".join(parts[:-1]).replace("_", " ")
    return tipo, severidad

def predict_image(img_path):
    img = load_img(img_path, target_size=TARGET_SIZE)
    x = img_to_array(img)
    x = x / 255.0
    x = (x - 0.5) * 2.0
    x = np.expand_dims(x, 0)

    pred = model.predict(x, verbose=0)
    pred_id = int(np.argmax(pred[0]))
    if pred_id >= len(class_names):
        raise RuntimeError(
            f"pred_id {pred_id} fuera de rango para {len(class_names)} clases. "
            "Asegúrate de que el modelo y el JSON corresponden."
        )
    tipo, sev = split_type_severity(class_names[pred_id])
    print(f"Tipo de grieta: {tipo}\nSeveridad: {sev}")

# Prueba con tu imagen (ajústala si quieres otra)
predict_image(BASE / "grieta-muro.jpg")
# Ejemplos:
# predict_image(BASE / "test" / "compresion_vertical_Grave" / "55.jpg")
# predict_image(BASE / "grieta-muro.jpg")


Tipo de grieta: sin grietas
Severidad: -
