In [None]:
# 0) Instalar dependencias
!pip install -q tensorflow opencv-python numpy matplotlib scikit-learn pandas

# 1) Imports + seed
import os, re, cv2, numpy as np, tensorflow as tf, matplotlib.pyplot as plt, pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

# Single seed shared by Keras' global seeding helper, the train/valid/test
# splits and the tf.data shuffle, so repeated runs are comparable.
SEED = 123
tf.keras.utils.set_random_seed(SEED)

# 2) Configuration
DATA_DIR = "/ruta/a/PlantVillage/color"   # <-- CHANGE to your dataset folder (one sub-folder per class)
OUT_DIR = "models"  # directory where the trained model file is written
os.makedirs(OUT_DIR, exist_ok=True)

IMG_SIZE = 300   # side length (pixels) of the square images fed to EfficientNetB3
BATCH = 32       # batch size used by the tf.data pipeline

# Pattern matching the folder-name prefix of the maize classes
# ("Corn___" or "Corn_(maize)___").
MAIZ_REGEX = r"^Corn(?:_\(maize\))?___"


def LIMPIA(s):
    """Turn a class folder name into a human-readable label.

    The maize prefix described by ``MAIZ_REGEX`` is removed, underscores
    become spaces and surrounding whitespace is stripped.

    The prefix is matched case-insensitively because the folder filter
    also uses ``re.IGNORECASE``; otherwise a folder such as
    ``corn_(maize)___healthy`` would pass the filter but keep its prefix
    in the label.

    Args:
        s: Folder (class) name, e.g. ``"Corn_(maize)___Common_rust_"``.

    Returns:
        The cleaned label, e.g. ``"Common rust"``.
    """
    sin_prefijo = re.sub(MAIZ_REGEX, "", s, flags=re.IGNORECASE)
    return sin_prefijo.replace("_", " ").strip()

# 3) Keep only the maize classes
# Walk every class folder under DATA_DIR, keep those whose name matches
# MAIZ_REGEX, and collect one (filepath, label) pair per image file.
filepaths, labels = [], []
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# os.listdir() returns entries in arbitrary, platform-dependent order. Sorting
# makes the row order of `df` deterministic, which the seeded splits rely on
# to be reproducible across machines.
for cls in sorted(os.listdir(DATA_DIR)):
    class_dir = os.path.join(DATA_DIR, cls)
    if not os.path.isdir(class_dir):
        continue
    if not re.match(MAIZ_REGEX, cls, flags=re.IGNORECASE):
        continue
    class_label = LIMPIA(cls)  # same for every file in the folder
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(IMAGE_EXTENSIONS):
            filepaths.append(os.path.join(class_dir, fname))
            labels.append(class_label)

# Fail early with an explicit message instead of an obscure error later in
# train_test_split when DATA_DIR is wrong or contains no maize folders.
if not filepaths:
    raise ValueError(f"No se encontraron imágenes de maíz en {DATA_DIR}")

df = pd.DataFrame({"filepath": filepaths, "label": labels})
classes = sorted(df["label"].unique())
print("Clases:", classes, "Total imágenes:", len(df))

# 4) Splits
# Stratified by label: 80% train, then the remaining 20% is halved into
# validation and test. Both calls share the same seed and shuffle settings.
_split_opts = {"random_state": SEED, "shuffle": True}
train_df, tmp_df = train_test_split(
    df,
    train_size=0.8,
    stratify=df["label"],
    **_split_opts,
)
valid_df, test_df = train_test_split(
    tmp_df,
    train_size=0.5,
    stratify=tmp_df["label"],
    **_split_opts,
)

# 5) Preprocessing: CLAHE contrast enhancement followed by a 3x3 median blur
#    (the median blur stands in for the "AMF" step named in the original
#    notes — presumably an adaptive median filter).
CLAHE_CLIP, CLAHE_TILE = 2.0, (8, 8)


def preprocess_cv(rgb):
    """Enhance, denoise and resize one RGB image for the network.

    Steps: convert to LAB, apply CLAHE to the lightness channel only,
    convert back to RGB, apply a 3x3 median blur, then resize to
    ``IMG_SIZE`` x ``IMG_SIZE`` with area interpolation.

    Args:
        rgb: RGB image array. Assumed to be 8-bit, 3-channel, as produced
            by the callers in this file (decoded image tensors and
            ``cv2.imread`` output converted to RGB).

    Returns:
        A ``float32`` array of shape ``(IMG_SIZE, IMG_SIZE, 3)`` that keeps
        the original pixel scale (0-255 for 8-bit input).
    """
    lab = cv2.cvtColor(rgb, cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=CLAHE_CLIP, tileGridSize=CLAHE_TILE)
    lab_eq = cv2.merge([clahe.apply(lightness), chan_a, chan_b])
    rgb_eq = cv2.cvtColor(lab_eq, cv2.COLOR_LAB2RGB)
    rgb_eq = cv2.medianBlur(rgb_eq, 3)
    rgb_eq = cv2.resize(rgb_eq, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
    # Do NOT divide by 255: Keras EfficientNet models include their own
    # rescaling/normalisation and expect pixel values in the [0, 255] range.
    # Feeding [0, 1] inputs starves the pretrained backbone of signal.
    return rgb_eq.astype("float32")

# 6) tf.data pipeline
# Two-way lookup between class names and integer ids; ids follow the
# position of each name in `classes`.
label_to_id = dict(zip(classes, range(len(classes))))
id_to_label = dict(zip(label_to_id.values(), label_to_id.keys()))

def gen(rows):
    """Yield ``(filepath, class_id)`` pairs, one per row of ``rows``.

    Args:
        rows: DataFrame with a ``"filepath"`` column and a ``"label"``
            column whose values are keys of ``label_to_id``.

    Yields:
        Tuples of the file path and the integer id of its label.
    """
    # Iterate the two columns directly: DataFrame.iterrows() builds a
    # Series object for every row, which is needlessly slow for a generator
    # that is re-run on every epoch.
    for ruta, etiqueta in zip(rows["filepath"], rows["label"]):
        yield ruta, label_to_id[etiqueta]

def load_and_preprocess(path, y):
    """Load one image file and build its (image, one-hot label) pair.

    Args:
        path: Scalar string tensor with the image file path.
        y: Scalar integer tensor with the class id.

    Returns:
        A tuple ``(image, target)``: the image after ``preprocess_cv`` with
        static shape ``(IMG_SIZE, IMG_SIZE, 3)``, and the one-hot encoding
        of ``y`` with ``len(classes)`` entries.
    """
    raw_bytes = tf.io.read_file(path)
    # Force 3 channels and a single frame so every sample has the same rank.
    decoded = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    enhanced = tf.numpy_function(preprocess_cv, [decoded], Tout=tf.float32)
    # The OpenCV step runs outside the graph, so declare the output shape
    # explicitly for downstream batching.
    enhanced.set_shape((IMG_SIZE, IMG_SIZE, 3))
    return enhanced, tf.one_hot(y, depth=len(classes))

def make_ds(dataframe, shuffle=False):
    """Build a batched, prefetched ``tf.data.Dataset`` from a split.

    Args:
        dataframe: DataFrame of samples, passed to ``gen``.
        shuffle: When true, shuffle the (path, id) pairs with a 4096-element
            buffer seeded with ``SEED`` before loading the images.

    Returns:
        A dataset yielding batches of ``BATCH`` preprocessed images and
        their one-hot labels.
    """
    def _rows():
        return gen(dataframe)

    signature = (
        tf.TensorSpec(shape=(), dtype=tf.string),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    ds = tf.data.Dataset.from_generator(_rows, output_signature=signature)
    if shuffle:
        # Shuffle before map() so only lightweight (path, id) pairs sit in
        # the buffer, not decoded images.
        ds = ds.shuffle(4096, seed=SEED)
    ds = ds.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(BATCH)
    return ds.prefetch(tf.data.AUTOTUNE)

# Only the training split is shuffled; validation and test keep row order.
train_ds = make_ds(train_df, shuffle=True)
valid_ds, test_ds = (make_ds(split) for split in (valid_df, test_df))

# 7) EfficientNetB3 model
# ImageNet-pretrained backbone without its classifier, globally average-pooled,
# followed by a small regularised classification head.
base = keras.applications.EfficientNetB3(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
x = layers.BatchNormalization()(base.output)
x = layers.Dense(
    256,
    activation="relu",
    kernel_regularizer=keras.regularizers.l2(1e-4),
)(x)
x = layers.Dropout(0.4)(x)
# One softmax unit per maize class.
out = layers.Dense(len(classes), activation="softmax")(x)
model = keras.Model(inputs=base.input, outputs=out)

# Warm-up: freeze every backbone layer so that only the new head is trained
# for the first few epochs.
for layer in base.layers:
    layer.trainable = False
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Callbacks shared by both training phases: halve the learning rate after
# 2 stagnant epochs of val_loss, and stop (restoring the best weights) after 6.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, min_lr=1e-6, verbose=1
)
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=6, restore_best_weights=True
)
cb = [reduce_lr, early_stop]

model.fit(train_ds, validation_data=valid_ds, epochs=5, callbacks=cb, verbose=1)

# Fine-tune: unfreeze the backbone and continue training with a lower
# learning rate. BatchNormalization layers are kept frozen, as recommended
# by the Keras fine-tuning guidance: re-training their statistics on small
# batches of a new dataset tends to wreck the pretrained features.
for layer in base.layers:
    layer.trainable = not isinstance(layer, layers.BatchNormalization)
# Re-compile so the changed `trainable` flags take effect.
model.compile(
    optimizer=keras.optimizers.Adam(1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(train_ds, validation_data=valid_ds, epochs=20, callbacks=cb, verbose=1)

# 8) Evaluation and saving
# Report the compiled loss/metrics on the held-out test split, then write
# the trained model into OUT_DIR.
test_metrics = model.evaluate(test_ds, verbose=0)
print("Test:", test_metrics)
MODEL_PATH = os.path.join(OUT_DIR, "maiz_efficientnetb3.h5")
model.save(MODEL_PATH)
print("Modelo guardado en:", MODEL_PATH)

# 9) Inference on a single photo
FOTO_EXTERNA = "/ruta/a/tu_foto.jpg"  # <-- CHANGE to the image you want to classify


def cargar_rgb(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: Path of the image file to read.

    Returns:
        The image as returned by ``cv2.imread``, converted from BGR to RGB.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    imagen_bgr = cv2.imread(path)
    if imagen_bgr is not None:
        return cv2.cvtColor(imagen_bgr, cv2.COLOR_BGR2RGB)
    raise FileNotFoundError(f"No se pudo leer {path}")

# Classify FOTO_EXTERNA and show it with the predicted label. Any failure
# (for example the placeholder path not existing) is reported, not raised,
# so the rest of the notebook stays usable.
try:
    rgb = cargar_rgb(FOTO_EXTERNA)
    x = preprocess_cv(rgb)
    # predict() works on batches: add a leading axis, then take the only row.
    probs = model.predict(np.expand_dims(x, axis=0), verbose=0)[0]
    k = int(probs.argmax())
    p = float(probs[k])
    print(f"pred: {id_to_label[k]}  p={p:.3f}")
    # Display the original (un-preprocessed) photo with the prediction as title.
    plt.imshow(rgb)
    plt.axis("off")
    plt.title(f"{id_to_label[k]}  p={p:.3f}")
    plt.show()
except Exception as e:
    print("Error en inferencia:", e)
