In [2]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import MultiLabelBinarizer
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import keras
from keras.applications import EfficientNetB4, ResNet50
from keras import layers, models
from keras.metrics import AUC, Precision, Recall
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras import mixed_precision

# =========================
# Mixed precision (fp16)
# =========================
mixed_precision.set_global_policy("mixed_float16")

# ------------------------------------------------------------
# 1) Caminhos e hiperparâmetros
# ------------------------------------------------------------
DATA_DIR = Path("../data")  # ajuste se necessário
TRAIN_CSV = DATA_DIR / "train.csv"
TRAIN_DIR = DATA_DIR / "train_images"

IMG_SIZE = (224, 224)
BATCH_SIZE = 64
SEED = 42
EPOCHS = 10
AUTOTUNE = tf.data.AUTOTUNE
rng = np.random.default_rng(SEED)

# ------------------------------------------------------------
# 2) Carregar labels e montar vetor multi-hot
# ------------------------------------------------------------
df = pd.read_csv(TRAIN_CSV)
df["labels"] = df["labels"].astype(str).str.strip().str.split()
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df["labels"]).astype("float32")
X = df["image"].values
class_names = list(mlb.classes_)
num_classes = len(class_names)
print("Classes:", class_names)

# ------------------------------------------------------------
# 3) tf.data helpers (leitura, resize, normalização, augment)
# ------------------------------------------------------------
# Augment na GPU
data_augment = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
        layers.RandomContrast(0.1),
    ],
    name="augment",
)


def _load_image(path):
    img = tf.io.read_file(path)
    # use decode_png se suas imagens forem .png
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE, antialias=True)
    img = tf.cast(img, tf.float32) / 255.0
    return img


def make_ds(paths, labels, training: bool):
    """Cria um tf.data.Dataset de (image, multi_hot) com pipeline rápido."""
    paths = tf.convert_to_tensor(paths)
    labels = tf.convert_to_tensor(labels, dtype=tf.float32)

    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if training:
        # embaralha bem (não gigante pra não explodir RAM)
        buffer = min(10000, len(paths))
        ds = ds.shuffle(buffer, seed=SEED, reshuffle_each_iteration=True)

    # paraleliza leitura/decodificação
    ds = ds.map(lambda p, y: (_load_image(p), y), num_parallel_calls=AUTOTUNE)

    ds = ds.cache(
        f".cache/cached_{'train' if training else 'val'}"
    )  # habilite se couber na RAM/SSD (ou use .cache('arquivo'))

    if training:
        ds = ds.map(
            lambda x, y: (data_augment(x, training=True), y),
            num_parallel_calls=AUTOTUNE,
        )

    ds = ds.batch(BATCH_SIZE, drop_remainder=False)
    ds = ds.prefetch(AUTOTUNE)

    # para máximo throughput
    options = tf.data.Options()
    options.experimental_deterministic = False
    ds = ds.with_options(options)
    return ds


# ------------------------------------------------------------
# helper: cria modelo (novo a cada fold)
# ------------------------------------------------------------
def build_model(num_classes: int):
    # base_model = ResNet50(
    #     weights="imagenet", include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
    # )
    base_model = EfficientNetB4(
        weights="imagenet", include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
    )
    base_model.trainable = False  # fase 1: só a cabeça

    inputs = keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.5)(x)

    pos = y.sum(axis=0)  # positivos por classe
    neg = y.shape[0] - pos
    prior = np.clip(pos / (pos + neg), 1e-6, 1 - 1e-6)
    bias_init = np.log(prior / (1 - prior))
    outputs = layers.Dense(
        num_classes,
        activation="sigmoid",
        bias_initializer=keras.initializers.Constant(bias_init),
        dtype="float32",
    )(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            Precision(name="precision", thresholds=0.3),
            Recall(name="recall", thresholds=0.3),
            AUC(name="auc", multi_label=True),
        ],
    )
    return model


# ------------------------------------------------------------
# 4) K-Fold Cross-Validation (4 folds) com tf.data
# ------------------------------------------------------------
mskf = MultilabelStratifiedKFold(n_splits=4, shuffle=True, random_state=SEED)
fold_metrics = []

os.makedirs(".cache", exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(mskf.split(X, y), start=1):
    print(f"\n===== FOLD {fold}/4 =====")
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx].astype("float32"), y[val_idx].astype("float32")

    X_train_paths = [str(TRAIN_DIR / fname) for fname in X_train]
    X_val_paths = [str(TRAIN_DIR / fname) for fname in X_val]

    train_ds = make_ds(X_train_paths, y_train, training=True)
    val_ds = make_ds(X_val_paths, y_val, training=False)

    # modelo novo por fold
    model = build_model(num_classes)

    callbacks = [
        EarlyStopping(
            monitor="val_loss", patience=4, min_delta=0.003, restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor="val_loss", factor=0.2, patience=2, min_delta=0.005, min_lr=1e-6
        ),
        ModelCheckpoint(
            f"best_fold{fold}.keras", monitor="val_loss", save_best_only=True
        ),
    ]

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        verbose=1,
        callbacks=callbacks,
    )

    # Avaliação neste fold
    fold_result = model.evaluate(val_ds, return_dict=True, verbose=1)
    fold_result["fold"] = fold
    fold_metrics.append(fold_result)

    print(
        f"Fold {fold} -> "
        f"val_loss={fold_result['loss']:.4f} | "
        f"val_acc={fold_result['accuracy']:.4f} | "
        f"val_prec={fold_result['precision']:.4f} | "
        f"val_rec={fold_result['recall']:.4f} | "
        f"val_auc={fold_result['auc']:.4f}"
    )

    # Limpa cache para o próximo fold
    keras.backend.clear_session()

    for arquivo in os.listdir(".cache"):
        caminho_arquivo = os.path.join(".cache", arquivo)
        if os.path.isfile(caminho_arquivo):
            os.remove(caminho_arquivo)


# ------------------------------------------------------------
# 5) Resumo dos 4 folds (média ± desvio)
# ------------------------------------------------------------
fold_df = pd.DataFrame(fold_metrics).set_index("fold")
print("\nResultados por fold:")
print(fold_df.round(4))

summary = fold_df.agg(["mean", "std"]).round(4)
print("\nMédia e desvio (4 folds):")
print(summary)

# ------------------------------------------------------------
# (Opcional) Fine-tuning por fold
# ------------------------------------------------------------
# Depois da fase inicial, você pode descongelar parte da ResNet e rodar mais épocas:
# base_model = model.layers[1]  # se usar Model(inputs, outputs), ajuste índice conforme seu grafo
# base_model.trainable = True
# for layer in base_model.layers[:-30]:
#     layer.trainable = False
# model.compile(
#     optimizer=keras.optimizers.Adam(1e-5),
#     loss="binary_crossentropy",
#     metrics=["accuracy", Precision(name="precision"), Recall(name="recall"),
#              AUC(name="auc", multi_label=True)]
# )
# model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS_FT, callbacks=callbacks)

Classes: ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']

===== FOLD 1/4 =====
Epoch 1/10


2025-10-04 14:39:33.429702: I external/local_xla/xla/service/service.cc:163] XLA service 0x71a544001e20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-04 14:39:33.429879: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2025-10-04 14:39:34.777513: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-04 14:39:37.192869: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91301
I0000 00:00:1759599606.111314   54657 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m218/219[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 330ms/step - accuracy: 0.2575 - auc: 0.4939 - loss: 0.4504 - precision: 0.2930 - recall: 0.2060

2025-10-04 14:41:34.710286: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 529ms/step - accuracy: 0.2575 - auc: 0.4939 - loss: 0.4504 - precision: 0.2930 - recall: 0.2061

2025-10-04 14:43:00.011645: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 897ms/step - accuracy: 0.2575 - auc: 0.4939 - loss: 0.4504 - precision: 0.2930 - recall: 0.2062 - val_accuracy: 0.2580 - val_auc: 0.5001 - val_loss: 0.4472 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/10
[1m  1/219[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m43s[0m 200ms/step - accuracy: 0.3281 - auc: 0.4312 - loss: 0.4323 - precision: 0.2759 - recall: 0.1194

2025-10-04 14:43:35.230002: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 129ms/step - accuracy: 0.2646 - auc: 0.4917 - loss: 0.4458 - precision: 0.3075 - recall: 0.2698 - val_accuracy: 0.2580 - val_auc: 0.5000 - val_loss: 0.4471 - val_precision: 0.3078 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 3/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 133ms/step - accuracy: 0.2650 - auc: 0.4953 - loss: 0.4457 - precision: 0.3124 - recall: 0.2849 - val_accuracy: 0.2580 - val_auc: 0.5000 - val_loss: 0.4471 - val_precision: 0.3078 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 4/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 138ms/step - accuracy: 0.2650 - auc: 0.4993 - loss: 0.4456 - precision: 0.3113 - recall: 0.2883 - val_accuracy: 0.2580 - val_auc: 0.5000 - val_loss: 0.4471 - val_precision: 0.3078 - val_recall: 0.2829 - learning_rate: 2.0000e-04
Epoch 5/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 146ms/step - accurac

2025-10-04 14:46:21.380129: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:46:21.582028: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:46:22.043733: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:46:22.246139: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:46:22.886108: E external/local_xla/xla/stream_

[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348ms/step - accuracy: 0.2501 - auc: 0.4982 - loss: 0.4523 - precision: 0.2901 - recall: 0.1903

2025-10-04 14:48:32.873849: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-10-04 14:48:39.337784: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:48:39.526545: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:48:39.720491: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel t

[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 607ms/step - accuracy: 0.2501 - auc: 0.4983 - loss: 0.4523 - precision: 0.2901 - recall: 0.1904 - val_accuracy: 0.2607 - val_auc: 0.4995 - val_loss: 0.4454 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/10


2025-10-04 14:48:53.666050: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 176ms/step - accuracy: 0.2633 - auc: 0.5025 - loss: 0.4466 - precision: 0.3118 - recall: 0.2640 - val_accuracy: 0.2607 - val_auc: 0.5000 - val_loss: 0.4450 - val_precision: 0.3051 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 3/10
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 173ms/step - accuracy: 0.2617 - auc: 0.5014 - loss: 0.4465 - precision: 0.3083 - recall: 0.2847 - val_accuracy: 0.2607 - val_auc: 0.5000 - val_loss: 0.4450 - val_precision: 0.3051 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 4/10
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 175ms/step - accuracy: 0.2617 - auc: 0.5000 - loss: 0.4465 - precision: 0.3083 - recall: 0.2847 - val_accuracy: 0.2607 - val_auc: 0.5000 - val_loss: 0.4450 - val_precision: 0.3051 - val_recall: 0.2829 - learning_rate: 2.0000e-04
Epoch 5/10
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 171ms/step - accurac


2025-10-04 14:53:36.372760: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:53:36.565544: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:53:37.029704: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:53:37.222612: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:53:37.641297: E external/local_xla/xla/stream

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step - accuracy: 0.2592 - auc: 0.4980 - loss: 0.4505 - precision: 0.3011 - recall: 0.2334

2025-10-04 14:54:23.567656: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-10-04 14:54:29.332101: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:54:29.525869: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:54:29.927819: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel t

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 704ms/step - accuracy: 0.2592 - auc: 0.4980 - loss: 0.4505 - precision: 0.3011 - recall: 0.2335 - val_accuracy: 0.2606 - val_auc: 0.5000 - val_loss: 0.4451 - val_precision: 0.3053 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 2/10


2025-10-04 14:54:42.263090: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 169ms/step - accuracy: 0.2637 - auc: 0.4983 - loss: 0.4463 - precision: 0.3114 - recall: 0.2815 - val_accuracy: 0.2606 - val_auc: 0.5000 - val_loss: 0.4451 - val_precision: 0.3053 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 3/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 166ms/step - accuracy: 0.2637 - auc: 0.4989 - loss: 0.4463 - precision: 0.3107 - recall: 0.2868 - val_accuracy: 0.2606 - val_auc: 0.5000 - val_loss: 0.4451 - val_precision: 0.3053 - val_recall: 0.2829 - learning_rate: 0.0010
Epoch 4/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 168ms/step - accuracy: 0.2637 - auc: 0.4980 - loss: 0.4463 - precision: 0.3107 - recall: 0.2868 - val_accuracy: 0.2606 - val_auc: 0.5000 - val_loss: 0.4451 - val_precision: 0.3053 - val_recall: 0.2829 - learning_rate: 2.0000e-04
Epoch 5/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 167ms/step - accurac

2025-10-04 14:59:18.713638: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.



2025-10-04 14:59:26.552511: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:59:26.749971: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 14:59:27.167106: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please inv

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 465ms/step - accuracy: 0.2517 - auc: 0.5009 - loss: 0.4511 - precision: 0.3025 - recall: 0.2255

2025-10-04 15:00:16.947493: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-10-04 15:00:23.797650: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 15:00:23.990386: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-10-04 15:00:24.185202: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel t

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 717ms/step - accuracy: 0.2517 - auc: 0.5009 - loss: 0.4511 - precision: 0.3025 - recall: 0.2256 - val_accuracy: 0.2568 - val_auc: 0.5000 - val_loss: 0.4472 - val_precision: 0.3082 - val_recall: 0.2831 - learning_rate: 0.0010
Epoch 2/10


2025-10-04 15:00:38.336579: W tensorflow/core/kernels/data/cache_dataset_ops.cc:333] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - accuracy: 0.2589 - auc: 0.4974 - loss: 0.4476 - precision: 0.3052 - recall: 0.2821 - val_accuracy: 0.2568 - val_auc: 0.5000 - val_loss: 0.4472 - val_precision: 0.3082 - val_recall: 0.2831 - learning_rate: 0.0010
Epoch 3/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 165ms/step - accuracy: 0.2589 - auc: 0.4971 - loss: 0.4476 - precision: 0.3052 - recall: 0.2821 - val_accuracy: 0.2568 - val_auc: 0.5000 - val_loss: 0.4472 - val_precision: 0.3082 - val_recall: 0.2831 - learning_rate: 0.0010
Epoch 4/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 166ms/step - accuracy: 0.2589 - auc: 0.5000 - loss: 0.4476 - precision: 0.3052 - recall: 0.2821 - val_accuracy: 0.2568 - val_auc: 0.5000 - val_loss: 0.4472 - val_precision: 0.3082 - val_recall: 0.2831 - learning_rate: 2.0000e-04
Epoch 5/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 166ms/step - accurac

In [3]:
fold_df.to_csv("folds_results_efficientnetb4.csv")