In [8]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Layer, Reshape, Multiply, Conv2D, BatchNormalization, Activation, Add, Input, ZeroPadding2D, MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import glorot_uniform
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import register_keras_serializable


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [9]:
base = '/Users/robp/scm/personal/github.com/robpickerill/thesis'
csv_file = os.path.join(base, 'ham10000_data/HAM10000_metadata.csv')
img_dir = os.path.join(base, 'ham10000_data/images')
file_ext = '.jpg'

df = pd.read_csv(csv_file)
df['image_path'] = df['image_id'].apply(lambda x: os.path.join(img_dir, x + file_ext))

print(df.head())
print(f"Total samples in dataset: {len(df)}")

     lesion_id      image_id   dx dx_type   age   sex localization  \
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp   
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp   
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp   
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp   
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear   

                                          image_path  
0  /Users/robp/scm/personal/github.com/robpickeri...  
1  /Users/robp/scm/personal/github.com/robpickeri...  
2  /Users/robp/scm/personal/github.com/robpickeri...  
3  /Users/robp/scm/personal/github.com/robpickeri...  
4  /Users/robp/scm/personal/github.com/robpickeri...  
Total samples in dataset: 10015


In [10]:
import tensorflow as tf
from tensorflow.keras import layers

class SEBlock(layers.Layer):
    """
    Squeeze-and-Excitation (SE) Block

    The SE block recalibrates channel-wise feature responses by explicitly modeling
    interdependencies between channels. Given an input feature map of shape
    (batch, height, width, channels), it:

    1. Squeezes global spatial information into a channel descriptor by using
       global average pooling.
    2. Excites each channel by passing this descriptor through a fully-connected
       bottleneck and expanding transformation, utilizing a sigmoid activation
       to generate channel-wise weights.
    3. Scales the original feature map by these learned weights,
       reinforcing channels that are relevant and suppressing those that are not.

    Parameters
    ----------
    reduction_ratio : int, optional (default=16)
        The reduction ratio used to compute the size of the bottleneck layer.
        For an input with `C` channels, the bottleneck layer will have
        `C // reduction_ratio` channels.

    name : str, optional
        String name for the layer.

    Attributes
    ----------
    fc1 : Dense layer
        The first fully connected layer that reduces the channel dimension.
    fc2 : Dense layer
        The second fully connected layer that expands back to the original channel dimension.
    """
    def __init__(self, reduction_ratio=16, name=None, **kwargs):
        super(SEBlock, self).__init__(name=name, **kwargs)
        self.reduction_ratio = reduction_ratio

    def build(self, input_shape):
        """
        Build the internal layers of the SE block. This method is called
        automatically once the shape of the inputs is known.

        Parameters
        ----------
        input_shape : TensorShape
            The shape of the input tensor. Typically (batch, height, width, channels).
        """
        if len(input_shape) != 4:
            raise ValueError("SEBlock only supports inputs with shape (batch, height, width, channels).")

        channels = input_shape[-1]
        reduced_channels = max(channels // self.reduction_ratio, 1)

        # First FC layer for reduction (C -> C//r)
        self.fc1 = layers.Dense(units=reduced_channels,
                                activation='relu',  # Non-linear activation
                                use_bias=True,
                                name='se_fc1')

        # Second FC layer to restore original dimension (C//r -> C)
        self.fc2 = layers.Dense(units=channels,
                                activation='sigmoid',  # Outputs channel-wise gating weights
                                use_bias=True,
                                name='se_fc2')

        super(SEBlock, self).build(input_shape)

    def call(self, inputs, training=False):
        """
        Forward pass of the SE block.

        Steps:
        1. Global average pooling to get channel-wise statistics of shape (batch, channels).
        2. Pass through the first FC layer (reduction).
        3. Pass through the second FC layer (expansion) with sigmoid activation to get weights.
        4. Reshape weights to (batch, 1, 1, channels) to match the input shape.
        5. Multiply the original inputs by these weights (channel-wise scaling).

        Parameters
        ----------
        inputs : tf.Tensor
            The input tensor with shape (batch, height, width, channels).

        training : bool, optional
            Specifies if the layer should behave in training mode or inference mode.
            This parameter can be used by certain layers that behave differently
            during training and inference.

        Returns
        -------
        tf.Tensor
            The output tensor with the same shape as `inputs`, after channel-wise rescaling.
        """
        # Step 1: Squeeze
        # Global average pooling: (batch, H, W, C) -> (batch, C)
        squeeze_tensor = tf.reduce_mean(inputs, axis=[1, 2], keepdims=False)

        # Step 2 & 3: Excitation
        # FC reduce: (batch, C) -> (batch, C//r)
        reduced = self.fc1(squeeze_tensor, training=training)
        # FC expand: (batch, C//r) -> (batch, C)
        excitation = self.fc2(reduced, training=training)

        # Reshape excitation to (batch, 1, 1, C) for broadcasting
        excitation = tf.reshape(excitation, [-1, 1, 1, tf.shape(inputs)[-1]])

        # Step 4: Scale
        # Scale input by the learned weights: (batch, H, W, C)
        scaled_inputs = inputs * excitation
        return scaled_inputs

    def compute_output_shape(self, input_shape):
        """
        Compute the output shape of the SE block. It remains the same as the input shape.

        Parameters
        ----------
        input_shape : Tuple[int]
            Shape of the input tensor.

        Returns
        -------
        Tuple[int]
            The output shape, identical to the input shape.
        """
        return input_shape

    def get_config(self):
        """
        Returns the configuration of the layer. This enables the layer
        to be serialized and deserialized, for example, when saving and loading models.

        Returns
        -------
        dict
            A dictionary containing the layer configuration.
        """
        config = super(SEBlock, self).get_config()
        config.update({
            'reduction_ratio': self.reduction_ratio
        })
        return config


In [11]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    bn_axis = 3
    filters1, filters2, filters3 = filters
    conv_name_base = f'conv{stage}_block{block}_'

    x = Conv2D(filters1, (1, 1),
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '1_conv')(input_tensor)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '1_bn')(x)
    x = Activation('relu', name=conv_name_base + '1_relu')(x)

    x = Conv2D(filters2, kernel_size,
               padding='same',
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '2_conv')(x)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '2_bn')(x)
    x = Activation('relu', name=conv_name_base + '2_relu')(x)

    x = Conv2D(filters3, (1, 1),
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '3_conv')(x)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '3_bn')(x)

    x = Add(name=conv_name_base + 'add')([x, input_tensor])
    x = Activation('relu', name=conv_name_base + 'out')(x)
    return x

def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    bn_axis = 3
    filters1, filters2, filters3 = filters
    conv_name_base = f'conv{stage}_block{block}_'

    x = Conv2D(filters1, (1, 1), strides=strides,
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '1_conv')(input_tensor)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '1_bn')(x)
    x = Activation('relu', name=conv_name_base + '1_relu')(x)

    x = Conv2D(filters2, kernel_size, padding='same',
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '2_conv')(x)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '2_bn')(x)
    x = Activation('relu', name=conv_name_base + '2_relu')(x)

    x = Conv2D(filters3, (1, 1),
               use_bias=True,
               kernel_initializer='he_normal',
               name=conv_name_base + '3_conv')(x)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '3_bn')(x)

    shortcut = Conv2D(filters3, (1, 1), strides=strides,
                      use_bias=True,
                      kernel_initializer='he_normal',
                      name=conv_name_base + '0_conv')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=conv_name_base + '0_bn')(shortcut)

    x = Add(name=conv_name_base + 'add')([x, shortcut])
    x = Activation('relu', name=conv_name_base + 'out')(x)
    return x

def build_resnet50(input_shape, num_classes):
    inputs = Input(shape=input_shape, name='input_1')
    bn_axis = 3  # channels_last

    # Stage 1
    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(inputs)
    x = Conv2D(64, (7, 7), strides=(2, 2),
               padding='valid', use_bias=True,
               kernel_initializer='he_normal',
               name='conv1_conv')(x)
    x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1_pool')(x)

    # Stage 2
    x = conv_block(x, 3, [64, 64, 256], stage=2, block=1, strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block=2)
    x = identity_block(x, 3, [64, 64, 256], stage=2, block=3)

    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], stage=3, block=1)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block=2)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block=3)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block=4)

    # Stage 4
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block=1)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block=2)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block=3)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block=4)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block=5)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block=6)

    # Stage 5
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block=1)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block=2)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block=3)

    # CBAM
    x = SEBlock()(x)

    # Global Pooling & Classifier
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax', name='predictions')(x)

    model = Model(inputs, outputs, name='resnet50')
    return model

def transfer_weights(base_model, target_model):
    """
    Transfer weights from a Keras ResNet50 base_model into the custom ResNet50
    architecture whenever layer names match.
    """
    for layer in target_model.layers:
        try:
            pretrained_layer = base_model.get_layer(layer.name)
            layer.set_weights(pretrained_layer.get_weights())
        except Exception:
            # If layer doesn't exist in pretrained base, skip
            pass


In [12]:
def focal_loss(alpha=0.25, gamma=2.0):
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, tf.keras.backend.epsilon(), 1. - tf.keras.backend.epsilon())
        cross_entropy = -y_true * tf.math.log(y_pred)
        weights = alpha * y_true * tf.math.pow((1 - y_pred), gamma)
        return tf.reduce_mean(tf.reduce_sum(weights * cross_entropy, axis=-1))
    return focal_loss_fixed

In [13]:
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# For tracking the best fold, to later evaluate performance
best_fold = None
best_auc_across_folds = 0.0

# For collecting metrics across folds
fold_accuracies = []
fold_aucs = []
fold_losses = []

# Epochs for each phase
EPOCHS_PHASE1 = 20   # Freeze the backbone
EPOCHS_PHASE2 = 200  # Fine-tuning

IMG_SIZE = 224
BATCH_SIZE = 32
NUM_CLASSES = 7

base_resnet50 = ResNet50(weights='imagenet', include_top=False, pooling=None)

# Data augmentation
datagen_train = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen_val = ImageDataGenerator(rescale=1.0/255.0)

# Get unique class labels (strings)
all_class_labels = sorted(df['dx'].unique())

# Compute global class weights for the entire dataset
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.array(all_class_labels),
    y=df['dx'].values
)
# Map to dictionary: {class_index: weight}
class_weights_dict = {}
for label, w in zip(all_class_labels, class_weights_array):
    # Index of this label in the alphabetical-sorted list
    idx = all_class_labels.index(label)
    class_weights_dict[idx] = w
print("Class weights dict:", class_weights_dict)

fold_index = 1
for train_idx, val_idx in skf.split(df, df['dx']):
    print(f"\n\n===== FOLD {fold_index}/{k_folds} =====")

    # ------------------------------------------------------
    # 4.1) CREATE DATAFRAMES FOR THIS FOLD
    # ------------------------------------------------------
    train_df = df.iloc[train_idx].reset_index(drop=True)
    val_df = df.iloc[val_idx].reset_index(drop=True)

    print("Train size:", len(train_df), "Val size:", len(val_df))
    print("Train distribution:\n", train_df['dx'].value_counts())
    print("Val distribution:\n", val_df['dx'].value_counts())

    # ------------------------------------------------------
    # 4.2) FLOW FROM DATAFRAME: TRAIN & VAL
    # ------------------------------------------------------
    train_generator = datagen_train.flow_from_dataframe(
        dataframe=train_df,
        x_col='image_path',
        y_col='dx',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=True
    )
    val_generator = datagen_val.flow_from_dataframe(
        dataframe=val_df,
        x_col='image_path',
        y_col='dx',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=False
    )

    # Extract class names from the train_generator
    # The generator creates an internal mapping of classes -> indices
    class_names = list(train_generator.class_indices.keys())

    # ------------------------------------------------------
    # 4.3) BUILD & COMPILE MODEL FROM SCRATCH FOR EACH FOLD
    # ------------------------------------------------------
    custom_resnet = build_resnet50(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES)
    # Transfer weights from base Keras ResNet50 (ImageNet) to custom ResNet
    transfer_weights(base_resnet50, custom_resnet)

    # Freeze layers in the main body
    for layer in custom_resnet.layers:
        if layer.name.startswith('conv'):
            layer.trainable = False
        else:
            layer.trainable = True

    custom_resnet.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss=focal_loss(),
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )

    # ------------------------------------------------------
    # 4.4) CALLBACKS & TRAIN (PHASE 1)
    # ------------------------------------------------------
    model_path_phase1 = f"resnet50_cbam_classificationhead_fold{fold_index}_phase1.keras"
    callbacks_phase1 = [
        EarlyStopping(monitor='val_auc', patience=3, restore_best_weights=True, mode='max', verbose=1),
        ModelCheckpoint(model_path_phase1, monitor='val_auc', save_best_only=True, mode='max', verbose=1)
    ]

    history_phase1 = custom_resnet.fit(
        train_generator,
        validation_data=val_generator,
        epochs=EPOCHS_PHASE1,
        class_weight=class_weights_dict,
        callbacks=callbacks_phase1
    )

    # Load best weights from Phase 1
    custom_resnet.load_weights(model_path_phase1)

    # ------------------------------------------------------
    # 4.5) UNFREEZE & FINE-TUNING (PHASE 2)
    # ------------------------------------------------------
    for layer in custom_resnet.layers:
        layer.trainable = True

    custom_resnet.compile(
        optimizer=Adam(learning_rate=1e-5),
        loss=focal_loss(),
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )

    model_path_phase2 = f"resnet50_cbam_classificationhead_fold{fold_index}_phase2.keras"
    callbacks_phase2 = [
        ReduceLROnPlateau(monitor='val_auc', factor=0.1, patience=3, mode='max', verbose=1),
        EarlyStopping(monitor='val_auc', patience=7, restore_best_weights=True, mode='max', verbose=1),
        ModelCheckpoint(model_path_phase2, monitor='val_auc', save_best_only=True, mode='max', verbose=1)
    ]

    history_phase2 = custom_resnet.fit(
        train_generator,
        validation_data=val_generator,
        epochs=EPOCHS_PHASE2,
        class_weight=class_weights_dict,
        callbacks=callbacks_phase2
    )

    # Load best weights from Phase 2
    custom_resnet.load_weights(model_path_phase2)

    # ------------------------------------------------------
    # 4.6) EVALUATE ON THIS FOLD
    # ------------------------------------------------------
    loss, accuracy, auc_val = custom_resnet.evaluate(val_generator, verbose=0)
    print(f"FOLD {fold_index} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}, AUC: {auc_val:.4f}")

    fold_losses.append(loss)
    fold_accuracies.append(accuracy)
    fold_aucs.append(auc_val)

    # Check if this fold is the best so far
    if auc_val > best_auc_across_folds:
        best_auc_across_folds = auc_val
        best_fold = fold_index

    # Move to next fold
    fold_index += 1

# -------------------------------------------------------------------------
# 5) CROSS-VALIDATION RESULTS
# -------------------------------------------------------------------------
print("\n=== CROSS-VALIDATION RESULTS ===")
print(f"Average Loss over {k_folds} folds  : {np.mean(fold_losses):.4f} (± {np.std(fold_losses):.4f})")
print(f"Average Accuracy over {k_folds} folds: {np.mean(fold_accuracies):.4f} (± {np.std(fold_accuracies):.4f})")
print(f"Average AUC over {k_folds} folds     : {np.mean(fold_aucs):.4f} (± {np.std(fold_aucs):.4f})")

Class weights dict: {0: np.float64(4.375273044997815), 1: np.float64(2.78349082823791), 2: np.float64(1.301832835044846), 3: np.float64(12.440993788819876), 4: np.float64(1.2854575792581184), 5: np.float64(0.21338020666879728), 6: np.float64(10.075452716297788)}


===== FOLD 1/5 =====
Train size: 8012 Val size: 2003
Train distribution:
 dx
nv       5364
mel       890
bkl       879
bcc       411
akiec     262
vasc      114
df         92
Name: count, dtype: int64
Val distribution:
 dx
nv       1341
mel       223
bkl       220
bcc       103
akiec      65
vasc       28
df         23
Name: count, dtype: int64
Found 8012 validated image filenames belonging to 7 classes.
Found 2003 validated image filenames belonging to 7 classes.


  self._warn_if_super_not_called()


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 781ms/step - accuracy: 0.6260 - auc: 0.8542 - loss: 0.2159
Epoch 1: val_auc improved from -inf to 0.88311, saving model to resnet50_cbam_classificationhead_fold1_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 977ms/step - accuracy: 0.6261 - auc: 0.8542 - loss: 0.2158 - val_accuracy: 0.6695 - val_auc: 0.8831 - val_loss: 0.1728
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 788ms/step - accuracy: 0.6745 - auc: 0.8768 - loss: 0.1828
Epoch 2: val_auc improved from 0.88311 to 0.89101, saving model to resnet50_cbam_classificationhead_fold1_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 976ms/step - accuracy: 0.6745 - auc: 0.8768 - loss: 0.1828 - val_accuracy: 0.6695 - val_auc: 0.8910 - val_loss: 0.1703
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 764ms/step - accuracy: 0.6799 - auc: 0.882

  self._warn_if_super_not_called()


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 711ms/step - accuracy: 0.6408 - auc: 0.8567 - loss: 0.1997
Epoch 1: val_auc improved from -inf to 0.87930, saving model to resnet50_cbam_classificationhead_fold2_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 889ms/step - accuracy: 0.6409 - auc: 0.8567 - loss: 0.1997 - val_accuracy: 0.6695 - val_auc: 0.8793 - val_loss: 0.1725
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 718ms/step - accuracy: 0.6684 - auc: 0.8720 - loss: 0.1902
Epoch 2: val_auc improved from 0.87930 to 0.88706, saving model to resnet50_cbam_classificationhead_fold2_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 895ms/step - accuracy: 0.6684 - auc: 0.8720 - loss: 0.1902 - val_accuracy: 0.6695 - val_auc: 0.8871 - val_loss: 0.1711
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707ms/step - accuracy: 0.6812 - auc: 0.883

  self._warn_if_super_not_called()


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717ms/step - accuracy: 0.6303 - auc: 0.8458 - loss: 0.2146
Epoch 1: val_auc improved from -inf to 0.88420, saving model to resnet50_cbam_classificationhead_fold3_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 900ms/step - accuracy: 0.6304 - auc: 0.8459 - loss: 0.2145 - val_accuracy: 0.6695 - val_auc: 0.8842 - val_loss: 0.1724
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 716ms/step - accuracy: 0.6703 - auc: 0.8762 - loss: 0.1834
Epoch 2: val_auc improved from 0.88420 to 0.89173, saving model to resnet50_cbam_classificationhead_fold3_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 893ms/step - accuracy: 0.6703 - auc: 0.8762 - loss: 0.1834 - val_accuracy: 0.6695 - val_auc: 0.8917 - val_loss: 0.1739
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 716ms/step - accuracy: 0.6665 - auc: 0.876

  self._warn_if_super_not_called()


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 733ms/step - accuracy: 0.6263 - auc: 0.8501 - loss: 0.2066
Epoch 1: val_auc improved from -inf to 0.88171, saving model to resnet50_cbam_classificationhead_fold4_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 919ms/step - accuracy: 0.6265 - auc: 0.8502 - loss: 0.2066 - val_accuracy: 0.6695 - val_auc: 0.8817 - val_loss: 0.1744
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 736ms/step - accuracy: 0.6666 - auc: 0.8734 - loss: 0.2004
Epoch 2: val_auc improved from 0.88171 to 0.88951, saving model to resnet50_cbam_classificationhead_fold4_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 918ms/step - accuracy: 0.6666 - auc: 0.8735 - loss: 0.2004 - val_accuracy: 0.6695 - val_auc: 0.8895 - val_loss: 0.1713
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 732ms/step - accuracy: 0.6668 - auc: 0.874

  self._warn_if_super_not_called()


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 715ms/step - accuracy: 0.6393 - auc: 0.8590 - loss: 0.2047
Epoch 1: val_auc improved from -inf to 0.88145, saving model to resnet50_cbam_classificationhead_fold5_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 898ms/step - accuracy: 0.6394 - auc: 0.8590 - loss: 0.2047 - val_accuracy: 0.6695 - val_auc: 0.8814 - val_loss: 0.1737
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 738ms/step - accuracy: 0.6641 - auc: 0.8735 - loss: 0.1864
Epoch 2: val_auc improved from 0.88145 to 0.88891, saving model to resnet50_cbam_classificationhead_fold5_phase1.keras
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 919ms/step - accuracy: 0.6641 - auc: 0.8735 - loss: 0.1864 - val_accuracy: 0.6695 - val_auc: 0.8889 - val_loss: 0.1712
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 741ms/step - accuracy: 0.6667 - auc: 0.874