In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, roc_auc_score, f1_score, precision_score, multilabel_confusion_matrix, confusion_matrix
from sklearn.model_selection import train_test_split, KFold
import multiprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from sklearn.utils import shuffle
from PIL import ImageFile
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.regularizers import l2
import os
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import Callback

In [3]:
# Enable loading of truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Clear the previous session
K.clear_session()

style_train = '/Users/lucamullers/Downloads/wikiart/new_style_train_final.csv'
style_val = '/Users/lucamullers/Downloads/wikiart/new_style_val_final.csv'
image_directory = '/Users/lucamullers/Downloads/wikiart/'

# Load datasets
train_df = pd.read_csv(style_train)
val_df = pd.read_csv(style_val)

# Combine datasets
all_df = pd.concat([train_df, val_df], axis=0).reset_index(drop=True)

print(f"Before encoding: {len(all_df)} images")
all_labels = all_df['style_name'].str.get_dummies(sep='/')
print(f"After encoding: {len(all_labels)} labels (should match images)")

# Filter out styles with too few training samples
min_train_samples = 500
valid_classes = all_labels.sum(axis=0)[all_labels.sum(axis=0) >= min_train_samples].index
all_labels = all_labels[valid_classes]
all_df = all_df[all_labels.sum(axis=1) > 0]

# Ensure consistency between all_df and all_labels
all_labels = all_labels.loc[all_df.index].reset_index(drop=True)
all_df = all_df.reset_index(drop=True)

# Step 1: Split data - 10% Test Set
df_train_val, test_df, labels_train_val, test_labels = train_test_split(
    all_df, all_labels, test_size=0.1, random_state=42
)

# Step 2: Split remaining 90% into 70% Train and 20% Validation
train_df, val_df, train_labels, val_labels = train_test_split(
    df_train_val, labels_train_val, test_size=2/9, random_state=42
)

# Image size and batch size
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 256

# Hybrid Sampling Function
def hybrid_sampling(df, labels):
    min_samples_per_class = 2000  # Only upsample classes below this
    max_samples_per_class = 3000  # Prevent large classes from growing

    image_counts = {}  # Track how often each image appears across all labels
    df_resampled = []
    labels_resampled = []

    # 1️⃣ **Ensure ONLY small classes (< 1200) get upsampled**
    for style in labels.columns:
        class_indices = labels[labels[style] == 1].index
        num_samples = len(class_indices)

        if num_samples < min_samples_per_class:
            resampled_indices = np.random.choice(class_indices, size=min_samples_per_class, replace=True)
        elif num_samples > max_samples_per_class:
            resampled_indices = np.random.choice(class_indices, size=max_samples_per_class, replace=False)
        else:
            resampled_indices = class_indices  # Keep mid-sized classes unchanged

        for idx in resampled_indices:
            img_path = df.loc[idx, 'image_path']

            # 2️⃣ **Prevent images from being overused**
            if img_path not in image_counts:
                image_counts[img_path] = 0
            image_counts[img_path] += 1

            if image_counts[img_path] <= 10:  # Allow max 2 occurrences per image
                df_resampled.append(df.loc[idx])
                labels_resampled.append(labels.loc[idx])

    # 3️⃣ **Convert lists back to DataFrame after resampling**
    resampled_df = pd.DataFrame(df_resampled).reset_index(drop=True)
    resampled_labels = pd.DataFrame(labels_resampled, columns=labels.columns).reset_index(drop=True)

    return shuffle(resampled_df, resampled_labels, random_state=42)

# Data Generators
def generator_with_labels(datagen, dataframe, labels, batch_size):
    num_samples = len(dataframe)
    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        for start in range(0, num_samples, batch_size):
            end = min(start + batch_size, num_samples)
            batch_indices = indices[start:end]
            batch_images = []
            for idx in batch_indices:
                img_path = dataframe.iloc[idx]['image_path']
                img = tf.keras.preprocessing.image.load_img(f"{image_directory}/{img_path}", target_size=IMAGE_SIZE)
                img = tf.keras.preprocessing.image.img_to_array(img)
                img = datagen.random_transform(img)
                img = datagen.standardize(img)
                batch_images.append(img)
            batch_images = np.array(batch_images)
            batch_labels = labels.iloc[batch_indices].values
            yield batch_images, batch_labels

# Define Data Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1./255)

# Apply sampling to check actual image and label counts 
sampled_df, sampled_labels = hybrid_sampling(train_df, train_labels)

# Print class distribution after sampling
print("\nClass counts after resampling:")
print(sampled_labels.sum().sort_values(ascending=False))

# Print total number of training samples
print(f"\nTotal resampled training samples: {len(sampled_df)}")

Before encoding: 81446 images
After encoding: 81446 labels (should match images)

Class counts after resampling:
Realism_and_19th_Century_Movements      19000
Renaissance_and_Mannerism               10999
Impressionism_and_Post_Impressionism    10987
Expressionism_and_Derivatives           10001
Modern_Art                               9000
Baroque_and_Rococo                       8000
Baroque                                  5070
Impressionism                            4936
Expressionism                            4888
Realism                                  4118
Post_Impressionism                       4000
Romanticism                              3706
Symbolism                                3428
Art_Nouveau                              3424
Northern_Renaissance                     3170
Color_Field_Painting                     2934
Rococo                                   2930
Pop_Art                                  2838
Abstract_Expressionism                   2811
Minimalism   

In [3]:
import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score
from tensorflow.keras.metrics import AUC, Precision, Recall
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Multiply, Reshape
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K


# === Resample training set ===
train_df_resampled, train_labels_resampled = hybrid_sampling(train_df, train_labels)

# === Create generators ===
train_gen = generator_with_labels(train_datagen, train_df_resampled, train_labels_resampled, BATCH_SIZE)
val_gen = generator_with_labels(val_datagen, val_df, val_labels, BATCH_SIZE)

# === Compute steps ===\\\\\\\\
steps_per_epoch = int(np.ceil(len(train_df_resampled) / BATCH_SIZE))
validation_steps = int(np.ceil(len(val_df) / BATCH_SIZE))

# === Learning rate schedule ===
lr_schedule = ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=steps_per_epoch * 10,
    decay_rate=0.96,
    staircase=True
)

L2_FACTOR = 0.001
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

for layer in base_model.layers[-100:]:
    layer.trainable = True

x = base_model.output
x = GlobalAveragePooling2D()(x)  # shape: (None, 2048)
x = Dense(1024, activation='relu', kernel_regularizer=l2(L2_FACTOR))(x)  # (None, 1024)

def focal_loss(gamma=1.5, alpha=0.35):
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, dtype='float32')  # <-- Add this line
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma) * y_true + (1 - alpha) * K.pow(y_pred, gamma) * (1 - y_true)
        return K.mean(weight * cross_entropy)
    return loss


# === Gating mechanism: one gate per style ===
num_styles = train_labels.shape[1]
gates = Dense(num_styles * 1024, activation='sigmoid', name='gating_layer')(x)
gates = Reshape((num_styles, 1024))(gates)        # shape: (None, num_styles, 2048)
x_repeated = tf.keras.layers.RepeatVector(num_styles)(x)  # shape: (None, num_styles, 2048)
x_filtered = Multiply()([x_repeated, gates])      # shape: (None, num_styles, 2048)

# === Final classifier ===
logits = tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=l2(L2_FACTOR))
outputs = tf.keras.layers.TimeDistributed(logits)(x_filtered)  # shape: (None, num_styles, 1)
style_output = tf.keras.layers.Reshape((num_styles,))(outputs)

model = Model(inputs=base_model.input, outputs=style_output)

# === Compile model ===
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss=focal_loss(gamma=1.5, alpha=0.35),
    metrics=[
        'accuracy',
        AUC(name='auc', multi_label=True),
        Precision(name='precision'),
        Recall(name='recall')
    ]
)

class SaveEachEpoch(Callback):
    def __init__(self, save_path):
        super().__init__()
        self.save_path = save_path
        os.makedirs(self.save_path, exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
        path = os.path.join(self.save_path, f'model_epoch_{epoch:02d}.h5')
        self.model.save(path)
        print(f'\nSaved model at {path}')

save_callback = SaveEachEpoch(save_path='checkpoints_final_focal_testing_3/')

# === Train model ===
history = model.fit(
    train_gen,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
    verbose=1,
    callbacks=[save_callback]
)


Epoch 1/30
[1m 66/274[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m1:43:56[0m 30s/step - accuracy: 0.1103 - auc: 0.5088 - loss: 1.4025 - precision: 0.0702 - recall: 0.0697



[1m105/274[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m1:24:10[0m 30s/step - accuracy: 0.1197 - auc: 0.5262 - loss: 1.3623 - precision: 0.0769 - recall: 0.0492



[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.1504 - auc: 0.6371 - loss: 1.2102 - precision: 0.2823 - recall: 0.0583 




Saved model at checkpoints_final_focal_testing_3/model_epoch_00.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8387s[0m 31s/step - accuracy: 0.1506 - auc: 0.6376 - loss: 1.2094 - precision: 0.2835 - recall: 0.0585 - val_accuracy: 0.0277 - val_auc: 0.5175 - val_loss: 0.6351 - val_precision: 0.0646 - val_recall: 0.0337
Epoch 2/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.2398 - auc: 0.9299 - loss: 0.4661 - precision: 0.8456 - recall: 0.3080 




Saved model at checkpoints_final_focal_testing_3/model_epoch_01.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8318s[0m 30s/step - accuracy: 0.2399 - auc: 0.9299 - loss: 0.4657 - precision: 0.8456 - recall: 0.3081 - val_accuracy: 0.0694 - val_auc: 0.7020 - val_loss: 0.2347 - val_precision: 0.2372 - val_recall: 0.0625
Epoch 3/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.2662 - auc: 0.9529 - loss: 0.1663 - precision: 0.8703 - recall: 0.4169 




Saved model at checkpoints_final_focal_testing_3/model_epoch_02.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8316s[0m 30s/step - accuracy: 0.2662 - auc: 0.9529 - loss: 0.1662 - precision: 0.8703 - recall: 0.4169 - val_accuracy: 0.1864 - val_auc: 0.8660 - val_loss: 0.0948 - val_precision: 0.5394 - val_recall: 0.2456
Epoch 4/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.2867 - auc: 0.9633 - loss: 0.0646 - precision: 0.8862 - recall: 0.4861 




Saved model at checkpoints_final_focal_testing_3/model_epoch_03.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8610s[0m 31s/step - accuracy: 0.2867 - auc: 0.9633 - loss: 0.0645 - precision: 0.8862 - recall: 0.4861 - val_accuracy: 0.2112 - val_auc: 0.9228 - val_loss: 0.0478 - val_precision: 0.6597 - val_recall: 0.3883
Epoch 5/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31s/step - accuracy: 0.2943 - auc: 0.9691 - loss: 0.0311 - precision: 0.8987 - recall: 0.5316 




Saved model at checkpoints_final_focal_testing_3/model_epoch_04.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8834s[0m 32s/step - accuracy: 0.2944 - auc: 0.9691 - loss: 0.0311 - precision: 0.8987 - recall: 0.5316 - val_accuracy: 0.2595 - val_auc: 0.9265 - val_loss: 0.0330 - val_precision: 0.6971 - val_recall: 0.4071
Epoch 6/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3147 - auc: 0.9740 - loss: 0.0198 - precision: 0.9070 - recall: 0.5790 




Saved model at checkpoints_final_focal_testing_3/model_epoch_05.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8634s[0m 32s/step - accuracy: 0.3146 - auc: 0.9740 - loss: 0.0198 - precision: 0.9070 - recall: 0.5791 - val_accuracy: 0.2353 - val_auc: 0.9289 - val_loss: 0.0306 - val_precision: 0.6889 - val_recall: 0.4590
Epoch 7/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3246 - auc: 0.9777 - loss: 0.0159 - precision: 0.9098 - recall: 0.6163 




Saved model at checkpoints_final_focal_testing_3/model_epoch_06.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8600s[0m 31s/step - accuracy: 0.3246 - auc: 0.9777 - loss: 0.0159 - precision: 0.9098 - recall: 0.6163 - val_accuracy: 0.2627 - val_auc: 0.9292 - val_loss: 0.0289 - val_precision: 0.7027 - val_recall: 0.4298
Epoch 8/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3370 - auc: 0.9806 - loss: 0.0140 - precision: 0.9204 - recall: 0.6513 




Saved model at checkpoints_final_focal_testing_3/model_epoch_07.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8229s[0m 30s/step - accuracy: 0.3370 - auc: 0.9806 - loss: 0.0140 - precision: 0.9204 - recall: 0.6513 - val_accuracy: 0.2587 - val_auc: 0.9245 - val_loss: 0.0324 - val_precision: 0.6818 - val_recall: 0.4545
Epoch 9/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3427 - auc: 0.9825 - loss: 0.0131 - precision: 0.9231 - recall: 0.6715 




Saved model at checkpoints_final_focal_testing_3/model_epoch_08.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8093s[0m 30s/step - accuracy: 0.3427 - auc: 0.9825 - loss: 0.0131 - precision: 0.9231 - recall: 0.6715 - val_accuracy: 0.2801 - val_auc: 0.9387 - val_loss: 0.0294 - val_precision: 0.7041 - val_recall: 0.5129
Epoch 10/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3613 - auc: 0.9850 - loss: 0.0122 - precision: 0.9295 - recall: 0.7021 




Saved model at checkpoints_final_focal_testing_3/model_epoch_09.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8481s[0m 31s/step - accuracy: 0.3613 - auc: 0.9850 - loss: 0.0122 - precision: 0.9295 - recall: 0.7021 - val_accuracy: 0.2412 - val_auc: 0.9381 - val_loss: 0.0297 - val_precision: 0.7021 - val_recall: 0.5326
Epoch 11/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3593 - auc: 0.9873 - loss: 0.0113 - precision: 0.9348 - recall: 0.7281 




Saved model at checkpoints_final_focal_testing_3/model_epoch_10.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8425s[0m 31s/step - accuracy: 0.3593 - auc: 0.9873 - loss: 0.0113 - precision: 0.9348 - recall: 0.7281 - val_accuracy: 0.2438 - val_auc: 0.9247 - val_loss: 0.0326 - val_precision: 0.6702 - val_recall: 0.4813
Epoch 12/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3774 - auc: 0.9887 - loss: 0.0107 - precision: 0.9375 - recall: 0.7445 




Saved model at checkpoints_final_focal_testing_3/model_epoch_11.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8285s[0m 30s/step - accuracy: 0.3774 - auc: 0.9887 - loss: 0.0107 - precision: 0.9375 - recall: 0.7445 - val_accuracy: 0.2592 - val_auc: 0.9336 - val_loss: 0.0328 - val_precision: 0.6738 - val_recall: 0.5192
Epoch 13/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3688 - auc: 0.9902 - loss: 0.0100 - precision: 0.9406 - recall: 0.7680 




Saved model at checkpoints_final_focal_testing_3/model_epoch_12.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8330s[0m 30s/step - accuracy: 0.3688 - auc: 0.9901 - loss: 0.0100 - precision: 0.9406 - recall: 0.7679 - val_accuracy: 0.2913 - val_auc: 0.9384 - val_loss: 0.0293 - val_precision: 0.7084 - val_recall: 0.5452
Epoch 14/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3872 - auc: 0.9912 - loss: 0.0095 - precision: 0.9442 - recall: 0.7840 




Saved model at checkpoints_final_focal_testing_3/model_epoch_13.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8226s[0m 30s/step - accuracy: 0.3872 - auc: 0.9912 - loss: 0.0095 - precision: 0.9441 - recall: 0.7839 - val_accuracy: 0.2518 - val_auc: 0.9371 - val_loss: 0.0297 - val_precision: 0.7017 - val_recall: 0.5398
Epoch 15/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3752 - auc: 0.9921 - loss: 0.0090 - precision: 0.9470 - recall: 0.7965 




Saved model at checkpoints_final_focal_testing_3/model_epoch_14.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8242s[0m 30s/step - accuracy: 0.3752 - auc: 0.9921 - loss: 0.0090 - precision: 0.9470 - recall: 0.7965 - val_accuracy: 0.2675 - val_auc: 0.9301 - val_loss: 0.0342 - val_precision: 0.6769 - val_recall: 0.5201
Epoch 16/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3898 - auc: 0.9928 - loss: 0.0086 - precision: 0.9493 - recall: 0.8111 




Saved model at checkpoints_final_focal_testing_3/model_epoch_15.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8555s[0m 31s/step - accuracy: 0.3898 - auc: 0.9928 - loss: 0.0086 - precision: 0.9493 - recall: 0.8111 - val_accuracy: 0.2737 - val_auc: 0.9181 - val_loss: 0.0356 - val_precision: 0.6546 - val_recall: 0.4950
Epoch 17/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.3921 - auc: 0.9938 - loss: 0.0081 - precision: 0.9519 - recall: 0.8233 




Saved model at checkpoints_final_focal_testing_3/model_epoch_16.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8632s[0m 32s/step - accuracy: 0.3921 - auc: 0.9938 - loss: 0.0081 - precision: 0.9519 - recall: 0.8233 - val_accuracy: 0.2340 - val_auc: 0.9242 - val_loss: 0.0345 - val_precision: 0.6516 - val_recall: 0.5021
Epoch 18/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31s/step - accuracy: 0.3970 - auc: 0.9944 - loss: 0.0077 - precision: 0.9550 - recall: 0.8344 




Saved model at checkpoints_final_focal_testing_3/model_epoch_17.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8633s[0m 32s/step - accuracy: 0.3970 - auc: 0.9944 - loss: 0.0077 - precision: 0.9550 - recall: 0.8344 - val_accuracy: 0.2746 - val_auc: 0.9232 - val_loss: 0.0355 - val_precision: 0.6793 - val_recall: 0.5304
Epoch 19/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3884 - auc: 0.9948 - loss: 0.0074 - precision: 0.9576 - recall: 0.8431 




Saved model at checkpoints_final_focal_testing_3/model_epoch_18.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8252s[0m 30s/step - accuracy: 0.3885 - auc: 0.9948 - loss: 0.0074 - precision: 0.9576 - recall: 0.8431 - val_accuracy: 0.2811 - val_auc: 0.9237 - val_loss: 0.0332 - val_precision: 0.6836 - val_recall: 0.5355
Epoch 20/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30s/step - accuracy: 0.4096 - auc: 0.9952 - loss: 0.0072 - precision: 0.9576 - recall: 0.8489 




Saved model at checkpoints_final_focal_testing_3/model_epoch_19.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8376s[0m 31s/step - accuracy: 0.4096 - auc: 0.9952 - loss: 0.0072 - precision: 0.9576 - recall: 0.8489 - val_accuracy: 0.2748 - val_auc: 0.9310 - val_loss: 0.0354 - val_precision: 0.6859 - val_recall: 0.5638
Epoch 21/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3852 - auc: 0.9960 - loss: 0.0067 - precision: 0.9615 - recall: 0.8642 




Saved model at checkpoints_final_focal_testing_3/model_epoch_20.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8143s[0m 30s/step - accuracy: 0.3852 - auc: 0.9960 - loss: 0.0067 - precision: 0.9615 - recall: 0.8642 - val_accuracy: 0.2744 - val_auc: 0.9224 - val_loss: 0.0334 - val_precision: 0.6787 - val_recall: 0.5455
Epoch 22/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28s/step - accuracy: 0.3985 - auc: 0.9964 - loss: 0.0065 - precision: 0.9624 - recall: 0.8706 




Saved model at checkpoints_final_focal_testing_3/model_epoch_21.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8076s[0m 29s/step - accuracy: 0.3985 - auc: 0.9964 - loss: 0.0065 - precision: 0.9624 - recall: 0.8706 - val_accuracy: 0.2796 - val_auc: 0.9215 - val_loss: 0.0376 - val_precision: 0.6535 - val_recall: 0.5195
Epoch 23/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28s/step - accuracy: 0.3976 - auc: 0.9967 - loss: 0.0062 - precision: 0.9642 - recall: 0.8788 




Saved model at checkpoints_final_focal_testing_3/model_epoch_22.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8075s[0m 29s/step - accuracy: 0.3977 - auc: 0.9967 - loss: 0.0062 - precision: 0.9642 - recall: 0.8788 - val_accuracy: 0.3021 - val_auc: 0.9204 - val_loss: 0.0390 - val_precision: 0.6609 - val_recall: 0.5323
Epoch 24/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.4207 - auc: 0.9970 - loss: 0.0060 - precision: 0.9663 - recall: 0.8833 




Saved model at checkpoints_final_focal_testing_3/model_epoch_23.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8088s[0m 30s/step - accuracy: 0.4207 - auc: 0.9970 - loss: 0.0060 - precision: 0.9663 - recall: 0.8833 - val_accuracy: 0.2861 - val_auc: 0.9173 - val_loss: 0.0401 - val_precision: 0.6607 - val_recall: 0.5452
Epoch 25/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.4196 - auc: 0.9970 - loss: 0.0060 - precision: 0.9646 - recall: 0.8851 




Saved model at checkpoints_final_focal_testing_3/model_epoch_24.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8106s[0m 30s/step - accuracy: 0.4196 - auc: 0.9970 - loss: 0.0060 - precision: 0.9646 - recall: 0.8851 - val_accuracy: 0.2668 - val_auc: 0.9316 - val_loss: 0.0349 - val_precision: 0.6893 - val_recall: 0.5892
Epoch 26/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.4365 - auc: 0.9973 - loss: 0.0057 - precision: 0.9671 - recall: 0.8942 




Saved model at checkpoints_final_focal_testing_3/model_epoch_25.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8164s[0m 30s/step - accuracy: 0.4364 - auc: 0.9973 - loss: 0.0057 - precision: 0.9671 - recall: 0.8942 - val_accuracy: 0.2390 - val_auc: 0.9257 - val_loss: 0.0373 - val_precision: 0.6658 - val_recall: 0.5616
Epoch 27/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.3953 - auc: 0.9974 - loss: 0.0056 - precision: 0.9665 - recall: 0.8945 




Saved model at checkpoints_final_focal_testing_3/model_epoch_26.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8202s[0m 30s/step - accuracy: 0.3953 - auc: 0.9974 - loss: 0.0056 - precision: 0.9665 - recall: 0.8944 - val_accuracy: 0.3012 - val_auc: 0.9196 - val_loss: 0.0365 - val_precision: 0.6753 - val_recall: 0.5345
Epoch 28/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.4302 - auc: 0.9977 - loss: 0.0053 - precision: 0.9702 - recall: 0.9022 




Saved model at checkpoints_final_focal_testing_3/model_epoch_27.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8146s[0m 30s/step - accuracy: 0.4302 - auc: 0.9977 - loss: 0.0053 - precision: 0.9702 - recall: 0.9022 - val_accuracy: 0.2888 - val_auc: 0.9213 - val_loss: 0.0391 - val_precision: 0.6587 - val_recall: 0.5733
Epoch 29/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - accuracy: 0.4063 - auc: 0.9978 - loss: 0.0052 - precision: 0.9690 - recall: 0.9045 




Saved model at checkpoints_final_focal_testing_3/model_epoch_28.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8220s[0m 30s/step - accuracy: 0.4063 - auc: 0.9978 - loss: 0.0052 - precision: 0.9690 - recall: 0.9045 - val_accuracy: 0.2699 - val_auc: 0.9231 - val_loss: 0.0365 - val_precision: 0.6774 - val_recall: 0.5563
Epoch 30/30
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28s/step - accuracy: 0.4039 - auc: 0.9981 - loss: 0.0049 - precision: 0.9715 - recall: 0.9118 




Saved model at checkpoints_final_focal_testing_3/model_epoch_29.h5
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8039s[0m 29s/step - accuracy: 0.4039 - auc: 0.9981 - loss: 0.0049 - precision: 0.9715 - recall: 0.9118 - val_accuracy: 0.2715 - val_auc: 0.9186 - val_loss: 0.0398 - val_precision: 0.6599 - val_recall: 0.5507


In [4]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score
import tensorflow.keras.backend as K
import scipy

# === Loss function used during training ===
def focal_loss(gamma=1.5, alpha=0.35):
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, dtype='float32')
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma) * y_true + (1 - alpha) * K.pow(y_pred, gamma) * (1 - y_true)
        return K.mean(weight * cross_entropy)
    loss.__name__ = 'focal_loss'
    return loss

# === Validation generator ===
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=image_directory,
    x_col='image_path',
    y_col=None,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)
val_steps = int(np.ceil(len(val_df) / BATCH_SIZE))

# === Evaluate selected checkpoints ===
EVALUATE_EPOCHS = list(range(30))  # Modify as needed
checkpoint_dir = "checkpoints_final_focal_testing_3/"
threshold = 0.3  # Modify as needed

for epoch in EVALUATE_EPOCHS:
    checkpoint_path = os.path.join(checkpoint_dir, f"model_epoch_{epoch:02d}.h5")
    if not os.path.exists(checkpoint_path):
        print(f"Skipped epoch {epoch} (not found)")
        continue

    print(f"\n=== Evaluating Epoch {epoch+1} ===")
    
    model = load_model(checkpoint_path, custom_objects={'loss': focal_loss(gamma=1.5, alpha=0.35)})

    val_predictions = model.predict(val_generator, steps=val_steps)
    val_predictions = val_predictions[:len(val_df)]
    val_predicted_labels = (val_predictions > threshold).astype(int)

    val_auc = roc_auc_score(val_labels, val_predictions, average='macro')
    val_f1 = f1_score(val_labels, val_predicted_labels, average='macro', zero_division=0)
    val_precision = precision_score(val_labels, val_predicted_labels, average='macro', zero_division=0)
    val_recall = recall_score(val_labels, val_predicted_labels, average='macro', zero_division=0)

    print(f"Validation AUC: {val_auc:.4f}")
    print(f"Validation F1-Score: {val_f1:.4f}")
    print(f"Validation Precision: {val_precision:.4f}")
    print(f"Validation Recall: {val_recall:.4f}")


Found 16290 validated image filenames.

=== Evaluating Epoch 1 ===


  self._warn_if_super_not_called()


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 4s/step
Validation AUC: 0.5171
Validation F1-Score: 0.0067
Validation Precision: 0.0826
Validation Recall: 0.0720

=== Evaluating Epoch 2 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 4s/step
Validation AUC: 0.7026
Validation F1-Score: 0.1286
Validation Precision: 0.2693
Validation Recall: 0.2052

=== Evaluating Epoch 3 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 4s/step
Validation AUC: 0.8661
Validation F1-Score: 0.3803
Validation Precision: 0.4374
Validation Recall: 0.4448

=== Evaluating Epoch 4 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 4s/step
Validation AUC: 0.9226
Validation F1-Score: 0.4851
Validation Precision: 0.4643
Validation Recall: 0.6018

=== Evaluating Epoch 5 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9268
Validation F1-Score: 0.5166
Validation Precision: 0.4740
Validation Recall: 0.6253

=== Evaluating Epoch 6 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 4s/step
Validation AUC: 0.9300
Validation F1-Score: 0.5512
Validation Precision: 0.5129
Validation Recall: 0.6423

=== Evaluating Epoch 7 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9292
Validation F1-Score: 0.5324
Validation Precision: 0.5027
Validation Recall: 0.6174

=== Evaluating Epoch 8 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9247
Validation F1-Score: 0.5258
Validation Precision: 0.5222
Validation Recall: 0.5965

=== Evaluating Epoch 9 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9391
Validation F1-Score: 0.5829
Validation Precision: 0.5666
Validation Recall: 0.6378

=== Evaluating Epoch 10 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9393
Validation F1-Score: 0.5764
Validation Precision: 0.5363
Validation Recall: 0.6602

=== Evaluating Epoch 11 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9248
Validation F1-Score: 0.5466
Validation Precision: 0.5160
Validation Recall: 0.6307

=== Evaluating Epoch 12 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9333
Validation F1-Score: 0.5713
Validation Precision: 0.5425
Validation Recall: 0.6449

=== Evaluating Epoch 13 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9387
Validation F1-Score: 0.5846
Validation Precision: 0.5449
Validation Recall: 0.6543

=== Evaluating Epoch 14 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9376
Validation F1-Score: 0.5816
Validation Precision: 0.5354
Validation Recall: 0.6651

=== Evaluating Epoch 15 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9313
Validation F1-Score: 0.5718
Validation Precision: 0.5374
Validation Recall: 0.6414

=== Evaluating Epoch 16 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9180
Validation F1-Score: 0.5305
Validation Precision: 0.5314
Validation Recall: 0.5901

=== Evaluating Epoch 17 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9246
Validation F1-Score: 0.5650
Validation Precision: 0.5417
Validation Recall: 0.6306

=== Evaluating Epoch 18 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9240
Validation F1-Score: 0.5650
Validation Precision: 0.5416
Validation Recall: 0.6205

=== Evaluating Epoch 19 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9240
Validation F1-Score: 0.5814
Validation Precision: 0.5658
Validation Recall: 0.6219

=== Evaluating Epoch 20 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9311
Validation F1-Score: 0.5851
Validation Precision: 0.5608
Validation Recall: 0.6398

=== Evaluating Epoch 21 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9240
Validation F1-Score: 0.5889
Validation Precision: 0.5787
Validation Recall: 0.6274

=== Evaluating Epoch 22 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9213
Validation F1-Score: 0.5691
Validation Precision: 0.5627
Validation Recall: 0.6151

=== Evaluating Epoch 23 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9205
Validation F1-Score: 0.5519
Validation Precision: 0.5573
Validation Recall: 0.5957

=== Evaluating Epoch 24 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9182
Validation F1-Score: 0.5660
Validation Precision: 0.5382
Validation Recall: 0.6204

=== Evaluating Epoch 25 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9309
Validation F1-Score: 0.6027
Validation Precision: 0.5791
Validation Recall: 0.6448

=== Evaluating Epoch 26 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9266
Validation F1-Score: 0.5854
Validation Precision: 0.5565
Validation Recall: 0.6394

=== Evaluating Epoch 27 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 4s/step
Validation AUC: 0.9191
Validation F1-Score: 0.5607
Validation Precision: 0.5338
Validation Recall: 0.6249

=== Evaluating Epoch 28 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9231
Validation F1-Score: 0.5870
Validation Precision: 0.5852
Validation Recall: 0.6228

=== Evaluating Epoch 29 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9213
Validation F1-Score: 0.5752
Validation Precision: 0.5510
Validation Recall: 0.6203

=== Evaluating Epoch 30 ===




[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 4s/step
Validation AUC: 0.9197
Validation F1-Score: 0.5737
Validation Precision: 0.5519
Validation Recall: 0.6178


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import (
    f1_score, roc_auc_score, precision_score, recall_score,
    classification_report, multilabel_confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow.keras.backend as K

# === Define focal loss (must match training) ===
def focal_loss(gamma=1.5, alpha=0.35):
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, dtype='float32')
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma) * y_true + (1 - alpha) * K.pow(y_pred, gamma) * (1 - y_true)
        return K.mean(weight * cross_entropy)
    loss.__name__ = 'focal_loss'
    return loss

# === Setup ===
threshold = 0.32
checkpoint_dir = "checkpoints_final_focal_testing_3/"
EVALUATE_EPOCHS = list(range(30))

# === Test generator ===
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_directory,
    x_col='image_path',
    y_col=None,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)
test_steps = int(np.ceil(len(test_df) / BATCH_SIZE))

# === Evaluate each model ===
for epoch in EVALUATE_EPOCHS:
    checkpoint_path = os.path.join(checkpoint_dir, f"model_epoch_{epoch:02d}.h5")
    if not os.path.exists(checkpoint_path):
        print(f"Skipped epoch {epoch} (not found)")
        continue

    print(f"\n=== Evaluating Epoch {epoch + 1} on Test Set ===")

    model = load_model(checkpoint_path, custom_objects={'loss': focal_loss(gamma=1.5, alpha=0.35)})

    test_predictions = model.predict(test_generator, steps=test_steps)
    test_predictions = test_predictions[:len(test_df)]
    test_predicted_labels = (test_predictions > threshold).astype(int)

    # === Metrics ===
    test_auc = roc_auc_score(test_labels, test_predictions, average='macro')
    test_f1 = f1_score(test_labels, test_predicted_labels, average='macro', zero_division=0)
    test_precision = precision_score(test_labels, test_predicted_labels, average='macro', zero_division=0)
    test_recall = recall_score(test_labels, test_predicted_labels, average='macro', zero_division=0)

    print(f"Test AUC: {test_auc:.4f}")
    print(f"Test F1-Score: {test_f1:.4f}")
    print(f"Test Precision: {test_precision:.4f}")
    print(f"Test Recall: {test_recall:.4f}")

    print("\nClassification Report:")
    print(classification_report(
        test_labels, 
        test_predicted_labels, 
        target_names=valid_classes, 
        zero_division=0
    ))

Found 8145 validated image filenames.

=== Evaluating Epoch 1 on Test Set ===


  self._warn_if_super_not_called()


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.5118
Test F1-Score: 0.0055
Test Precision: 0.0207
Test Recall: 0.0715

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.00      0.00      0.00       246
                         Art_Nouveau       0.00      0.00      0.00       425
                             Baroque       0.00      0.00      0.00       400
                  Baroque_and_Rococo       0.00      0.00      0.00       615
                Color_Field_Painting       0.00      0.00      0.00       144
                              Cubism       0.00      0.00      0.00       230
                   Early_Renaissance       0.00      0.00      0.00       139
                       Expressionism       0.00      0.00      0.00       685
       Expressionism_and_Derivatives       0.50      0.00      0.00      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.7093
Test F1-Score: 0.1251
Test Precision: 0.2772
Test Recall: 0.1865

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.08      0.29      0.13       246
                         Art_Nouveau       0.30      0.16      0.21       425
                             Baroque       0.12      0.35      0.18       400
                  Baroque_and_Rococo       0.19      0.40      0.26       615
                Color_Field_Painting       0.69      0.06      0.11       144
                              Cubism       0.82      0.16      0.27       230
                   Early_Renaissance       0.60      0.06      0.12       139
                       Expressionism       0.21      0.14      0.17       685
       Expressionism_and_Derivatives       0.20      0.53      0.29      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.8634
Test F1-Score: 0.3664
Test Precision: 0.4741
Test Recall: 0.4040

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.28      0.55      0.37       246
                         Art_Nouveau       0.13      0.72      0.22       425
                             Baroque       0.44      0.28      0.34       400
                  Baroque_and_Rococo       0.53      0.42      0.47       615
                Color_Field_Painting       0.46      0.89      0.60       144
                              Cubism       0.77      0.37      0.50       230
                   Early_Renaissance       0.92      0.08      0.15       139
                       Expressionism       0.23      0.55      0.32       685
       Expressionism_and_Derivatives       0.29      0.73      0.42      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9217
Test F1-Score: 0.4865
Test Precision: 0.4881
Test Recall: 0.5762

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.57      0.36      0.44       246
                         Art_Nouveau       0.25      0.79      0.38       425
                             Baroque       0.35      0.64      0.45       400
                  Baroque_and_Rococo       0.43      0.77      0.55       615
                Color_Field_Painting       0.73      0.64      0.68       144
                              Cubism       0.91      0.28      0.43       230
                   Early_Renaissance       0.75      0.44      0.55       139
                       Expressionism       0.42      0.46      0.44       685
       Expressionism_and_Derivatives       0.56      0.52      0.54      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9282
Test F1-Score: 0.5203
Test Precision: 0.5001
Test Recall: 0.5998

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.49      0.47      0.48       246
                         Art_Nouveau       0.34      0.70      0.45       425
                             Baroque       0.50      0.60      0.55       400
                  Baroque_and_Rococo       0.56      0.79      0.66       615
                Color_Field_Painting       0.59      0.79      0.67       144
                              Cubism       0.46      0.68      0.55       230
                   Early_Renaissance       0.56      0.71      0.63       139
                       Expressionism       0.37      0.57      0.45       685
       Expressionism_and_Derivatives       0.51      0.61      0.55      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9305
Test F1-Score: 0.5511
Test Precision: 0.5346
Test Recall: 0.6186

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.35      0.77      0.48       246
                         Art_Nouveau       0.64      0.49      0.56       425
                             Baroque       0.40      0.79      0.53       400
                  Baroque_and_Rococo       0.53      0.86      0.65       615
                Color_Field_Painting       0.59      0.76      0.67       144
                              Cubism       0.42      0.67      0.51       230
                   Early_Renaissance       0.45      0.82      0.58       139
                       Expressionism       0.30      0.72      0.42       685
       Expressionism_and_Derivatives       0.38      0.83      0.52      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9285
Test F1-Score: 0.5396
Test Precision: 0.5320
Test Recall: 0.5989

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.46      0.57      0.51       246
                         Art_Nouveau       0.48      0.60      0.54       425
                             Baroque       0.46      0.69      0.55       400
                  Baroque_and_Rococo       0.63      0.75      0.69       615
                Color_Field_Painting       0.66      0.76      0.71       144
                              Cubism       0.41      0.71      0.52       230
                   Early_Renaissance       0.52      0.74      0.61       139
                       Expressionism       0.34      0.64      0.44       685
       Expressionism_and_Derivatives       0.45      0.71      0.55      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9230
Test F1-Score: 0.5331
Test Precision: 0.5471
Test Recall: 0.5867

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.46      0.50      0.48       246
                         Art_Nouveau       0.44      0.67      0.53       425
                             Baroque       0.74      0.46      0.57       400
                  Baroque_and_Rococo       0.84      0.53      0.65       615
                Color_Field_Painting       0.60      0.78      0.68       144
                              Cubism       0.49      0.63      0.55       230
                   Early_Renaissance       0.64      0.60      0.62       139
                       Expressionism       0.36      0.53      0.43       685
       Expressionism_and_Derivatives       0.50      0.58      0.54      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.9402
Test F1-Score: 0.5833
Test Precision: 0.5827
Test Recall: 0.6172

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.57      0.52      0.55       246
                         Art_Nouveau       0.42      0.70      0.52       425
                             Baroque       0.52      0.71      0.60       400
                  Baroque_and_Rococo       0.71      0.71      0.71       615
                Color_Field_Painting       0.68      0.73      0.70       144
                              Cubism       0.49      0.70      0.58       230
                   Early_Renaissance       0.50      0.77      0.61       139
                       Expressionism       0.45      0.44      0.44       685
       Expressionism_and_Derivatives       0.58      0.56      0.57      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9394
Test F1-Score: 0.5811
Test Precision: 0.5559
Test Recall: 0.6428

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.49      0.59      0.54       246
                         Art_Nouveau       0.52      0.62      0.57       425
                             Baroque       0.51      0.71      0.59       400
                  Baroque_and_Rococo       0.58      0.84      0.68       615
                Color_Field_Painting       0.62      0.76      0.68       144
                              Cubism       0.61      0.59      0.60       230
                   Early_Renaissance       0.84      0.53      0.65       139
                       Expressionism       0.38      0.61      0.46       685
       Expressionism_and_Derivatives       0.45      0.74      0.56      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9262
Test F1-Score: 0.5488
Test Precision: 0.5337
Test Recall: 0.6170

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.38      0.69      0.49       246
                         Art_Nouveau       0.48      0.54      0.51       425
                             Baroque       0.58      0.61      0.59       400
                  Baroque_and_Rococo       0.71      0.66      0.68       615
                Color_Field_Painting       0.67      0.75      0.71       144
                              Cubism       0.44      0.67      0.53       230
                   Early_Renaissance       0.42      0.83      0.55       139
                       Expressionism       0.27      0.69      0.39       685
       Expressionism_and_Derivatives       0.37      0.80      0.51      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.9313
Test F1-Score: 0.5655
Test Precision: 0.5479
Test Recall: 0.6232

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.45      0.61      0.52       246
                         Art_Nouveau       0.51      0.65      0.57       425
                             Baroque       0.43      0.74      0.55       400
                  Baroque_and_Rococo       0.53      0.86      0.66       615
                Color_Field_Painting       0.68      0.77      0.72       144
                              Cubism       0.50      0.66      0.57       230
                   Early_Renaissance       0.60      0.77      0.68       139
                       Expressionism       0.32      0.67      0.44       685
       Expressionism_and_Derivatives       0.45      0.75      0.56      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9367
Test F1-Score: 0.5900
Test Precision: 0.5603
Test Recall: 0.6432

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.58      0.54      0.56       246
                         Art_Nouveau       0.45      0.67      0.54       425
                             Baroque       0.40      0.82      0.54       400
                  Baroque_and_Rococo       0.53      0.85      0.65       615
                Color_Field_Painting       0.68      0.77      0.72       144
                              Cubism       0.60      0.60      0.60       230
                   Early_Renaissance       0.67      0.73      0.70       139
                       Expressionism       0.44      0.45      0.44       685
       Expressionism_and_Derivatives       0.57      0.61      0.58      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9363
Test F1-Score: 0.5861
Test Precision: 0.5530
Test Recall: 0.6519

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.48      0.63      0.54       246
                         Art_Nouveau       0.43      0.68      0.52       425
                             Baroque       0.55      0.70      0.62       400
                  Baroque_and_Rococo       0.66      0.78      0.71       615
                Color_Field_Painting       0.70      0.68      0.69       144
                              Cubism       0.56      0.67      0.61       230
                   Early_Renaissance       0.51      0.78      0.62       139
                       Expressionism       0.43      0.55      0.48       685
       Expressionism_and_Derivatives       0.54      0.63      0.58      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9290
Test F1-Score: 0.5678
Test Precision: 0.5445
Test Recall: 0.6242

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.49      0.49      0.49       246
                         Art_Nouveau       0.48      0.61      0.54       425
                             Baroque       0.45      0.73      0.56       400
                  Baroque_and_Rococo       0.59      0.79      0.67       615
                Color_Field_Painting       0.63      0.79      0.70       144
                              Cubism       0.38      0.77      0.51       230
                   Early_Renaissance       0.56      0.71      0.63       139
                       Expressionism       0.37      0.56      0.45       685
       Expressionism_and_Derivatives       0.50      0.63      0.55      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9135
Test F1-Score: 0.5391
Test Precision: 0.5501
Test Recall: 0.5853

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.56      0.46      0.50       246
                         Art_Nouveau       0.42      0.64      0.51       425
                             Baroque       0.41      0.80      0.54       400
                  Baroque_and_Rococo       0.56      0.83      0.67       615
                Color_Field_Painting       0.65      0.76      0.71       144
                              Cubism       0.38      0.77      0.51       230
                   Early_Renaissance       0.57      0.68      0.62       139
                       Expressionism       0.36      0.45      0.40       685
       Expressionism_and_Derivatives       0.54      0.51      0.52      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9241
Test F1-Score: 0.5670
Test Precision: 0.5599
Test Recall: 0.6166

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.46      0.61      0.52       246
                         Art_Nouveau       0.63      0.46      0.53       425
                             Baroque       0.52      0.67      0.59       400
                  Baroque_and_Rococo       0.69      0.73      0.71       615
                Color_Field_Painting       0.67      0.74      0.70       144
                              Cubism       0.46      0.64      0.54       230
                   Early_Renaissance       0.72      0.71      0.72       139
                       Expressionism       0.27      0.75      0.40       685
       Expressionism_and_Derivatives       0.37      0.83      0.51      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9232
Test F1-Score: 0.5585
Test Precision: 0.5489
Test Recall: 0.6009

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.53      0.53      0.53       246
                         Art_Nouveau       0.43      0.65      0.52       425
                             Baroque       0.49      0.69      0.57       400
                  Baroque_and_Rococo       0.60      0.77      0.68       615
                Color_Field_Painting       0.73      0.63      0.68       144
                              Cubism       0.54      0.64      0.59       230
                   Early_Renaissance       0.58      0.68      0.62       139
                       Expressionism       0.38      0.52      0.44       685
       Expressionism_and_Derivatives       0.52      0.60      0.55      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9240
Test F1-Score: 0.5729
Test Precision: 0.5701
Test Recall: 0.5992

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.53      0.47      0.50       246
                         Art_Nouveau       0.43      0.69      0.53       425
                             Baroque       0.47      0.70      0.56       400
                  Baroque_and_Rococo       0.60      0.79      0.68       615
                Color_Field_Painting       0.68      0.77      0.72       144
                              Cubism       0.50      0.67      0.57       230
                   Early_Renaissance       0.52      0.76      0.62       139
                       Expressionism       0.36      0.62      0.46       685
       Expressionism_and_Derivatives       0.50      0.63      0.56      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9286
Test F1-Score: 0.5826
Test Precision: 0.5770
Test Recall: 0.6223

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.49      0.62      0.54       246
                         Art_Nouveau       0.45      0.69      0.55       425
                             Baroque       0.45      0.80      0.58       400
                  Baroque_and_Rococo       0.55      0.86      0.67       615
                Color_Field_Painting       0.61      0.82      0.70       144
                              Cubism       0.64      0.51      0.57       230
                   Early_Renaissance       0.76      0.63      0.69       139
                       Expressionism       0.47      0.51      0.49       685
       Expressionism_and_Derivatives       0.58      0.60      0.58      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9221
Test F1-Score: 0.5891
Test Precision: 0.5922
Test Recall: 0.6139

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.52      0.60      0.56       246
                         Art_Nouveau       0.46      0.64      0.53       425
                             Baroque       0.59      0.70      0.64       400
                  Baroque_and_Rococo       0.73      0.73      0.73       615
                Color_Field_Painting       0.71      0.76      0.73       144
                              Cubism       0.67      0.52      0.59       230
                   Early_Renaissance       0.72      0.60      0.65       139
                       Expressionism       0.32      0.68      0.44       685
       Expressionism_and_Derivatives       0.42      0.76      0.54      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9174
Test F1-Score: 0.5783
Test Precision: 0.5854
Test Recall: 0.6112

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.53      0.60      0.56       246
                         Art_Nouveau       0.43      0.68      0.53       425
                             Baroque       0.46      0.72      0.56       400
                  Baroque_and_Rococo       0.65      0.73      0.69       615
                Color_Field_Painting       0.77      0.72      0.74       144
                              Cubism       0.39      0.72      0.51       230
                   Early_Renaissance       0.77      0.65      0.71       139
                       Expressionism       0.31      0.68      0.42       685
       Expressionism_and_Derivatives       0.42      0.77      0.55      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9155
Test F1-Score: 0.5521
Test Precision: 0.5706
Test Recall: 0.5831

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.47      0.62      0.54       246
                         Art_Nouveau       0.35      0.76      0.48       425
                             Baroque       0.52      0.68      0.59       400
                  Baroque_and_Rococo       0.64      0.73      0.69       615
                Color_Field_Painting       0.73      0.55      0.63       144
                              Cubism       0.62      0.53      0.57       230
                   Early_Renaissance       0.70      0.67      0.69       139
                       Expressionism       0.54      0.25      0.34       685
       Expressionism_and_Derivatives       0.64      0.42      0.51      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9196
Test F1-Score: 0.5699
Test Precision: 0.5551
Test Recall: 0.6100

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.51      0.56      0.53       246
                         Art_Nouveau       0.53      0.62      0.57       425
                             Baroque       0.40      0.75      0.52       400
                  Baroque_and_Rococo       0.57      0.79      0.66       615
                Color_Field_Painting       0.61      0.80      0.69       144
                              Cubism       0.46      0.67      0.55       230
                   Early_Renaissance       0.57      0.65      0.61       139
                       Expressionism       0.55      0.33      0.41       685
       Expressionism_and_Derivatives       0.64      0.47      0.54      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9294
Test F1-Score: 0.6020
Test Precision: 0.5875
Test Recall: 0.6315

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.48      0.70      0.57       246
                         Art_Nouveau       0.47      0.66      0.54       425
                             Baroque       0.62      0.66      0.64       400
                  Baroque_and_Rococo       0.73      0.70      0.71       615
                Color_Field_Painting       0.62      0.76      0.69       144
                              Cubism       0.52      0.57      0.55       230
                   Early_Renaissance       0.76      0.60      0.67       139
                       Expressionism       0.37      0.62      0.46       685
       Expressionism_and_Derivatives       0.48      0.73      0.58      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9249
Test F1-Score: 0.5833
Test Precision: 0.5631
Test Recall: 0.6262

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.47      0.60      0.53       246
                         Art_Nouveau       0.47      0.65      0.55       425
                             Baroque       0.60      0.65      0.62       400
                  Baroque_and_Rococo       0.66      0.77      0.71       615
                Color_Field_Painting       0.64      0.75      0.69       144
                              Cubism       0.64      0.53      0.58       230
                   Early_Renaissance       0.66      0.69      0.68       139
                       Expressionism       0.47      0.45      0.46       685
       Expressionism_and_Derivatives       0.58      0.57      0.57      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step
Test AUC: 0.9141
Test F1-Score: 0.5642
Test Precision: 0.5470
Test Recall: 0.6161

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.56      0.49      0.52       246
                         Art_Nouveau       0.58      0.60      0.59       425
                             Baroque       0.44      0.70      0.54       400
                  Baroque_and_Rococo       0.58      0.76      0.66       615
                Color_Field_Painting       0.69      0.79      0.74       144
                              Cubism       0.31      0.78      0.44       230
                   Early_Renaissance       0.70      0.62      0.66       139
                       Expressionism       0.42      0.48      0.45       685
       Expressionism_and_Derivatives       0.60      0.52      0.56      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.9198
Test F1-Score: 0.5839
Test Precision: 0.5935
Test Recall: 0.6077

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.46      0.67      0.55       246
                         Art_Nouveau       0.29      0.80      0.43       425
                             Baroque       0.62      0.63      0.63       400
                  Baroque_and_Rococo       0.76      0.65      0.70       615
                Color_Field_Painting       0.66      0.72      0.69       144
                              Cubism       0.72      0.39      0.51       230
                   Early_Renaissance       0.58      0.70      0.64       139
                       Expressionism       0.40      0.57      0.47       685
       Expressionism_and_Derivatives       0.48      0.70      0.57      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.9198
Test F1-Score: 0.5834
Test Precision: 0.5702
Test Recall: 0.6164

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.55      0.55      0.55       246
                         Art_Nouveau       0.52      0.61      0.56       425
                             Baroque       0.62      0.58      0.60       400
                  Baroque_and_Rococo       0.75      0.67      0.71       615
                Color_Field_Painting       0.60      0.82      0.69       144
                              Cubism       0.35      0.77      0.48       230
                   Early_Renaissance       0.50      0.75      0.60       139
                       Expressionism       0.41      0.48      0.44       685
       Expressionism_and_Derivatives       0.52      0.61      0.56      1047
                             Fauvism   



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 4s/step
Test AUC: 0.9131
Test F1-Score: 0.5715
Test Precision: 0.5583
Test Recall: 0.6084

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.40      0.69      0.51       246
                         Art_Nouveau       0.39      0.69      0.50       425
                             Baroque       0.49      0.75      0.59       400
                  Baroque_and_Rococo       0.62      0.77      0.69       615
                Color_Field_Painting       0.63      0.78      0.70       144
                              Cubism       0.44      0.60      0.51       230
                   Early_Renaissance       0.58      0.75      0.65       139
                       Expressionism       0.39      0.54      0.45       685
       Expressionism_and_Derivatives       0.48      0.69      0.56      1047
                             Fauvism   

In [16]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import seaborn as sns

# === Define focal loss (same as training) ===
def focal_loss(gamma=1.5, alpha=0.35):
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, dtype='float32')
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma) * y_true + (1 - alpha) * K.pow(y_pred, gamma) * (1 - y_true)
        return K.mean(weight * cross_entropy)
    loss.__name__ = "focal_loss"
    return loss


# === Load model (Epoch 25 = model_epoch_24.h5) ===
checkpoint_path = "checkpoints_final_focal_testing_3/model_epoch_24.h5"
model = load_model(checkpoint_path, custom_objects={'loss': focal_loss(gamma=1.5, alpha=0.35)})
print("\n✅ Model loaded successfully.")


# === Automatically find gating layer ===
gating_layer = None
for layer in model.layers:
    lname = layer.name.lower()
    if "gate" in lname or "gating" in lname:
        gating_layer = layer
        print(f"🟢 Found gating layer: {layer.name} (shape: {layer.output.shape})")
        break

if gating_layer is None:
    raise ValueError("❌ No gating layer found. Check model summary for layer names containing 'gate'.")


# === Build submodel to extract gating outputs ===
gate_model = tf.keras.Model(inputs=model.input, outputs=gating_layer.output)
print("✅ Gating submodel created.")


# === Test generator ===
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_directory,
    x_col='image_path',
    y_col=None,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)
test_steps = int(np.ceil(len(test_df) / BATCH_SIZE))


# === Generate gating activations for test data ===
print("\n🔹 Extracting gating activations from test set...")
gates = gate_model.predict(test_generator, steps=test_steps, verbose=1)
gates = gates[:len(test_df)]

# === Fix gating shape if flattened ===
num_styles = len(valid_classes)
num_features = gates.shape[-1] // num_styles if gates.ndim == 2 else gates.shape[-1]

if gates.ndim == 2:
    print(f"⚠️ Detected flattened gating output ({gates.shape}); reshaping to (N, {num_styles}, {num_features})")
    gates = gates.reshape(gates.shape[0], num_styles, num_features)

print("✅ Gating shape after correction:", gates.shape)


# === Inspect distribution to calibrate threshold ===
flat_gates = gates.ravel()
print(f"\nGate stats — min: {flat_gates.min():.4f}, mean: {flat_gates.mean():.4f}, median: {np.median(flat_gates):.4f}")

style_names = valid_classes
num_styles = len(style_names)
num_features = gates.shape[-1]


# === Get classifier predictions to group by style ===
print("\n🔹 Getting predicted labels from classifier...")
predictions = model.predict(test_generator, steps=test_steps, verbose=1)
predictions = predictions[:len(test_df)]
print("✅ Predictions obtained with shape:", predictions.shape)


# === Per-style average gates based on predicted membership ===
style_threshold = 0.32  # same threshold as used for classification
avg_gates_per_style = np.zeros((num_styles, num_features))

for i, style in enumerate(style_names):
    mask = predictions[:, i] > style_threshold
    if np.sum(mask) == 0:
        avg_gates_per_style[i, :] = np.nan
        print(f"{style:35s}: no samples above threshold, skipped.")
    else:
        # average over all test images with this style active
        avg_gates_per_style[i, :] = np.mean(gates[mask, i, :], axis=0)
        print(f"{style:35s}: used {np.sum(mask)} samples for averaging.")


# === Identify least-active features per style (gate < 0.5) ===
not_active_threshold = 0.3
print(f"\n🔹 Least-active features per style (gate < {not_active_threshold})")

def _print_indices_wrapped(label, idx_array, width=80):
    s = ", ".join(map(str, idx_array))
    print(f"{label}[{len(idx_array)}]:")
    start = 0
    while start < len(s):
        print("   " + s[start:start+width])
        start += width

for i, style in enumerate(style_names):
    if np.isnan(avg_gates_per_style[i]).all():
        continue
    inactive_idx = np.where(avg_gates_per_style[i] < not_active_threshold)[0]
    _print_indices_wrapped(f"{style:35s} ", inactive_idx)





✅ Model loaded successfully.
🟢 Found gating layer: gating_layer (shape: (None, 28672))
✅ Gating submodel created.
Found 8145 validated image filenames.

🔹 Extracting gating activations from test set...
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 4s/step
⚠️ Detected flattened gating output ((8145, 28672)); reshaping to (N, 28, 1024)
✅ Gating shape after correction: (8145, 28, 1024)

Gate stats — min: 0.0000, mean: 0.5084, median: 0.4942

🔹 Getting predicted labels from classifier...
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 4s/step
✅ Predictions obtained with shape: (8145, 28)
Abstract_Expressionism             : used 355 samples for averaging.
Art_Nouveau                        : used 600 samples for averaging.
Baroque                            : used 425 samples for averaging.
Baroque_and_Rococo                 : used 593 samples for averaging.
Color_Field_Painting               : used 176 samples for averaging.
Cubism                   

In [18]:
# === Load model (Epoch 25 = model_epoch_24.h5) ===
checkpoint_path = "checkpoints_final_focal_testing_3/model_epoch_24.h5"
model = load_model(checkpoint_path, custom_objects={'loss': focal_loss(gamma=1.5, alpha=0.35)})
print("\n✅ Model loaded successfully.")

# --- Predict on test set with epoch 25 model ---
pred = model.predict(test_generator, steps=test_steps, verbose=1)[:len(test_df)]
pred_bin = (pred > threshold).astype(int)

# --- Exact-match correctness (multilabel) ---
is_correct = np.all(pred_bin == test_labels, axis=1)
correct_idx = np.where(is_correct)[0]
incorrect_idx = np.where(~is_correct)[0]
print(f"\nCorrect: {len(correct_idx)} | Misclassified: {len(incorrect_idx)}")

# --- Sample 10 correct + 40 misclassified (or as many as available) ---
np.random.seed(42)
sample_correct = np.random.choice(correct_idx, min(10, len(correct_idx)), replace=False) if len(correct_idx) else []
sample_incorrect = np.random.choice(incorrect_idx, min(40, len(incorrect_idx)), replace=False) if len(incorrect_idx) else []

# --- Helper: decode indices -> style names ---
def decode(vec):
    return [valid_classes[i] for i in np.where(vec == 1)[0]]

print("\n=== ✅ Correctly Classified (up to 10) ===")
for idx in sample_correct:
    print(f"Image {idx:5d} | TRUE={decode(test_labels[idx])} | PRED={decode(pred_bin[idx])}")

print("\n=== ❌ Misclassified (up to 40) ===")
for idx in sample_incorrect:
    t = set(decode(test_labels[idx]))
    p = set(decode(pred_bin[idx]))
    print(f"Image {idx:5d}")
    print(f"  TRUE : {sorted(t)}")
    print(f"  PRED : {sorted(p)}")
    print(f"  ↳ overlap={sorted(t & p)}, missed={sorted(t - p)}, extras={sorted(p - t)}\n")

# --- Style-vs-style confusion summary (who gets mistaken for whom) ---
num_styles = len(valid_classes)
confuse = np.zeros((num_styles, num_styles), dtype=int)   # counts of i(true)->j(pred extra)
missed = np.zeros(num_styles, dtype=int)                  # true i missed entirely
spurious = np.zeros(num_styles, dtype=int)                # predicted j when not present

for i in range(len(test_df)):
    true_i = np.where(test_labels[i] == 1)[0]
    pred_i = np.where(pred_bin[i] == 1)[0]

    # missed true labels
    for ti in true_i:
        if ti not in pred_i:
            missed[ti] += 1

    # spurious positives
    for pj in pred_i:
        if pj not in true_i:
            spurious[pj] += 1

    # pairwise confusions: count (true ti) -> (pred pj) for pj not in true
    for ti in true_i:
        for pj in pred_i:
            if pj != ti and pj not in true_i:  # only count when pj is an extra
                confuse[ti, pj] += 1

# Print top-3 confusions per style
print("\n=== 🔁 Top confusions by style (true → predicted extra) ===")
for ti, style in enumerate(valid_classes):
    row = confuse[ti].copy()
    row[ti] = 0
    if row.sum() == 0:
        continue
    top_js = row.argsort()[-3:][::-1]
    pairs = [f"{valid_classes[j]} ({row[j]})" for j in top_js if row[j] > 0]
    if pairs:
        print(f"{style:35s} → " + ", ".join(pairs))

# Also show most-missed and most-spurious styles
top_missed = np.argsort(missed)[-5:][::-1]
top_spurious = np.argsort(spurious)[-5:][::-1]

print("\n=== 🚫 Most missed true styles ===")
for k in top_missed:
    if missed[k] > 0:
        print(f"{valid_classes[k]:35s}: missed {missed[k]}")

print("\n=== ➕ Most spurious predicted styles ===")
for k in top_spurious:
    if spurious[k] > 0:
        print(f"{valid_classes[k]:35s}: spurious {spurious[k]}")




✅ Model loaded successfully.
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 5s/step


NameError: name 'threshold' is not defined

In [27]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import (
    f1_score, roc_auc_score, precision_score, recall_score,
    classification_report
)
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow.keras.backend as K

# === Define focal loss (must match training) ===
def focal_loss(gamma=1.5, alpha=0.35):
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, dtype='float32')
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma) * y_true + (1 - alpha) * K.pow(y_pred, gamma) * (1 - y_true)
        return K.mean(weight * cross_entropy)
    loss.__name__ = 'focal_loss'
    return loss


# === Setup ===
threshold = 0.32
checkpoint_path = "checkpoints_final_focal_testing_3/model_epoch_24.h5"
print("=== Evaluating model_epoch_24.h5 (Epoch 25) ===")

# === Prepare test data ===
test_df = test_df.reset_index(drop=True)     # important fix
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_directory,
    x_col='image_path',
    y_col=None,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)
test_steps = int(np.ceil(len(test_df) / BATCH_SIZE))

# === Load model ===
model = load_model(checkpoint_path, custom_objects={'loss': focal_loss(gamma=1.5, alpha=0.35)})
print("\n✅ Model loaded successfully.")

# === Predict ===
preds = model.predict(test_generator, steps=test_steps, verbose=1)[:len(test_df)]
preds_bin = (preds > threshold).astype(int)

# === Metrics ===
test_auc = roc_auc_score(test_labels, preds, average='macro')
test_f1 = f1_score(test_labels, preds_bin, average='macro', zero_division=0)
test_precision = precision_score(test_labels, preds_bin, average='macro', zero_division=0)
test_recall = recall_score(test_labels, preds_bin, average='macro', zero_division=0)

print(f"\nTest AUC:       {test_auc:.4f}")
print(f"Test F1-Score:  {test_f1:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall:    {test_recall:.4f}")

print("\nClassification Report:")
print(classification_report(
    test_labels, preds_bin, target_names=valid_classes, zero_division=0
))

# === Correct vs misclassified ===
def decode(vec): 
    return [valid_classes[i] for i in np.where(vec == 1)[0]]

is_correct = np.all(preds_bin == test_labels, axis=1)
correct_idx = np.where(is_correct)[0]
incorrect_idx = np.where(~is_correct)[0]
print(f"\nCorrect: {len(correct_idx)}  |  Misclassified: {len(incorrect_idx)}")

np.random.seed(42)
sample_correct = np.random.choice(correct_idx, min(10, len(correct_idx)), replace=False) if len(correct_idx) else []
sample_incorrect = np.random.choice(incorrect_idx, min(40, len(incorrect_idx)), replace=False) if len(incorrect_idx) else []

# === Helper functions for displaying images ===
def _decode(vec): 
    return [valid_classes[i] for i in np.where(vec == 1)[0]]

def _img_path(idx):
    return os.path.join(image_directory, test_df.iloc[idx]['image_path'])

def _title_for(idx, is_correct):
    t = set(_decode(test_labels.iloc[idx]))
    p = set(_decode(preds_bin[idx]))
    overlap = sorted(t & p)
    missed  = sorted(t - p)
    extras  = sorted(p - t)
    if is_correct:
        return "✅ CORRECT\n" + ", ".join(sorted(t))
    else:
        lines = ["❌ MISCLASSIFIED",
                 "T: " + (", ".join(sorted(t)) or "—"),
                 "P: " + (", ".join(sorted(p)) or "—")]
        if missed: lines.append("missed: " + ", ".join(missed))
        if extras: lines.append("extras: " + ", ".join(extras))
        return "\n".join(lines)

def _show_grid(indices, title, correct_flag=False, cols=5, save_path=None):
    import math
    from textwrap import wrap
    
    if len(indices) == 0:
        print(f"(no images to show for {title})")
        return

    rows = math.ceil(len(indices) / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4.5, rows * 5))  # more space
    plt.suptitle(title, fontsize=18, y=1.03)
    axes = axes.flatten() if isinstance(axes, np.ndarray) else [axes]

    for k in range(rows * cols):
        ax = axes[k]
        if k < len(indices):
            idx = indices[k]
            try:
                img = mpimg.imread(_img_path(idx))
                ax.imshow(img)
            except Exception:
                ax.text(0.5, 0.5, f"Failed to load\n{_img_path(idx)}",
                        ha='center', va='center', fontsize=8)
            title_text = _title_for(idx, is_correct=correct_flag)
            wrapped = "\n".join(wrap(title_text, width=40))  # wrap long titles
            ax.set_title(wrapped, fontsize=8, loc='left')
        ax.axis('off')

    plt.tight_layout(rect=[0, 0, 1, 0.95], h_pad=3.5, w_pad=2.0)  # more padding
    if save_path:
        fig.savefig(save_path, bbox_inches='tight', dpi=150)
        print(f"✅ Saved: {save_path}")
    plt.close(fig)


# === Show image grids ===
#_show_grid(sample_correct, "Correctly Classified (Epoch 25)", correct_flag=True)
#_show_grid(sample_incorrect, "Misclassified (Epoch 25)", correct_flag=False)


=== Evaluating model_epoch_24.h5 (Epoch 25) ===
Found 8145 validated image filenames.


  self._warn_if_super_not_called()



✅ Model loaded successfully.
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 5s/step

Test AUC:       0.9294
Test F1-Score:  0.6020
Test Precision: 0.5875
Test Recall:    0.6315

Classification Report:
                                      precision    recall  f1-score   support

              Abstract_Expressionism       0.48      0.70      0.57       246
                         Art_Nouveau       0.47      0.66      0.54       425
                             Baroque       0.62      0.66      0.64       400
                  Baroque_and_Rococo       0.73      0.70      0.71       615
                Color_Field_Painting       0.62      0.76      0.69       144
                              Cubism       0.52      0.57      0.55       230
                   Early_Renaissance       0.76      0.60      0.67       139
                       Expressionism       0.37      0.62      0.46       685
       Expressionism_and_Derivatives       0.48      0.73      0.58      104

In [28]:
# === Ensure save directory exists ===
save_dir = "classification_examples"
os.makedirs(save_dir, exist_ok=True)

# === Per-class visualization ===
for class_idx, class_name in enumerate(valid_classes):
    # TRUE POSITIVES
    tp_mask = (preds_bin[:, class_idx] == 1) & (test_labels.iloc[:, class_idx] == 1)
    tp_indices = np.where(tp_mask)[0]

    # FALSE NEGATIVES
    fn_mask = (preds_bin[:, class_idx] == 0) & (test_labels.iloc[:, class_idx] == 1)
    fn_indices = np.where(fn_mask)[0]

    if len(tp_indices) == 0 and len(fn_indices) == 0:
        print(f"\n⛔ Skipping '{class_name}' — no examples found.")
        continue

    print(f"\n📂 Style: {class_name} | TPs: {len(tp_indices)} | FNs: {len(fn_indices)}")

    # Sample
    sampled_tp = np.random.choice(tp_indices, min(10, len(tp_indices)), replace=False) if len(tp_indices) else []
    sampled_fn = np.random.choice(fn_indices, min(40, len(fn_indices)), replace=False) if len(fn_indices) else []

    # Save paths
    save_path_tp = os.path.join(save_dir, f"{class_name}_TP.png")
    save_path_fn = os.path.join(save_dir, f"{class_name}_FN.png")

    # Visualize and save
    _show_grid(sampled_tp, f"TRUE POSITIVES — {class_name}", correct_flag=True, save_path=save_path_tp)
    _show_grid(sampled_fn, f"FALSE NEGATIVES — {class_name}", correct_flag=False, save_path=save_path_fn)



📂 Style: Abstract_Expressionism | TPs: 171 | FNs: 75


  plt.tight_layout(rect=[0, 0, 1, 0.95], h_pad=3.5, w_pad=2.0)  # more padding
  fig.savefig(save_path, bbox_inches='tight', dpi=150)


✅ Saved: classification_examples/Abstract_Expressionism_TP.png


  plt.tight_layout(rect=[0, 0, 1, 0.95], h_pad=3.5, w_pad=2.0)  # more padding
  fig.savefig(save_path, bbox_inches='tight', dpi=150)


✅ Saved: classification_examples/Abstract_Expressionism_FN.png

📂 Style: Art_Nouveau | TPs: 279 | FNs: 146
✅ Saved: classification_examples/Art_Nouveau_TP.png
✅ Saved: classification_examples/Art_Nouveau_FN.png

📂 Style: Baroque | TPs: 264 | FNs: 136
✅ Saved: classification_examples/Baroque_TP.png
✅ Saved: classification_examples/Baroque_FN.png

📂 Style: Baroque_and_Rococo | TPs: 431 | FNs: 184
✅ Saved: classification_examples/Baroque_and_Rococo_TP.png
✅ Saved: classification_examples/Baroque_and_Rococo_FN.png

📂 Style: Color_Field_Painting | TPs: 110 | FNs: 34
✅ Saved: classification_examples/Color_Field_Painting_TP.png
✅ Saved: classification_examples/Color_Field_Painting_FN.png

📂 Style: Cubism | TPs: 132 | FNs: 98
✅ Saved: classification_examples/Cubism_TP.png
✅ Saved: classification_examples/Cubism_FN.png

📂 Style: Early_Renaissance | TPs: 84 | FNs: 55
✅ Saved: classification_examples/Early_Renaissance_TP.png
✅ Saved: classification_examples/Early_Renaissance_FN.png

📂 Style: Expr