In [16]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from sklearn.utils import class_weight


# paths to the training and testing directories
train_dir = '../Data/Skin_Data/Training/'
test_dir = '../Data/Skin_Data/Testing/'

# load datasets using image_dataset_from_directory
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    labels='inferred',
    label_mode='binary',
    batch_size=16,
    image_size=(224, 224),
    shuffle=True,
    seed=42
)

validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    labels='inferred',
    label_mode='binary',
    batch_size=16,
    image_size=(224, 224),
    shuffle=False
)

# data augmentation w/ keras layers
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal_and_vertical'),
    layers.RandomRotation(0.4),
    layers.RandomZoom(0.2),
    layers.RandomWidth(0.2),
    layers.RandomHeight(0.2),
])

# prefetch the datasets to help local performance
AUTOTUNE = tf.data.AUTOTUNE

train_dataset = train_dataset.map(lambda x, y: (data_augmentation(x, training=True), y))
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)

validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)

# build the model
model = models.Sequential()

# input layer
model.add(layers.InputLayer(shape=(224, 224, 3)))

# conv layer 1
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())

# conv layer 2
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())

# pooling layer 1
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# conv layer 3
model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())

# conv layer 4
model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())

# pooling layer 2
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# convlLayer 5
model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())

# pooling layer 3
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# pooling
model.add(GlobalAveragePooling2D())

# fully connected layers
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# compile model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)

# print model summary
model.summary()

# train model
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=validation_dataset
)

# evaluate on training data
train_scores = model.evaluate(
    train_dataset,
    verbose=0
)

print(f"Training Loss: {train_scores[0]:.4f}")
print(f"Training Accuracy: {train_scores[1]*100:.2f}%")
print(f"Training AUC: {train_scores[2]:.4f}")

# evaluate on test data
test_scores = model.evaluate(
    validation_dataset,
    verbose=0
)

print(f"Test Loss: {test_scores[0]:.4f}")
print(f"Test Accuracy: {test_scores[1]*100:.2f}%")
print(f"Test AUC: {test_scores[2]:.4f}")


Found 204 files belonging to 2 classes.
Found 84 files belonging to 2 classes.
Image batch shape: (16, 268, 221, 3)
Label batch shape: (16, 1)


Epoch 1/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3s/step - accuracy: 0.4619 - auc: 0.5965 - loss: 1.2033 - val_accuracy: 0.5000 - val_auc: 0.3688 - val_loss: 1.2405
Epoch 2/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3s/step - accuracy: 0.7272 - auc: 0.5558 - loss: 0.9672 - val_accuracy: 0.4881 - val_auc: 0.4654 - val_loss: 1.0775
Epoch 3/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3s/step - accuracy: 0.7759 - auc: 0.5991 - loss: 0.8635 - val_accuracy: 0.4762 - val_auc: 0.5737 - val_loss: 1.0403
Epoch 4/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3s/step - accuracy: 0.7771 - auc: 0.6963 - loss: 0.8434 - val_accuracy: 0.5238 - val_auc: 0.6117 - val_loss: 1.0317
Epoch 5/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3s/step - accuracy: 0.7734 - auc: 0.7170 - loss: 0.8755 - val_accuracy: 0.5714 - val_auc: 0.6366 - val_loss: 1.0253
Epoch 6/20
[1m13/13[0m [32m━━━━━━━━━━

In [22]:
import tensorflow as tf
from tensorflow.keras import layers, models # type: ignore
from tensorflow.keras.applications import EfficientNetB0 # type: ignore
from tensorflow.keras.preprocessing import image_dataset_from_directory # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau # type: ignore
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# paths to the training and testing directories
train_dir = '../Data/Skin_Data/Training/'
test_dir = '../Data/Skin_Data/Testing/'

# params
BATCH_SIZE = 16
AUTOTUNE = tf.data.AUTOTUNE

# load datasets
train_dataset = image_dataset_from_directory(
    train_dir, labels='inferred', label_mode='binary',
    batch_size=BATCH_SIZE, image_size=(224,224), shuffle=True, seed=42)

validation_dataset = image_dataset_from_directory(
    test_dir, labels='inferred', label_mode='binary',
    batch_size=BATCH_SIZE, image_size=(224,224), shuffle=False)

# compute class weights, helps w/ small dataset. found on stack overflow + sklearn docs
train_labels = np.concatenate([labels for _, labels in train_dataset], axis=0).ravel()

class_weights = compute_class_weight(
    class_weight='balanced', classes=np.unique(train_labels), y=train_labels)

class_weights = dict(enumerate(class_weights))
print("Class Weights:", class_weights)

# create data augmentation, small dataset needs it
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal_and_vertical'),
    layers.RandomRotation(0.5),
    layers.RandomZoom(0.3),
    layers.RandomContrast(0.3),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
])

# apply data augmentation
train_dataset = train_dataset.map(
    lambda x, y: (data_augmentation(x, training=True), y),
    num_parallel_calls=AUTOTUNE)

# prefetch for performance, running locally so this helps
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)

# build the base model w/ efficient net b0
base_model = EfficientNetB0(input_shape=(224,224) + (3,), include_top=False, weights='imagenet')
base_model.trainable = False  # Freeze the base model initially

inputs = tf.keras.Input(shape=(224,224) + (3,))
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)

# compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

# callbacks, found info on keras docs
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# Train the model
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Fine-tune the model
base_model.trainable = True  # Unfreeze the base model
fine_tune_at = len(base_model.layers) // 2

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile the model with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])


history_fine = model.fit(
    train_dataset,
    epochs=10,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Evaluate the model
train_scores = model.evaluate(train_dataset, verbose=0)
print(f"Training Loss: {train_scores[0]:.4f}")
print(f"Training Accuracy: {train_scores[1]*100:.2f}%")
print(f"Training AUC: {train_scores[2]:.4f}")

validation_scores = model.evaluate(validation_dataset, verbose=0)
print(f"Validation Loss: {validation_scores[0]:.4f}")
print(f"Validation Accuracy: {validation_scores[1]*100:.2f}%")
print(f"Validation AUC: {validation_scores[2]:.4f}")

Found 204 files belonging to 2 classes.
Found 84 files belonging to 2 classes.
Class Weights: {0: 2.4285714285714284, 1: 0.6296296296296297}
Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 663ms/step - accuracy: 0.4462 - auc: 0.4685 - loss: 0.8176 - val_accuracy: 0.5357 - val_auc: 0.5255 - val_loss: 0.7037 - learning_rate: 0.0010
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 808ms/step - accuracy: 0.5612 - auc: 0.6030 - loss: 0.7167 - val_accuracy: 0.5714 - val_auc: 0.6375 - val_loss: 0.6614 - learning_rate: 0.0010
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 812ms/step - accuracy: 0.6572 - auc: 0.6511 - loss: 0.6355 - val_accuracy: 0.6548 - val_auc: 0.7302 - val_loss: 0.6224 - learning_rate: 0.0010
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 746ms/step - accuracy: 0.6698 - auc: 0.8067 - loss: 0.5620 - val_accuracy: 0.7024 - val_auc: 0.7809 - val_loss: 0.5835 - learn