In [1]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, roc_curve, auc
import numpy as np
import plotly.express as px

In [21]:
# Set up paths (Modify these paths based on your system)
sample_csv_path = "C:\\Users\\Lian4ik\\Desktop\\derm_project\\data\\sample_dataset.csv"
sample_image_dir = "C:\\Users\\Lian4ik\\Desktop\\derm_project\\data\\sample_images"

In [22]:
# Load the dataset
df_sample = pd.read_csv(sample_csv_path)
print(df_sample.head())  # Check if the dataset is loaded correctly

             image_id  target
0  isic_0012208_4.jpg       1
1  isic_6850708_0.jpg       0
2  isic_0011045_4.jpg       1
3  isic_0031784_0.jpg       1
4  isic_0021274_0.jpg       0


In [23]:
# Image preprocessing for inceptionV3
img_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.inception_v3.preprocess_input,
    validation_split=0.3
)

In [24]:
df_sample["target"] = df_sample["target"].astype(str)


In [25]:
# Train generator
train_generator = img_generator.flow_from_dataframe(
    dataframe=df_sample,
    directory=sample_image_dir,
    x_col="image_id",
    y_col="target",
    target_size=(299, 299),
    batch_size=32,
    class_mode="binary",
    shuffle=False,
    subset="training",
    seed=4
)

Found 10164 validated image filenames belonging to 2 classes.


In [26]:
# Validation generator
val_generator = img_generator.flow_from_dataframe(
    dataframe=df_sample,
    directory=sample_image_dir,
    x_col="image_id",
    y_col="target",
    target_size=(299, 299),
    batch_size=32,
    class_mode="binary",
    shuffle=False,  # Ensure validation order remains consistent
    subset="validation",
    seed=42
)

Found 4356 validated image filenames belonging to 2 classes.


In [27]:
# Load the pre-trained InceptionV3 model
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

In [28]:
# Fine-tune last 30 layers
for layer in base_model.layers[-30:]:
    layer.trainable = True


In [29]:
# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)   # output = Dense(1, activation='sigmoid')(x)
# for softmax: class_mode="categorical"   
# for sigmoid: class_mode= "binary"
# for softmax: loss='categorical_crossentropy'
# for sigmoid: loss='binary_crossentropy'
# with softmax: y_pred = np.argmax(model.predict(val_generator), axis=1)  -> Gets index of highest probability



In [30]:
# Create final model
model = Model(inputs=base_model.input, outputs=output)

In [31]:
# Compile the model
model.compile(
    optimizer=Adam(learning_rate=5e-5),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',      # Percentage of correct predictions.
        tf.keras.metrics.AUC(name="auc"),  # how well the model separates classes across thresholds
        tf.keras.metrics.Precision(name="precision"),   # proportion of predicted malignant cases that are actually malignant (High precision = fewer false positives (FP))
        tf.keras.metrics.Recall(name="recall")    # The proportion of actual malignant cases that were correctly identified  (High recall = fewer false negatives (FN))
    ]
)


In [32]:
# Set early stopping criteria
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train the model
history = model.fit(
    train_generator,
    epochs=1,
    validation_data=val_generator,
    verbose=1,
    callbacks=[early_stopping]
)

[1m 15/318[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:42:48[0m 44s/step - accuracy: 0.5956 - auc: 0.6374 - loss: 0.6709 - precision: 0.6524 - recall: 0.4826

In [None]:
y_true = np.array(val_generator.classes)  # Ensure correct order
y_pred_prob = model.predict(val_generator).ravel()  # Get probabilities

# Find the best decision threshold from ROC curve
fpr, tpr, thresholds = roc_curve(y_true, y_pred_prob)
optimal_idx = np.argmax(tpr - fpr)  # Best balance between TPR & FPR
optimal_threshold = thresholds[optimal_idx]

# Convert probabilities to binary labels using the best threshold
y_pred = (y_pred_prob > optimal_threshold).astype("int32")

print(f"Optimal Threshold: {optimal_threshold:.4f}")


In [None]:
# Compute metrics
accuracy = np.mean(y_pred == y_true)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

In [None]:
# Print evaluation results
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1 Score: {f1:.4f}")
print(f"Validation AUC: {roc_auc:.4f}")

In [None]:
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
df_cm = pd.DataFrame(cm, index=['Benign', 'Malignant'], columns=['Benign', 'Malignant'])
fig = px.imshow(df_cm, text_auto=True, color_continuous_scale='Blues')
fig.show()

In [None]:
# Save the trained model
model.save("inceptionV3_model.keras")
print("Model saved as inceptionV3_model.keras")