In [1]:
# =====================================================
# INSTALL & IMPORTS
# =====================================================
!pip install -q kagglehub imagehash

import os, shutil, random
import numpy as np
from PIL import Image
import imagehash
import kagglehub
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras import mixed_precision
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score
)
from sklearn.preprocessing import label_binarize

# =====================================================
# MIXED PRECISION
# =====================================================
mixed_precision.set_global_policy("mixed_float16")

# =====================================================
# DOWNLOAD DATASET
# =====================================================
root_path = kagglehub.dataset_download(
    "ismailpromus/skin-diseases-image-dataset"
)

RAW_DATASET = os.path.join(root_path, "IMG_CLASSES")
BASE_DATASET = "/content/skin_dataset"

if os.path.exists(BASE_DATASET):
    shutil.rmtree(BASE_DATASET)

shutil.copytree(RAW_DATASET, BASE_DATASET)

# =====================================================
# REMOVE DUPLICATE IMAGES (pHash)
# =====================================================
hashes = {}
removed = 0

for cls in os.listdir(BASE_DATASET):
    cls_path = os.path.join(BASE_DATASET, cls)
    if not os.path.isdir(cls_path):
        continue

    for f in os.listdir(cls_path):
        if f.lower().endswith((".jpg", ".jpeg", ".png")):
            path = os.path.join(cls_path, f)
            try:
                img = Image.open(path).convert("RGB")
                h = imagehash.phash(img)
                if h in hashes:
                    os.remove(path)
                    removed += 1
                else:
                    hashes[h] = path
            except:
                pass

print("Removed duplicate images:", removed)

# =====================================================
# TRAIN / VAL / TEST SPLIT (70 / 15 / 15)
# =====================================================
SPLIT_DIR = "/content/skin_split"
TRAIN_DIR = os.path.join(SPLIT_DIR, "train")
VAL_DIR   = os.path.join(SPLIT_DIR, "val")
TEST_DIR  = os.path.join(SPLIT_DIR, "test")

for d in [TRAIN_DIR, VAL_DIR, TEST_DIR]:
    os.makedirs(d, exist_ok=True)

random.seed(42)

for cls in os.listdir(BASE_DATASET):
    cls_path = os.path.join(BASE_DATASET, cls)
    if not os.path.isdir(cls_path):
        continue

    images = os.listdir(cls_path)
    random.shuffle(images)

    n = len(images)
    n_train = int(0.7 * n)
    n_val   = int(0.15 * n)

    splits = {
        TRAIN_DIR: images[:n_train],
        VAL_DIR:   images[n_train:n_train+n_val],
        TEST_DIR:  images[n_train+n_val:]
    }

    for split_dir, files in splits.items():
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)
        for f in files:
            shutil.copy(
                os.path.join(cls_path, f),
                os.path.join(split_dir, cls, f)
            )

# =====================================================
# DATA LOADING
# =====================================================
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    shuffle=True
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    VAL_DIR,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    shuffle=True
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    TEST_DIR,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    shuffle=False
)

class_names = train_ds.class_names
num_classes = len(class_names)

# =====================================================
# DATA AUGMENTATION
# =====================================================
augment = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.1),
    layers.RandomBrightness(0.2),
    layers.RandomContrast(0.2)
])

train_ds = train_ds.map(
    lambda x, y: (augment(x, training=True), y),
    num_parallel_calls=AUTOTUNE
)

train_ds = train_ds.prefetch(AUTOTUNE)
val_ds   = val_ds.prefetch(AUTOTUNE)
test_ds  = test_ds.prefetch(AUTOTUNE)

# =====================================================
# MODEL
# =====================================================
inputs = tf.keras.Input(shape=(224, 224, 3))
x = layers.Rescaling(1./255)(inputs)

base_model = DenseNet169(
    weights="imagenet",
    include_top=False,
    input_tensor=x
)
base_model.trainable = False

x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.BatchNormalization()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.4)(x)

outputs = layers.Dense(
    num_classes,
    activation="softmax",
    dtype="float32"
)(x)

model = models.Model(inputs, outputs)

# =====================================================
# CATEGORICAL FOCAL LOSS (CORRECT)
# =====================================================
def categorical_focal_loss(gamma=2.0, alpha=0.25):
    def loss(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)
        ce = -tf.reduce_sum(y_true * tf.math.log(y_pred), axis=1)
        p_t = tf.reduce_sum(y_true * y_pred, axis=1)
        return alpha * tf.pow(1 - p_t, gamma) * ce
    return loss

# =====================================================
# TRAINING
# =====================================================
model.compile(
    optimizer=tf.keras.optimizers.Adam(2e-5),
    loss=categorical_focal_loss(),
    metrics=["accuracy"]
)

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15
)

# =====================================================
# FINE-TUNING
# =====================================================
base_model.trainable = True
for layer in base_model.layers[:200]:
    layer.trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss=categorical_focal_loss(),
    metrics=["accuracy"]
)

callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", patience=3, factor=0.3, min_lr=1e-7
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=6, restore_best_weights=True
    )
]

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=callbacks
)

# =====================================================
# TEST EVALUATION
# =====================================================
test_loss, test_acc = model.evaluate(test_ds)
print(f"\nTest Accuracy: {test_acc*100:.2f}%")

# =====================================================
# PREDICTIONS
# =====================================================
y_true = np.concatenate([y.numpy() for _, y in test_ds])
y_true = np.argmax(y_true, axis=1)

y_pred_proba = model.predict(test_ds)
y_pred = np.argmax(y_pred_proba, axis=1)

# =====================================================
# CLASSIFICATION REPORT
# =====================================================
print("\nClassification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))

# =====================================================
# RE-SAVE MODEL FOR DEPLOYMENT (NO OPTIMIZER)
# =====================================================

NEW_MODEL_PATH = "/content/skin_disease_model_deploy.keras"

model.save(NEW_MODEL_PATH, include_optimizer=False)

print("Deployment-ready model saved successfully!")

# =====================================================
# DOWNLOAD THE NEW MODEL
# =====================================================
from google.colab import files

files.download(NEW_MODEL_PATH)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/296.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.7/296.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hUsing Colab cache for faster access to the 'skin-diseases-image-dataset' dataset.
Removed duplicate images: 688
Found 18522 files belonging to 10 classes.
Found 3964 files belonging to 10 classes.
Found 3979 files belonging to 10 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m51877672/51877672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/15
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 600ms/step - accuracy: 0.2524 - loss: 0.4501 - val_accuracy: 0.5146 - val_loss: 0.2360
Epoch 2/15
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 491ms/step - accuracy: 0.4517 - loss: 0.278

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# =====================================================
# CONFUSION MATRIX
# =====================================================
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:\n", cm)

# =====================================================
# ROC–AUC (MULTI-CLASS OvR)
# =====================================================
y_true_bin = label_binarize(y_true, classes=np.arange(num_classes))

macro_roc_auc = roc_auc_score(
    y_true_bin,
    y_pred_proba,
    average="macro",
    multi_class="ovr"
)

print(f"\nMacro ROC–AUC (OvR): {macro_roc_auc:.4f}")

roc_auc_per_class = roc_auc_score(
    y_true_bin,
    y_pred_proba,
    average=None,
    multi_class="ovr"
)

print("\nPer-class ROC–AUC:")
for i, auc in enumerate(roc_auc_per_class):
    print(f"{class_names[i]}: {auc:.4f}")


Confusion Matrix:
 [[ 137   14    0   17    0    0    0   29   11   25]
 [  14  169    1   33    1    1    0   17   25   16]
 [   0    0  434    0    0   36    1    0    0    0]
 [  15   24    0   88    0    2    0   26    2   18]
 [   0    0    0    0  446   14   37    0    0    0]
 [   0    0   27    0    8 1138   20    0    0    0]
 [   0    0    1    0   33   44  231    0    1    0]
 [  37   29    1   22    0    0    0  139   16   58]
 [   7   30    0    6    2    1    0   10  200   19]
 [  17   19    0   12    0    1    0   24   11  161]]

Macro ROC–AUC (OvR): 0.9734

Per-class ROC–AUC:
1. Eczema 1677: 0.9617
10. Warts Molluscum and other Viral Infections - 2103: 0.9606
2. Melanoma 15.75k: 0.9960
3. Atopic Dermatitis - 1.25k: 0.9621
4. Basal Cell Carcinoma (BCC) 3323: 0.9946
5. Melanocytic Nevi (NV) - 7970: 0.9925
6. Benign Keratosis-like Lesions (BKL) 2624: 0.9757
7. Psoriasis pictures Lichen Planus and related diseases - 2k: 0.9479
8. Seborrheic Keratoses and other Benign Tumor

In [None]:
from google.colab import files
files.download("/content/skin_disease_model.keras")

FileNotFoundError: Cannot find file: /content/skin_disease_model.keras