In [116]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import cv2
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import class_weight

In [117]:

train_dir = r"C:\Users\rrawa\skin\Derm\train"
test_dir = r"C:\Users\rrawa\skin\Derm\test"

In [118]:

label_names = sorted(os.listdir(train_dir))
label_map = {label: index for index, label in enumerate(label_names)}
print("Classes found:", label_names)

Classes found: ['Blackheads', 'Cyst', 'Papules', 'Pustules', 'Whiteheads']


In [119]:

def load_data(data_dir, img_size=(128, 128)):
    images = []
    labels = []

    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = cv2.imread(img_path)  # Read in color (BGR)
                if img is not None:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
                    img = cv2.resize(img, img_size)
                    images.append(img)
                    if label in label_map:
                        labels.append(label_map[label])
                else:
                    print(f"Warning: Could not load {img_path}")

    images = np.array(images, dtype="float32") / 255.0
    labels = to_categorical(np.array(labels), num_classes=len(label_names))
    return images, labels

In [120]:
# Compute class weights for imbalanced dataset
# Convert one-hot encoded labels back to class indices
train_label_indices = np.argmax(train_labels, axis=1)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_label_indices),
    y=train_label_indices
)
class_weights = dict(enumerate(class_weights))
print("Class weights calculated successfully.")


Class weights calculated successfully.


In [121]:
print("Train dir exists:", os.path.exists(train_dir))
print("Subfolders in train_dir:", os.listdir(train_dir))

Train dir exists: True
Subfolders in train_dir: ['Blackheads', 'Cyst', 'Papules', 'Pustules', 'Whiteheads']


In [122]:

# Load only test data (smaller dataset)
test_images, test_labels = load_data(test_dir)

# For training, we'll use ImageDataGenerator with flow_from_directory
# to avoid loading all images into memory at once
# First, create the training data generator
train_datagen = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    rescale=1./255
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Load generators from directory
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
    subset='validation'  # This will need directory structure adjustment
)

# Alternative: Create validation split from training directory
# For now, we'll use a smaller validation set loaded into memory
train_images_small, train_labels_small = load_data(train_dir)
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images_small, train_labels_small, test_size=0.2, random_state=42
)
del train_images_small, train_labels_small  # Free memory

Found 2778 images belonging to 5 classes.
Found 0 images belonging to 5 classes.


In [123]:

train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42
)

In [124]:

datagen = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

In [125]:

model = models.Sequential([
    layers.Input(shape=(128, 128, 3)),

    layers.Conv2D(32, (3, 3), activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(64, (3, 3), activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(256, (3, 3), activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dropout(0.4),
    layers.Dense(512, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.5),
    layers.Dense(len(label_names), activation="softmax")
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [126]:

early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
lr_reduction = ReduceLROnPlateau(monitor="val_loss", patience=3, factor=0.5, min_lr=1e-7, verbose=1)
model_checkpoint = ModelCheckpoint("best_dermnet_model.keras", monitor="val_loss", save_best_only=True, verbose=1)

In [127]:
history = model.fit(
    datagen.flow(train_images, train_labels, batch_size=32),
    validation_data=(val_images, val_labels),
    epochs=25,  # reduce to save time
    class_weight=class_weights,
    callbacks=[early_stopping, lr_reduction, model_checkpoint],
    verbose=1
)

  self._warn_if_super_not_called()


Epoch 1/25
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 896ms/step - accuracy: 0.2471 - loss: 5.6591
Epoch 1: val_loss improved from None to 3.14002, saving model to best_dermnet_model.keras
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 1s/step - accuracy: 0.2774 - loss: 5.0949 - val_accuracy: 0.2517 - val_loss: 3.1400 - learning_rate: 1.0000e-04
Epoch 2/25
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712ms/step - accuracy: 0.3350 - loss: 4.0720
Epoch 2: val_loss did not improve from 3.14002
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 746ms/step - accuracy: 0.3247 - loss: 3.9834 - val_accuracy: 0.1888 - val_loss: 3.8821 - learning_rate: 1.0000e-04
Epoch 3/25
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 697ms/step - accuracy: 0.3103 - loss: 3.6493
Epoch 3: val_loss did not improve from 3.14002
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 727ms/step - accuracy: 0.3286 - lo

In [129]:
for layer in model.layers[-20:]:
    layer.trainable = True

# train_labels are one-hot encoded, so use categorical_crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(
    datagen.flow(train_images, train_labels, batch_size=32),
    validation_data=(val_images, val_labels),
    epochs=5,
    class_weight=class_weights
)


Epoch 1/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 764ms/step - accuracy: 0.2898 - loss: 4.6423 - val_accuracy: 0.2427 - val_loss: 5.8812
Epoch 2/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 790ms/step - accuracy: 0.2763 - loss: 3.6345 - val_accuracy: 0.2494 - val_loss: 3.7224
Epoch 3/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 768ms/step - accuracy: 0.2802 - loss: 3.2902 - val_accuracy: 0.2539 - val_loss: 5.8164
Epoch 4/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 754ms/step - accuracy: 0.2915 - loss: 3.1711 - val_accuracy: 0.3011 - val_loss: 5.5108
Epoch 5/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 767ms/step - accuracy: 0.3146 - loss: 3.1507 - val_accuracy: 0.2742 - val_loss: 4.1144


<keras.src.callbacks.history.History at 0x235aef62b50>

In [130]:
import json

with open("dermnet_training_history.json", "w") as f:
    json.dump(history.history, f)

In [131]:
test_images, test_labels = load_data(r"C:\Users\rrawa\skin\Derm\test")


In [132]:
from tensorflow.keras.models import load_model
import pickle


# Save model
model.save("dermnet_skin_disease_model.keras")
# Save training history
with open("dermnet_training_history.pkl", "wb") as f:
    pickle.dump(history.history, f)
# Later when running webcam
model = load_model("dermnet_skin_disease_model.keras")

In [133]:
import os
print(os.path.getsize("dermnet_training_history.pkl"))  # Should be > 0
print("Saved keys:", history.history.keys())

1221
Saved keys: dict_keys(['accuracy', 'loss', 'val_accuracy', 'val_loss', 'learning_rate'])


In [134]:
from tensorflow.keras.models import load_model

In [135]:
print(type(test_images))
print(type(test_labels))
try:
    print(test_images.shape, test_labels.shape)
except:
    print("Not numpy arrays (likely generators)")


<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(918, 128, 128, 3) (918, 5)


In [136]:
# val_generator may be empty if subset/validation_split was not configured,
# fallback to using the in-memory validation arrays (val_images, val_labels).
try:
	test_loss, test_acc = model.evaluate(val_generator, verbose=1)
except ValueError:
	test_loss, test_acc = model.evaluate(val_images, val_labels, verbose=1)

print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 121ms/step - accuracy: 0.2742 - loss: 4.1144
Test Accuracy: 0.2742
Test Loss: 4.1144
