<h1><b>Importing Libraries</b></h1>

In [None]:
#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        os.path.join(dirname, filename)

In [None]:
import numpy as np
import pandas as pd
import random
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import keras
from collections import Counter
from tqdm import tqdm
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix , accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import recall_score, precision_score, f1_score, matthews_corrcoef, confusion_matrix, accuracy_score
#from imblearn.metrics import geometric_mean_score
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import torch
from torchvision import datasets

<h1><b>Dataset Loading</b></h1>

In [None]:
# Root folder of the image dataset; every loader below reads from this path.
dataset_path = '/kaggle/input/51-skin-disease/Best_50_class'

In [None]:
# Index the image folder with torchvision; used in this notebook for the
# class list and per-class counts.
dataset = datasets.ImageFolder(dataset_path)
class_names = dataset.classes   # reused later as axis labels for the confusion matrices
print(class_names)

In [None]:
# Tally how many images carry each numeric label in dataset.targets.
counts = Counter(dataset.targets)

# Report the tally using the human-readable class name for each label index.
for class_idx in counts:
    count = counts[class_idx]
    print(f"{dataset.classes[class_idx]}: {count} images")

In [None]:
# One row per class: (class name, number of images), in the order of `counts`.
df_counts = pd.DataFrame(
    [(dataset.classes[idx], n_images) for idx, n_images in counts.items()],
    columns=["Class", "Count"],
)

# Persist the table next to the other notebook outputs.
df_counts.to_csv("class_counts.csv", index=False)

print("Counts saved to class_counts.csv")

<h1><b>EfficientNetB0</b></h1>

<h2><b>Dataset Splitting</b></h2>

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
from tensorflow.keras import layers, models

# =========================
# Dataset creation: 70% train / 15% validation / 15% test
# =========================
data_dir = dataset_path  # dataset root folder
img_size = (224, 224)    # image size requested from the loader (EfficientNetB0 input)
batch_size = 32

# Arguments shared by both directory readers. Using the same split fraction
# and seed for the two calls is what pairs the "training" and "validation"
# subsets together.
_split_kwargs = dict(
    validation_split=0.30,   # 30% is held back for validation + test
    seed=123,
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical',
)

# Step 1: the 70% training portion.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)

# Step 2: the remaining 30%, to be divided into validation and test.
val_test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# Step 3: halve the held-back pool by batch count. When the number of batches
# is odd, the extra batch goes to the test side.
val_test_size = val_test_ds.cardinality().numpy()
val_size = val_test_size // 2
test_size = val_test_size - val_size

# NOTE(review): the directory reader shuffles by default — confirm that
# take()/skip() select the same batches on every pass over the data; if the
# order changes between iterations, validation and test batches can overlap.
val_ds = val_test_ds.take(val_size)
test_ds = val_test_ds.skip(val_size)

# =========================
# Preprocessing + input pipeline tuning
# =========================
AUTOTUNE = tf.data.AUTOTUNE


def preprocess_batch(images, labels):
    """Apply the currently imported `preprocess_input` to a batch of images.

    Args:
        images: batch of images produced by the dataset.
        labels: matching batch of labels; passed through untouched.

    Returns:
        Tuple `(preprocessed_images, labels)`.
    """
    return preprocess_input(images), labels


# Preprocess every split in parallel, then prefetch so input preparation can
# overlap with model execution.
train_ds = train_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)

In [None]:
batch_size = 32  # keep in sync with the batch size used to build the datasets


def dataset_size(dataset):
    """Estimate the number of samples in a batched dataset.

    The estimate is `number_of_batches * batch_size` (using the notebook-level
    `batch_size`), so every batch is counted as if it were full.

    Args:
        dataset: a batched `tf.data.Dataset`.

    Returns:
        The estimated sample count, or the string "Unknown size" when the
        dataset's cardinality is infinite or unknown.
    """
    batches = dataset.cardinality().numpy()
    size_is_known = batches not in (
        tf.data.INFINITE_CARDINALITY,
        tf.data.UNKNOWN_CARDINALITY,
    )
    return batches * batch_size if size_is_known else "Unknown size"


for split_label, split_ds in (
    ("Train set size:", train_ds),
    ("Validation set size:", val_ds),
    ("Test set size:", test_ds),
):
    print(split_label, dataset_size(split_ds))

<h2><b>Model Compiling</b></h2>

In [None]:
# =========================
# EfficientNetB0 transfer-learning model
# =========================
num_classes = len(val_test_ds.class_names)  # one output unit per class folder

# ImageNet-pretrained backbone without its classification head.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(img_size[0], img_size[1], 3),
)
base_model.trainable = False  # only the new head is trained

# Stack the frozen backbone and a small classification head.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation='softmax'))

# Labels are one-hot (label_mode='categorical'), hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Write the model to disk each time val_loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_model_efficientnetb0.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

<h2><b>Model Training</b></h2>

In [None]:
# Train for at most 50 epochs; the early-stopping callback may end the run sooner.
history = model.fit(
    train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=[early_stopping, checkpoint],
)

<h2><b>Results</b></h2>

In [None]:
# Tabulate the per-epoch metrics recorded by model.fit() and write them to CSV.
hist_training_efficientnet = pd.DataFrame(data=history.history)
hist_training_efficientnet.to_csv("hist_training_efficientnet.csv", index=False)
print("EfficientNetB0 Training history to hist_training_efficientnet.csv")

In [None]:
# Training curves: accuracy on the left panel, loss on the right panel.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))

num_epochs = len(history.history['accuracy'])  # epochs actually run
xticks = range(0, num_epochs + 1, 4)           # tick every 4 epochs

# (axis, train key, validation key, train label, validation label, y label, title)
panels = [
    (axs[0], 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy - EfficientNetB0'),
    (axs[1], 'loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss - EfficientNetB0'),
]

for ax, train_key, val_key, train_label, val_label, y_label, panel_title in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch', fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.set_title(panel_title, fontsize=22)
    ax.legend(fontsize=18)
    ax.set_xticks(xticks)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    ax.grid(True)

plt.tight_layout()

# Export the combined figure as a high-resolution PNG and as a PDF.
plt.savefig('efficientnet_training_curves.png', dpi=600)
plt.savefig('efficientnet_training_curves.pdf')

plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, matthews_corrcoef

# 1. Collect true and predicted class indices over the test set.
y_true = []
y_pred = []

for images, labels in test_ds:
    # predict_on_batch runs a single forward pass on the batch; model.predict()
    # is meant for whole datasets and adds per-call overhead inside a loop.
    preds = model.predict_on_batch(images)
    y_true.extend(np.argmax(labels.numpy(), axis=1))  # one-hot -> class index
    y_pred.extend(np.argmax(preds, axis=1))           # probabilities -> class index

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# 2. Compute confusion matrix and metrics.
# Passing the full label range keeps the matrix at
# len(class_names) x len(class_names) even when some class never shows up in
# the test split, so it always lines up with the class_names used below.
label_ids = np.arange(len(class_names))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
accuracy = accuracy_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)
# Support-weighted averages; classes with no predictions/samples contribute 0.
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Precision: {precision:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f'Matthews Correlation Coefficient: {mcc:.4f}')

# 3. Plot confusion matrix (rows = true class, columns = predicted class).
plt.figure(figsize=(30, 25))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.tight_layout()
# File name corrected (was 'efficinetnet_...') so PNG and PDF share one stem.
plt.savefig('efficientnet_confusion_matrix.png', dpi=600)
plt.savefig('efficientnet_confusion_matrix.pdf')
plt.show()

# 4. Save metrics to CSV (single-row table).
metrics = {
    'Accuracy': [accuracy],
    'Recall': [recall],
    'Precision': [precision],
    'F1 Score': [f1],
    'MCC': [mcc]
}
# Save confusion matrix as CSV, labelled with class names on both axes.
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
df_cm.to_csv("confusion_matrix_efficientnet.csv")

df_metrics_efficientnet = pd.DataFrame(metrics)
df_metrics_efficientnet.to_csv('performance_metrics_efficientnet.csv', index=False)

<h1><b>ConvNext</b></h1>

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.convnext import ConvNeXtSmall, preprocess_input
from tensorflow.keras import layers, models, optimizers

# =========================
# Parameters
# =========================
data_dir = dataset_path  # dataset root folder
img_size = (224, 224)
batch_size = 32

# =========================
# Steps 1-2: 70% training set and 30% validation+test pool
# =========================
# Both readers receive the same split fraction and seed so that the
# "training" and "validation" subsets belong to one and the same partition.
_split_kwargs = dict(
    validation_split=0.30,
    seed=123,
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical',
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)
val_test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# =========================
# Step 3: divide the pool into 15% validation + 15% test (by batch count)
# =========================
val_test_size = val_test_ds.cardinality().numpy()
val_size = val_test_size // 2
test_size = val_test_size - val_size  # receives the extra batch when the count is odd

# NOTE(review): the directory reader shuffles by default — confirm that
# take()/skip() select the same batches on every pass over the data; if the
# order changes between iterations, validation and test batches can overlap.
val_ds = val_test_ds.take(val_size)
test_ds = val_test_ds.skip(val_size)

# =========================
# Steps 4-5: preprocessing and prefetching
# =========================
AUTOTUNE = tf.data.AUTOTUNE


def preprocess_batch(images, labels):
    """Apply the currently imported `preprocess_input` to a batch of images.

    Args:
        images: batch of images produced by the dataset.
        labels: matching batch of labels; passed through untouched.

    Returns:
        Tuple `(preprocessed_images, labels)`.
    """
    return preprocess_input(images), labels


# Preprocess each split in parallel, then prefetch so input preparation can
# overlap with model execution.
train_ds = train_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.map(preprocess_batch, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)

# =========================
# Step 6: verify dataset sizes
# =========================
# Exact image counts: walk each split once and add up the batch dimension.
# This iterates over every batch of all three splits.
num_train = sum(batch_images.shape[0] for batch_images, _ in train_ds)
num_val = sum(batch_images.shape[0] for batch_images, _ in val_ds)
num_test = sum(batch_images.shape[0] for batch_images, _ in test_ds)

print(f"Train images: {num_train}")
print(f"Validation images: {num_val}")
print(f"Test images: {num_test}")


<h2><b>Model Compiling</b></h2>

In [None]:
# =========================
# ConvNeXtSmall transfer-learning model
# =========================
num_classes = len(class_names)  # one output unit per class

# ImageNet-pretrained backbone without its classification head.
base_model = ConvNeXtSmall(
    include_top=False,
    weights='imagenet',
    input_shape=(img_size[0], img_size[1], 3),
)
base_model.trainable = False  # only the new head is trained

# Classification head: pooling -> Dense(64) -> Dropout(0.3) -> softmax.
x = base_model.output
for head_layer in (
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
):
    x = head_layer(x)
output = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=base_model.input, outputs=output)

# Labels are one-hot (label_mode='categorical'), hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)


In [None]:
# Print the layer-by-layer summary of the model built in the previous cell.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Write the model to disk each time val_loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_model_convnext.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

<h2><b>Model Training</b></h2>

In [None]:
# Train for at most 50 epochs; the early-stopping callback may end the run sooner.
history = model.fit(
    train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=[early_stopping, checkpoint],
)

<h2><b>Results</b></h2>

In [None]:
# Tabulate the per-epoch metrics recorded by model.fit() and write them to CSV.
hist_training_convnext = pd.DataFrame(data=history.history)
hist_training_convnext.to_csv("hist_training_convnext.csv", index=False)
print("ConvNext Training history to hist_training_convnext.csv")

In [None]:
# Training curves: accuracy on the left panel, loss on the right panel.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))

num_epochs = len(history.history['accuracy'])  # epochs actually run
xticks = range(0, num_epochs + 1, 4)           # tick every 4 epochs

# (axis, train key, validation key, train label, validation label, y label, title)
panels = [
    (axs[0], 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy - ConvNext'),
    (axs[1], 'loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss - ConvNext'),
]

for ax, train_key, val_key, train_label, val_label, y_label, panel_title in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch', fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.set_title(panel_title, fontsize=22)
    ax.legend(fontsize=18)
    ax.set_xticks(xticks)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    ax.grid(True)

plt.tight_layout()

# Export the combined figure as a high-resolution PNG and as a PDF.
plt.savefig('convnext_training_curves.png', dpi=600)
plt.savefig('convnext_training_curves.pdf')

plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, matthews_corrcoef

# 1. Collect true and predicted class indices over the test set.
y_true = []
y_pred = []

for images, labels in test_ds:
    # predict_on_batch runs a single forward pass on the batch; model.predict()
    # is meant for whole datasets and adds per-call overhead inside a loop.
    preds = model.predict_on_batch(images)
    y_true.extend(np.argmax(labels.numpy(), axis=1))  # one-hot -> class index
    y_pred.extend(np.argmax(preds, axis=1))           # probabilities -> class index

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# 2. Compute confusion matrix and metrics.
# Passing the full label range keeps the matrix at
# len(class_names) x len(class_names) even when some class never shows up in
# the test split, so it always lines up with the class_names used below.
label_ids = np.arange(len(class_names))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
accuracy = accuracy_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)
# Support-weighted averages; classes with no predictions/samples contribute 0.
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Precision: {precision:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f'Matthews Correlation Coefficient: {mcc:.4f}')

# 3. Plot confusion matrix (rows = true class, columns = predicted class).
plt.figure(figsize=(30, 25))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.savefig('convnext_confusion_matrix.png', dpi=600)
plt.savefig('convnext_confusion_matrix.pdf')
plt.show()

# 4. Save metrics to CSV (single-row table).
metrics = {
    'Accuracy': [accuracy],
    'Recall': [recall],
    'Precision': [precision],
    'F1 Score': [f1],
    'MCC': [mcc]
}
# Save confusion matrix as CSV, labelled with class names on both axes.
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
df_cm.to_csv("confusion_matrix_convnext.csv")

# Named after the model evaluated in this section; the previous variable name
# is kept as an alias in case another cell still refers to it.
df_metrics_convnext = pd.DataFrame(metrics)
df_metrics_xception = df_metrics_convnext
df_metrics_convnext.to_csv('performance_metrics_convnext.csv', index=False)

<h1><b>ResNet50</b></h1>

<h2><b>Model Compiling</b></h2>

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras import layers, models, optimizers
import tensorflow as tf

# =========================
# Model
# =========================
# This section reuses train_ds / val_ds / test_ds, img_size and num_classes
# from the ConvNeXt section. Those datasets were mapped with the ConvNeXt
# `preprocess_input`, not the ResNet50 one imported in this cell, so the
# ResNet50 preprocessing is applied inside the model graph instead. That way
# training, validation and the evaluation loop all get it without touching
# the shared datasets.
# NOTE(review): this assumes the ConvNeXt `preprocess_input` leaves pixel
# values unchanged (Keras documents it as a placeholder) — confirm for the
# installed Keras version.
input_shape = (img_size[0], img_size[1], 3)

# Load ResNet50 base model (pretrained on ImageNet) without its default
# classification head.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
)
base_model.trainable = False          # Freeze base model for transfer learning

inputs = layers.Input(shape=input_shape)
x = preprocess_input(inputs)          # ResNet50-specific input preprocessing
x = base_model(x, training=False)     # frozen backbone, run in inference mode

# Custom classification head
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.3)(x)
output = layers.Dense(num_classes, activation='softmax')(x)

# Build final model
model = models.Model(inputs=inputs, outputs=output)

# Compile model (labels are one-hot, hence categorical cross-entropy)
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Print the layer-by-layer summary of the model built in the previous cell.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Write the model to disk each time val_loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_model_resnet50.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

<h2><b>Model Training</b></h2>

In [None]:
# Train for at most 50 epochs; the early-stopping callback may end the run sooner.
history = model.fit(
    train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=[early_stopping, checkpoint],
)

<h2><b>Results</b></h2>

In [None]:
# Tabulate the per-epoch metrics recorded by model.fit() and write them to CSV.
hist_training_resnet50 = pd.DataFrame(data=history.history)
hist_training_resnet50.to_csv("hist_training_resnet50.csv", index=False)
print("ResNet50 Training history to hist_training_resnet50.csv")

In [None]:
# Training curves: accuracy on the left panel, loss on the right panel.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))

num_epochs = len(history.history['accuracy'])  # epochs actually run
xticks = range(0, num_epochs + 1, 4)           # tick every 4 epochs

# (axis, train key, validation key, train label, validation label, y label, title)
panels = [
    (axs[0], 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy - ResNet50'),
    (axs[1], 'loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss - ResNet50'),
]

for ax, train_key, val_key, train_label, val_label, y_label, panel_title in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch', fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.set_title(panel_title, fontsize=22)
    ax.legend(fontsize=18)
    ax.set_xticks(xticks)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    ax.grid(True)

plt.tight_layout()

# Export the combined figure as a high-resolution PNG and as a PDF.
plt.savefig('resnet50_training_curves.png', dpi=600)
plt.savefig('resnet50_training_curves.pdf')

plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, matthews_corrcoef

# 1. Collect true and predicted class indices over the test set.
y_true = []
y_pred = []

for images, labels in test_ds:
    # predict_on_batch runs a single forward pass on the batch; model.predict()
    # is meant for whole datasets and adds per-call overhead inside a loop.
    preds = model.predict_on_batch(images)
    y_true.extend(np.argmax(labels.numpy(), axis=1))  # one-hot -> class index
    y_pred.extend(np.argmax(preds, axis=1))           # probabilities -> class index

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# 2. Compute confusion matrix and metrics.
# Passing the full label range keeps the matrix at
# len(class_names) x len(class_names) even when some class never shows up in
# the test split, so it always lines up with the class_names used below.
label_ids = np.arange(len(class_names))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
accuracy = accuracy_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)
# Support-weighted averages; classes with no predictions/samples contribute 0.
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Precision: {precision:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f'Matthews Correlation Coefficient: {mcc:.4f}')

# 3. Plot confusion matrix (rows = true class, columns = predicted class).
plt.figure(figsize=(30, 25))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.savefig('resnet50_confusion_matrix.png', dpi=600)
plt.savefig('resnet50_confusion_matrix.pdf')
plt.show()

# 4. Save metrics to CSV (single-row table).
metrics = {
    'Accuracy': [accuracy],
    'Recall': [recall],
    'Precision': [precision],
    'F1 Score': [f1],
    'MCC': [mcc]
}
# Save confusion matrix as CSV, labelled with class names on both axes.
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
df_cm.to_csv("confusion_matrix_resnet50.csv")

df_metrics_resnet50 = pd.DataFrame(metrics)
df_metrics_resnet50.to_csv('performance_metrics_resnet50.csv', index=False)

<h1><b>Zipping all outputs</b></h1>

In [None]:
# Recursively zip everything matched by /kaggle/working/* into a single
# archive written to that same directory.
!zip -r /kaggle/working/output_files_efficientnet_convnext_resnet50.zip /kaggle/working/*