In [1]:
import numpy as np 
import tensorflow as tf
import os
from tqdm import tqdm
from tensorflow.keras import layers, models
import joblib

## Create dataset from image data

In [2]:
images_root_path = '/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions'
list_disease_labels = os.listdir(images_root_path)
print("Skin diseases present in the dataset: ",','.join(list_disease_labels))
for disease_label in list_disease_labels:
    images = os.listdir(f'{images_root_path}/{disease_label}')
    print(f'\nDisease label: {disease_label}\nImaga files counts: {len(images)}\n')

Skin diseases present in the dataset:  Eczema,Acne,Milia,Rosacea,Keratosis,Carcinoma

Disease label: Eczema
Imaga files counts: 399


Disease label: Acne
Imaga files counts: 399


Disease label: Milia
Imaga files counts: 399


Disease label: Rosacea
Imaga files counts: 399


Disease label: Keratosis
Imaga files counts: 399


Disease label: Carcinoma
Imaga files counts: 399



In [3]:
def fetch_image_data():
    combined_images_path,disese_labels = [],[]
    for disease_label in list_disease_labels:
        current_disease_images_path = os.listdir(f'{images_root_path}/{disease_label}')
        current_disease_images_path = [f'{images_root_path}/{disease_label}/{cur_image}' for cur_image in current_disease_images_path]
        combined_images_path.extend(current_disease_images_path)
        disese_labels.extend([disease_label]*len(current_disease_images_path))
    print(f"Count images: {len(combined_images_path)}")
    print(f"Count labels: {len(disese_labels)}")
    # Shuffling data
    image_paths_shuffled,label_shuffled = [],[]
    permutation = np.random.permutation(len(combined_images_path))
    image_paths_shuffled = [combined_images_path[i] for i in permutation]
    label_shuffled = [disese_labels[i] for i in permutation]
    return image_paths_shuffled,label_shuffled

list_images_paths, list_labels = fetch_image_data()
print("First 5 images:",list_images_paths[:5])
print("First 5 labels:",list_labels[:5])

Count images: 2394
Count labels: 2394
First 5 images: ['/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions/Eczema/Eczema_105.jpg', '/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions/Eczema/Eczema_391.jpg', '/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions/Rosacea/Rosacea_5.jpg', '/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions/Carcinoma/Carcinoma_356.jpg', '/kaggle/input/augmented-skin-conditions-image-dataset/Skin_Conditions/Carcinoma/Carcinoma_250.jpg']
First 5 labels: ['Eczema', 'Eczema', 'Rosacea', 'Carcinoma', 'Carcinoma']


In [4]:
# Function to preprocess images (read, resize and normalize)
def preprocess_image(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  
    return image


In [5]:
# Function to load image and label
def load_image_and_label(image_path,label):
    image = preprocess_image(image_path)
    label = tf.reduce_min(tf.where(tf.equal(list_disease_labels, label)))
    return image, label

In [6]:
# Main function
def build_dataset(batch_size = 32):
    image_paths,labels = fetch_image_data()
    # Create TensorFlow dataset
    dataset = tf.data.Dataset.from_tensor_slices((image_paths,labels))
    dataset = dataset.map(lambda image_path, label: load_image_and_label(image_path,label))
    dataset.shuffle(len(image_paths))
    return dataset.batch(batch_size)

In [7]:
dataset = build_dataset()

Count images: 2394
Count labels: 2394


In [8]:

print("Dataset spec :",tf.data.DatasetSpec.from_value(dataset))
dataset_size = dataset.cardinality().numpy()
print("Dataset size :", dataset_size)


Dataset spec : DatasetSpec((TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None)), TensorShape([]))
Dataset size : 75


In [9]:
train_size,test_size = int(0.8*dataset_size),int(0.1*dataset_size)
train_dataset = dataset.take(train_size)
remaining_dataset = dataset.skip(train_size)
test_dataset = remaining_dataset.take(test_size)
val_dataset = remaining_dataset.skip(test_size)

# Optional: Prefetch for performance
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

print("Train dataset spec :",tf.data.DatasetSpec.from_value(train_dataset))
print("Eval dataset spec :",tf.data.DatasetSpec.from_value(val_dataset))
print("Test dataset spec :",tf.data.DatasetSpec.from_value(test_dataset))

Train dataset spec : DatasetSpec((TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None)), TensorShape([]))
Eval dataset spec : DatasetSpec((TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None)), TensorShape([]))
Test dataset spec : DatasetSpec((TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None)), TensorShape([]))


## Building CNN model

In [81]:
def classification_model(input_shape=(224, 224, 3), labels=list_disease_labels):
    num_classes = len(list_disease_labels)
    model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Compile the model
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [82]:
epochs = 50
model = classification_model()

# Add early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  
    patience=3,  
    start_from_epoch=10,
    restore_best_weights=True  
)

#training model
history = model.fit(train_dataset,validation_data=val_dataset,epochs = epochs, callbacks=[early_stopping])

training_loss = history.history['loss']
training_accuracy = history.history['accuracy']
val_loss = history.history['val_loss']
val_accuracy = history.history['val_accuracy']


Epoch 1/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 105ms/step - accuracy: 0.2063 - loss: 27.3431 - val_accuracy: 0.3480 - val_loss: 1.5749
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 97ms/step - accuracy: 0.3634 - loss: 1.5266 - val_accuracy: 0.3360 - val_loss: 1.6252
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 97ms/step - accuracy: 0.4666 - loss: 1.3332 - val_accuracy: 0.5280 - val_loss: 1.1806
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 97ms/step - accuracy: 0.5452 - loss: 1.1905 - val_accuracy: 0.4600 - val_loss: 1.3764
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - accuracy: 0.6291 - loss: 0.9766 - val_accuracy: 0.5640 - val_loss: 1.2688
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 96ms/step - accuracy: 0.6935 - loss: 0.7966 - val_accuracy: 0.4680 - val_loss: 1.3975
Epoch 7/50
[1m60/60[0m [32m━

In [83]:
test_loss, test_accuracy = model.evaluate(test_dataset)
print("Test loss:",test_loss)
print("Test accuracy:",test_accuracy)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.5931 - loss: 2.0786
Test loss: 2.0905330181121826
Test accuracy: 0.5758928656578064


In [85]:
# Saving the model
finetuned_model.save('cnn_model.h5')

## Fine tuning Resnet50 model

In [13]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import ResNet50


In [14]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(6, activation='softmax')(x)
def lr_scheduler(epoch, lr):
    if epoch%10 ==9:
        return 0.8*lr
    else:
        return lr
finetuned_model = Model(inputs=base_model.input, outputs=predictions)
finetuned_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate = lr_scheduler(0,0.01) ),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [15]:
epochs = 50
model = classification_model()

# Add early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  
    patience=3,  
    start_from_epoch=10,
    restore_best_weights=True  
)

# training model
history = finetuned_model.fit(train_dataset,validation_data=val_dataset,epochs = epochs, callbacks=[early_stopping])

training_loss = history.history['loss']
training_accuracy = history.history['accuracy']
val_loss = history.history['val_loss']
val_accuracy = history.history['val_accuracy']


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1726511155.516291     101 service.cc:145] XLA service 0x7c25dc0498d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1726511155.516366     101 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1726511155.516372     101 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m 2/60[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 83ms/step - accuracy: 0.1641 - loss: 9.6955  

I0000 00:00:1726511163.424069     101 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 278ms/step - accuracy: 0.4455 - loss: 13.3668 - val_accuracy: 0.7560 - val_loss: 0.6706
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 138ms/step - accuracy: 0.7557 - loss: 0.7149 - val_accuracy: 0.8360 - val_loss: 0.4456
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 138ms/step - accuracy: 0.8014 - loss: 0.5386 - val_accuracy: 0.8520 - val_loss: 0.4146
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 139ms/step - accuracy: 0.8408 - loss: 0.4843 - val_accuracy: 0.8760 - val_loss: 0.4069
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 141ms/step - accuracy: 0.8514 - loss: 0.4244 - val_accuracy: 0.8800 - val_loss: 0.3463
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 140ms/step - accuracy: 0.8494 - loss: 0.4734 - val_accuracy: 0.8680 - val_loss: 0.4218
Epoch 7/50
[1m60/60[0m [32m━━━━━━━

In [16]:
test_loss, test_accuracy = finetuned_model.evaluate(test_dataset)
print("Test loss:",test_loss)
print("Test accuracy:",test_accuracy)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - accuracy: 0.8920 - loss: 0.5808
Test loss: 0.4540325999259949
Test accuracy: 0.9017857313156128


In [17]:
finetuned_model.save('resnet_50_finetuned.h5')
