In [1]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.utils.class_weight import compute_class_weight
# from google.colab import drive
import numpy as np
import os

# Set up constants
IMG_SIZE = (224, 224)  # ResNet50 input size
BATCH_SIZE = 32
EPOCHS = 150  # Increased max epochs, early stopping will prevent unnecessary training
# drive.mount('/content/drive')

# Define the path to the images folder
data_dir = '/kaggle/input/images-skinlesion/images'
print("Contents of data_dir:", os.listdir(data_dir))

print("Contents of data_dir:")
print(os.listdir(data_dir))


Contents of data_dir: ['MEL', 'SCC', 'NV', 'BCC']
Contents of data_dir:
['MEL', 'SCC', 'NV', 'BCC']


In [2]:

# Update class names to include SCC
class_names = ['MEL', 'NV', 'BCC', 'SCC']
for class_name in class_names:
    if not os.path.isdir(os.path.join(data_dir, class_name)):
        raise ValueError(f"Folder {class_name} not found in {data_dir}")

# Set up data generators with increased augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    classes=class_names,
    shuffle=True
)

validation_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    classes=class_names,
    shuffle=True
)

# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Fine-tune the model
for layer in base_model.layers:
    layer.trainable = True

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(4, activation='softmax')(x)  # 4 classes

# Create the final model
model = Model(inputs=base_model.input, outputs=output)

# Define F1 Score metric as a class
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.precision = Precision()
        self.recall = Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        precision = self.precision.result()
        recall = self.recall.result()
        return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

    def reset_states(self):
        self.precision.reset_states()
        self.recall.reset_states()

# Compile the model with additional metrics
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy', Precision(), Recall(), F1Score()])

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(train_generator.classes), y=train_generator.classes)
class_weight_dict = dict(enumerate(class_weights))


# Define callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('/kaggle/working/weights_resnet50_best_.keras',
                                   save_best_only=True, monitor='val_accuracy', mode='max')



Found 7174 images belonging to 4 classes.
Found 1791 images belonging to 4 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [3]:
# Train the model with class weights and callbacks
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
    class_weight=class_weight_dict,
    callbacks=[reduce_lr, early_stopping, model_checkpoint]
)

# Save the final model
model.save('/kaggle/working/skin_lesion_classifier_resnet50_final.keras')


Epoch 1/150


  self._warn_if_super_not_called()
I0000 00:00:1726386684.526573     116 service.cc:145] XLA service 0x784c500039c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1726386684.526632     116 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1726386684.526636     116 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5

I0000 00:00:1726386715.848465     116 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m130/224[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m1:24[0m 898ms/step - accuracy: 0.5722 - f1_score: 0.5503 - loss: 1.0820 - precision: 0.6763 - recall: 0.4668




[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 999ms/step - accuracy: 0.6177 - f1_score: 0.6037 - loss: 0.9481 - precision: 0.7071 - recall: 0.5304 - val_accuracy: 0.0795 - val_f1_score: 0.0135 - val_loss: 1.6841 - val_precision: 0.0788 - val_recall: 0.0074 - learning_rate: 1.0000e-04
Epoch 2/150
[1m  1/224[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:17[0m 347ms/step - accuracy: 0.7500 - f1_score: 0.7619 - loss: 0.4908 - precision: 0.7742 - recall: 0.7500

  self.gen.throw(typ, value, traceback)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.7500 - f1_score: 0.7619 - loss: 0.4908 - precision: 0.7742 - recall: 0.7500 - val_accuracy: 0.0323 - val_f1_score: 0.0000e+00 - val_loss: 1.6946 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/150
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 667ms/step - accuracy: 0.7507 - f1_score: 0.7489 - loss: 0.4821 - precision: 0.7803 - recall: 0.7198 - val_accuracy: 0.7477 - val_f1_score: 0.7477 - val_loss: 0.8878 - val_precision: 0.7477 - val_recall: 0.7477 - learning_rate: 1.0000e-04
Epoch 4/150
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7812 - f1_score: 0.7119 - loss: 0.5638 - precision: 0.7778 - recall: 0.6562 - val_accuracy: 0.8065 - val_f1_score: 0.8065 - val_loss: 0.8031 - val_precision: 0.8065 - val_recall: 0.8065 - learning_rate: 1.0000e-04
Epoch 5/150
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━

In [4]:
# # Print class indices
# print("Class indices:", train_generator.class_indices)

# # Function to predict image
# def predict_image(img_path, model, nv_threshold=0.7):
#     img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
#     img_array = tf.keras.preprocessing.image.img_to_array(img)
#     img_array = np.expand_dims(img_array, axis=0)
#     img_array /= 255.0

#     prediction = model.predict(img_array)

#     if np.argmax(prediction) == class_names.index('NV') and prediction[0][class_names.index('NV')] < nv_threshold:
#         predicted_class = class_names[np.argsort(prediction[0])[-2]]
#     else:
#         predicted_class = class_names[np.argmax(prediction)]

#     confidence = np.max(prediction)

#     return predicted_class, confidence

# # Directory containing test images
# finish_dir = '/content/drive/My Drive/finish'

# # Interactive prediction loop
# while True:
#     user_input = input("Enter an image number (1-1000) or 'q' to quit: ")

#     if user_input.lower() == 'q':
#         break

#     try:
#         image_number = int(user_input)

#         for filename in os.listdir(finish_dir):
#             if filename.startswith(f"{image_number}.") and filename.lower().endswith(('.png', '.jpg', '.jpeg')):
#                 img_path = os.path.join(finish_dir, filename)

#                 predicted_class, confidence = predict_image(img_path, model)

#                 print(f"Image: {filename}")
#                 print(f"Predicted class: {predicted_class}")
#                 print(f"Confidence: {confidence:.2f}")
#                 print()
#                 break
#         else:
#             print(f"No image found with number {image_number}")

#     except ValueError:
#         print("Invalid input. Please enter a number or 'q' to quit.")

# print("Thank you for using the classifier!")

# # Evaluation
# from tensorflow.keras.models import load_model

# # Load the best model
# # best_model = load_model('/content/drive/My Drive/models/skin_lesion_classifier_resnet50_best.h5',
# #                         custom_objects={'F1Score': F1Score})
# # Load the best model
# best_model = load_model('/content/drive/My Drive/models/skin_lesion_classifier_resnet50_best.keras',
#                         custom_objects={'F1Score': F1Score})
# # Set up data generator for evaluation
# eval_datagen = ImageDataGenerator(rescale=1./255)

# eval_generator = eval_datagen.flow_from_directory(
#     data_dir,
#     target_size=IMG_SIZE,
#     batch_size=BATCH_SIZE,
#     class_mode='categorical',
#     shuffle=False
# )

# # Evaluate the model
# scores = best_model.evaluate(eval_generator, verbose=1)

# # Print the results
# print("Evaluation on the entire dataset:")
# for metric, score in zip(best_model.metrics_names, scores):
#     print(f"{metric}: {score}")

# # Evaluate on specific classes
# for i, class_name in enumerate(class_names):
#     class_generator = eval_datagen.flow_from_directory(
#         data_dir,
#         target_size=IMG_SIZE,
#         batch_size=BATCH_SIZE,
#         class_mode='categorical',
#         classes=[class_name],
#         shuffle=False
#     )
#     scores = best_model.evaluate(class_generator, verbose=0)
#     print(f"\nEvaluation on {class_name} class:")
#     for metric, score in zip(best_model.metrics_names, scores):
#         print(f"{metric}: {score}")