In [1]:
pip install tensorflow keras numpy pandas matplotlib scikit-learn opencv-python requests




Load Data

In [3]:
import os

# Set Kaggle API environment variable (replace with the actual path to your kaggle.json)
os.environ["KAGGLE_CONFIG_DIR"] = "/path/to/kaggle.json"

# Download the dataset using Kaggle CLI (replace with your dataset identifier)
!kaggle datasets download -d nodoubttome/skin-cancer9-classesisic -p /desired/path

# Unzip the dataset to the specified directory
!unzip /desired/path/skin-cancer9-classesisic.zip -d /desired/path


Dataset URL: https://www.kaggle.com/datasets/nodoubttome/skin-cancer9-classesisic
License(s): other
Downloading skin-cancer9-classesisic.zip to /desired/path
 99% 780M/786M [00:13<00:00, 121MB/s]
100% 786M/786M [00:13<00:00, 62.2MB/s]
Archive:  /desired/path/skin-cancer9-classesisic.zip
  inflating: /desired/path/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0010512.jpg  
  inflating: /desired/path/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0010889.jpg  
  inflating: /desired/path/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0024468.jpg  
  inflating: /desired/path/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0024470.jpg  
  inflating: /desired/path/Skin cancer ISIC The International Skin Imaging Collaboration/Test/actinic keratosis/ISIC_0024511.jpg  
  inflating: /desired/path/Skin cancer ISIC The Internati

In [6]:
import os
dataset_path = "/desired/path/Skin cancer ISIC The International Skin Imaging Collaboration"
print("Dataset contents:", os.listdir(dataset_path))


Dataset contents: ['Train', 'Test']


In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report

# Dataset Path Setup
dataset_path = "/desired/path/Skin cancer ISIC The International Skin Imaging Collaboration"
train_dir = os.path.join(dataset_path, 'Train')
test_dir = os.path.join(dataset_path, 'Test')

# Dataset structure check
print("Train directory contents:", os.listdir(train_dir))
print("Test directory contents:", os.listdir(test_dir))


Train directory contents: ['seborrheic keratosis', 'basal cell carcinoma', 'vascular lesion', 'nevus', 'melanoma', 'squamous cell carcinoma', 'pigmented benign keratosis', 'dermatofibroma', 'actinic keratosis']
Test directory contents: ['seborrheic keratosis', 'basal cell carcinoma', 'vascular lesion', 'nevus', 'melanoma', 'squamous cell carcinoma', 'pigmented benign keratosis', 'dermatofibroma', 'actinic keratosis']


Data Preprocessing

In [8]:
# Data Preprocessing: Normalizing pixel values, resizing images, and augmenting training data

IMG_SIZE = 224  # Resize images to 224x224 for ResNet50 compatibility
BATCH_SIZE = 32

# Data Augmentation for Training
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Normalize pixel values to [0, 1]
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Data Preprocessing for Validation and Test
val_test_datagen = ImageDataGenerator(rescale=1.0/255)

# Split the dataset into training, validation, and testing sets
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',  # Binary classification (benign vs malignant)
    shuffle=True
)

val_generator = val_test_datagen.flow_from_directory(
    test_dir,  # Here using test data as validation since no validation split is provided
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)


Found 2239 images belonging to 9 classes.
Found 118 images belonging to 9 classes.


Data Augmentation

In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data Augmentation for Training Data
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Normalize pixel values to [0, 1]
    rotation_range=40,  # Random rotation between -40 and +40 degrees
    width_shift_range=0.2,  # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,  # Randomly apply shear transformations
    zoom_range=0.2,  # Randomly zoom in or out
    horizontal_flip=True,  # Randomly flip images horizontally
    fill_mode='nearest'  # Fill the newly created pixels after transformation
)

# Data Preprocessing for Validation and Test (without augmentation)
val_test_datagen = ImageDataGenerator(rescale=1.0/255)


In [13]:
# Load training data with augmentation
train_generator = train_datagen.flow_from_directory(
    train_dir,  # Path to the training dataset
    target_size=(224, 224),  # Resize images to 224x224 (ResNet50 compatible size)
    batch_size=32,  # Batch size
    class_mode='binary',  # Binary classification (benign vs malignant)
    shuffle=True  # Shuffle the data to prevent model overfitting on ordered data
)

# Load validation/test data (no augmentation)
val_generator = val_test_datagen.flow_from_directory(
    test_dir,  # Path to the testing dataset
    target_size=(224, 224),  # Resize images to 224x224 (ResNet50 compatible size)
    batch_size=32,  # Batch size
    class_mode='binary'  # Binary classification (benign vs malignant)
)


Found 2239 images belonging to 9 classes.
Found 118 images belonging to 9 classes.


Model Development

In [10]:
# Model Development using Transfer Learning (ResNet50)

# Load pre-trained ResNet50 model with weights from ImageNet
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Add custom layers for binary classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)  # Sigmoid activation for binary classification

# Final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base ResNet50 layers (only train the custom layers)
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


Train Model

In [11]:
# Train the Model

EPOCHS = 10  # Number of epochs for initial training
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m521s[0m 7s/step - accuracy: 0.1572 - loss: -57.4372 - val_accuracy: 0.1356 - val_loss: -263.3884
Epoch 2/10
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m513s[0m 7s/step - accuracy: 0.1631 - loss: -474.6864 - val_accuracy: 0.1356 - val_loss: -891.2144
Epoch 3/10
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m507s[0m 7s/step - accuracy: 0.1787 - loss: -1292.1895 - val_accuracy: 0.1356 - val_loss: -1950.0135
Epoch 4/10
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m538s[0m 7s/step - accuracy: 0.1697 - loss: -2749.8142 - val_accuracy: 0.1356 - val_loss: -3447.3826
Epoch 5/10
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 7s/step - accuracy: 0.1721 - loss: -4688.4604 - val_accuracy: 0.1356 - val_loss: -5388.7798
Epoch 6/10
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m480s[0m 7s/step - accuracy: 0.1800 - loss: -6914.3345 - val_accuracy: 0.1356 - val_loss: -7732.8525
Ep

Fine-Tune Model

In [12]:
# Fine-tune the Model

# Unfreeze the last 10 layers of ResNet50 to fine-tune the model
for layer in base_model.layers[-10:]:  # Fine-tuning the last 10 layers
    layer.trainable = True

# Recompile the model after unfreezing layers
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Fine-tune the model for a few more epochs
fine_tune_epochs = 5
total_epochs = EPOCHS + fine_tune_epochs
history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=EPOCHS
)


Epoch 11/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m552s[0m 8s/step - accuracy: 0.1659 - loss: -29141.8242 - val_accuracy: 0.1356 - val_loss: -26829.9102
Epoch 12/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m564s[0m 8s/step - accuracy: 0.1745 - loss: -34958.7461 - val_accuracy: 0.1356 - val_loss: -29311.4785
Epoch 13/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m537s[0m 8s/step - accuracy: 0.1707 - loss: -37005.5898 - val_accuracy: 0.1356 - val_loss: -29961.0957
Epoch 14/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m567s[0m 8s/step - accuracy: 0.1668 - loss: -38632.0977 - val_accuracy: 0.1356 - val_loss: -29818.3945
Epoch 15/15
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 8s/step - accuracy: 0.1787 - loss: -38659.1992 - val_accuracy: 0.1356 - val_loss: -31230.6914


Model Evaluation on test data

In [21]:
# Model Evaluation on Test Data

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

# Evaluate the model on the test dataset
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.2f}")


Found 118 images belonging to 9 classes.


  self._warn_if_super_not_called()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 6s/step - accuracy: 0.2376 - loss: -16476.1582
Test Accuracy: 0.14


Generate Classification Report (Precision, Recall, F1-Score)

In [24]:
# Generate Classification Report (Precision, Recall, F1-Score)

# Make predictions on the test data
Y_pred = model.predict(test_generator)
y_pred = (Y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

#classification report
print(classification_report(test_generator.classes, y_pred))


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 6s/step
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.14      1.00      0.24        16
           2       0.00      0.00      0.00        16
           3       0.00      0.00      0.00        16
           4       0.00      0.00      0.00        16
           5       0.00      0.00      0.00        16
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00        16
           8       0.00      0.00      0.00         3

    accuracy                           0.14       118
   macro avg       0.02      0.11      0.03       118
weighted avg       0.02      0.14      0.03       118



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
