<a href="https://colab.research.google.com/github/manoharsingh77/Skin_Cancer_Detection/blob/main/skinCancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q kaggle
import os

In [None]:
from google.colab import files

# Assign the return value instead of leaving it as the cell's last expression:
# files.upload() returns a dict of {filename: file bytes}, and echoing it would
# write the contents of kaggle.json (the API token) into the saved notebook.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"manohar7812","key":"<REDACTED>"}'}

In [None]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000
!unzip -q skin-cancer-mnist-ham10000.zip -d ./skin_cancer_data

Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
Downloading skin-cancer-mnist-ham10000.zip to /content
 98% 5.12G/5.20G [01:37<00:07, 11.0MB/s]
100% 5.20G/5.20G [01:37<00:00, 57.0MB/s]


In [None]:
!unzip -q ham10000.zip -d /content/HAM10000

unzip:  cannot find or open ham10000.zip, ham10000.zip.zip or ham10000.zip.ZIP.


In [None]:
import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Root directory the Kaggle archive was extracted into
base_path = '/content/skin_cancer_data'

# Read the per-image metadata table and preview its first rows
metadata_csv = os.path.join(base_path, 'HAM10000_metadata.csv')
df = pd.read_csv(metadata_csv)
print(df.head())

     lesion_id      image_id   dx dx_type   age   sex localization
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear


In [None]:
# Directories holding the two halves of the image set
src1 = os.path.join(base_path, 'HAM10000_images_part_1')
src2 = os.path.join(base_path, 'HAM10000_images_part_2')

# Single destination directory that will hold every image
all_images = os.path.join(base_path, 'all_images')
os.makedirs(all_images, exist_ok=True)

# Walk each half in turn and copy its files into the merged directory
for part_dir in (src1, src2):
    part_files = [os.path.join(part_dir, name) for name in os.listdir(part_dir)]
    for file_path in part_files:
        shutil.copy(file_path, all_images)

In [None]:
# Folder to store images sorted into one sub-directory per diagnosis label (`dx`)
sorted_path = os.path.join(base_path, 'sorted')
os.makedirs(sorted_path, exist_ok=True)

# Create each class directory once up front instead of once per metadata row.
for label in df['dx'].unique():
    os.makedirs(os.path.join(sorted_path, label), exist_ok=True)

# Copy every image listed in the metadata into its class directory. Rows whose
# image file is absent are collected and reported rather than skipped silently.
missing_images = []
for label, image_stem in zip(df['dx'], df['image_id']):
    image_id = image_stem + '.jpg'
    src = os.path.join(all_images, image_id)
    dst = os.path.join(sorted_path, label, image_id)
    if os.path.exists(src):
        shutil.copy(src, dst)
    else:
        missing_images.append(image_id)

if missing_images:
    print(f"Warning: {len(missing_images)} image(s) listed in the metadata were not found, "
          f"e.g. {missing_images[:5]}")

In [None]:
from sklearn.model_selection import GroupShuffleSplit

train_dir = os.path.join(base_path, 'train')
val_dir = os.path.join(base_path, 'val')

# The metadata lists several images for the same `lesion_id`. Splitting image by
# image would place photos of one lesion on both sides of the split and inflate
# validation scores, so each class is split by lesion instead.
lesion_of_image = dict(zip(df['image_id'], df['lesion_id']))

# test_size is the fraction of *lesions* held out, so the share of validation
# images is approximately (not exactly) 20%.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

for label in sorted(os.listdir(sorted_path)):
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # split reproducible across runs and machines.
    imgs = sorted(os.listdir(os.path.join(sorted_path, label)))
    groups = [lesion_of_image[os.path.splitext(img)[0]] for img in imgs]

    train_idx, val_idx = next(splitter.split(imgs, groups=groups))
    train_imgs = [imgs[i] for i in train_idx]
    val_imgs = [imgs[i] for i in val_idx]

    # Start from empty class directories so files copied by an earlier run
    # (possibly with a different split) cannot end up in both train and val.
    for split_dir in (train_dir, val_dir):
        shutil.rmtree(os.path.join(split_dir, label), ignore_errors=True)
        os.makedirs(os.path.join(split_dir, label), exist_ok=True)

    for img in train_imgs:
        shutil.copy(os.path.join(sorted_path, label, img), os.path.join(train_dir, label, img))
    for img in val_imgs:
        shutil.copy(os.path.join(sorted_path, label, img), os.path.join(val_dir, label, img))

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight

# ----------------------
# Config
# ----------------------
BASE_DIR = '/content/skin_cancer_data'
TRAIN_DIR = os.path.join(BASE_DIR, 'train')
VAL_DIR   = os.path.join(BASE_DIR, 'val')

IMG_SIZE = 224          # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
SEED = 42
INITIAL_EPOCHS = 12     # epochs for training the classifier head
FINE_TUNE_EPOCHS = 8    # additional epochs after unfreezing part of the base model
FINE_TUNE_AT = -40   # unfreeze the last 40 layers of base model (tweakable)

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, shuffling and augmentation are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# ----------------------
# Data generators (use EfficientNet preprocessing)
# ----------------------
preproc = tf.keras.applications.efficientnet.preprocess_input

# Random geometric augmentation, applied to training images only
augmentation = dict(
    rotation_range=25,
    width_shift_range=0.08,
    height_shift_range=0.08,
    shear_range=0.08,
    zoom_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(preprocessing_function=preproc, **augmentation)
val_datagen = ImageDataGenerator(preprocessing_function=preproc)

# Settings shared by both directory iterators
flow_kwargs = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Training batches are shuffled with a fixed seed; validation keeps file order
train_gen = train_datagen.flow_from_directory(TRAIN_DIR, shuffle=True, seed=SEED, **flow_kwargs)
val_gen = val_datagen.flow_from_directory(VAL_DIR, shuffle=False, **flow_kwargs)

# Class count and the name -> index mapping, taken from the training iterator
num_classes = train_gen.num_classes
class_indices = train_gen.class_indices
print("Classes:", class_indices)

Found 8010 images belonging to 7 classes.
Found 2005 images belonging to 7 classes.
Classes: {'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}


In [None]:
# ----------------------
# Balanced class weights (to help with class imbalance)
# ----------------------
# Integer class index of every file seen by the training iterator
y_train_classes = train_gen.classes
classes_unique = np.unique(y_train_classes)
cw = compute_class_weight(class_weight='balanced', classes=classes_unique, y=y_train_classes)

# Convert the NumPy scalars to built-in int keys and float values
class_weights = {}
for class_idx, weight in zip(classes_unique, cw):
    class_weights[int(class_idx)] = float(weight)
print("Class weights:", class_weights)

Class weights: {0: 4.384236453201971, 1: 2.78415015641293, 2: 1.3018039980497318, 3: 12.437888198757763, 4: 1.2857142857142858, 5: 0.21332694151486098, 6: 10.126422250316056}


In [None]:
# ----------------------
# Build model (base frozen)
# ----------------------
image_shape = (IMG_SIZE, IMG_SIZE, 3)

# ImageNet-pretrained EfficientNetB0 without its classification top, frozen
base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=image_shape)
base_model.trainable = False

# New head: pooled backbone features -> dropout -> softmax over the classes
inputs = layers.Input(shape=image_shape)
backbone_features = base_model(inputs, training=False)
pooled_features = layers.GlobalAveragePooling2D()(backbone_features)
x = layers.Dropout(0.5)(pooled_features)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs, outputs)

head_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=head_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# ----------------------
# Callbacks
# ----------------------
checkpoint_path = 'efficientnetb0_ham10000_best.h5'

# Stop after 6 epochs without a better val_loss and restore the best weights
stop_on_plateau = EarlyStopping(
    monitor='val_loss', patience=6, restore_best_weights=True, verbose=1
)
# Write the model to disk only when val_accuracy reaches a new best
save_best = ModelCheckpoint(
    checkpoint_path, monitor='val_accuracy', save_best_only=True, verbose=1
)
# Halve the learning rate after 3 epochs without a better val_loss (floor 1e-7)
shrink_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1
)

callbacks = [stop_on_plateau, save_best, shrink_lr]

In [None]:
# ----------------------
# Phase 1: train the classifier head
# ----------------------
# Fit on the augmented training iterator, validating on the held-out iterator,
# with per-class loss weights and the shared callback list.
history1 = model.fit(
    x=train_gen,
    validation_data=val_gen,
    epochs=INITIAL_EPOCHS,
    callbacks=callbacks,
    class_weight=class_weights,
)

  self._warn_if_super_not_called()


Epoch 1/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567ms/step - accuracy: 0.2920 - loss: 1.9858
Epoch 1: val_accuracy improved from -inf to 0.50524, saving model to efficientnetb0_ham10000_best.h5




[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 695ms/step - accuracy: 0.2924 - loss: 1.9848 - val_accuracy: 0.5052 - val_loss: 1.3746 - learning_rate: 0.0010
Epoch 2/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 502ms/step - accuracy: 0.4569 - loss: 1.5517
Epoch 2: val_accuracy improved from 0.50524 to 0.56509, saving model to efficientnetb0_ham10000_best.h5




[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 553ms/step - accuracy: 0.4571 - loss: 1.5512 - val_accuracy: 0.5651 - val_loss: 1.1926 - learning_rate: 0.0010
Epoch 3/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 502ms/step - accuracy: 0.5232 - loss: 1.3478
Epoch 3: val_accuracy improved from 0.56509 to 0.58703, saving model to efficientnetb0_ham10000_best.h5




[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 553ms/step - accuracy: 0.5233 - loss: 1.3477 - val_accuracy: 0.5870 - val_loss: 1.1600 - learning_rate: 0.0010
Epoch 4/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499ms/step - accuracy: 0.5362 - loss: 1.3102
Epoch 4: val_accuracy did not improve from 0.58703
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 549ms/step - accuracy: 0.5362 - loss: 1.3101 - val_accuracy: 0.5297 - val_loss: 1.2277 - learning_rate: 0.0010
Epoch 5/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500ms/step - accuracy: 0.5475 - loss: 1.1855
Epoch 5: val_accuracy did not improve from 0.58703
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 549ms/step - accuracy: 0.5475 - loss: 1.1856 - val_accuracy: 0.5646 - val_loss: 1.2321 - learning_rate: 0.0010
Epoch 6/12
[1m251/251[0m [32m━━━━━━━━



[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 548ms/step - accuracy: 0.5522 - loss: 1.1446 - val_accuracy: 0.6015 - val_loss: 1.1166 - learning_rate: 5.0000e-04
Epoch 8/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501ms/step - accuracy: 0.5722 - loss: 1.1589
Epoch 8: val_accuracy improved from 0.60150 to 0.60898, saving model to efficientnetb0_ham10000_best.h5




[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 553ms/step - accuracy: 0.5722 - loss: 1.1587 - val_accuracy: 0.6090 - val_loss: 1.0772 - learning_rate: 5.0000e-04
Epoch 9/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498ms/step - accuracy: 0.5758 - loss: 1.1807
Epoch 9: val_accuracy improved from 0.60898 to 0.61496, saving model to efficientnetb0_ham10000_best.h5




[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 548ms/step - accuracy: 0.5758 - loss: 1.1804 - val_accuracy: 0.6150 - val_loss: 1.0600 - learning_rate: 5.0000e-04
Epoch 10/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499ms/step - accuracy: 0.5720 - loss: 1.1228
Epoch 10: val_accuracy did not improve from 0.61496
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 548ms/step - accuracy: 0.5720 - loss: 1.1228 - val_accuracy: 0.5531 - val_loss: 1.1762 - learning_rate: 5.0000e-04
Epoch 11/12
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501ms/step - accuracy: 0.5741 - loss: 1.1134
Epoch 11: val_accuracy did not improve from 0.61496
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 551ms/step - accuracy: 0.5741 - loss: 1.1135 - val_accuracy: 0.5855 - val_loss: 1.0962 - learning_rate: 5.0000e-04
Epoch 12/12
[1m251/251

In [None]:
# ----------------------
# Fine-tuning: unfreeze last few layers of base model
# ----------------------
# Translate FINE_TUNE_AT into the index of the first layer to unfreeze.
n_base_layers = len(base_model.layers)
if FINE_TUNE_AT < 0:
    # A negative value counts from the end; clamp at 0 so a magnitude larger
    # than the layer count unfreezes everything instead of producing a
    # negative cutoff.
    cutoff = max(0, n_base_layers + FINE_TUNE_AT)
else:
    cutoff = FINE_TUNE_AT

# The backbone container itself was frozen with `base_model.trainable = False`.
# Re-enable it first: in tf.keras 2.x a non-trainable parent reports no
# trainable weights regardless of the flags set on its child layers.
base_model.trainable = True

for i, layer in enumerate(base_model.layers):
    # Layers before the cutoff stay frozen. BatchNormalization layers are kept
    # frozen as well, following the Keras fine-tuning guidance.
    layer.trainable = i >= cutoff and not isinstance(layer, layers.BatchNormalization)

# Recompile so the new trainable flags take effect, with a much lower learning
# rate than the one used for the head.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print(f"Unfroze layers from index {cutoff} -> {len(base_model.layers)-1} "
      f"(BatchNormalization layers kept frozen)")
model.summary()

Unfroze layers from index 198 -> 237


In [None]:
# Continue epoch numbering from where the head-training phase actually ended.
# That phase may have stopped early, so read the last epoch index from its
# history; fall back to INITIAL_EPOCHS when no epoch was recorded.
fine_tune_start_epoch = (
    history1.epoch[-1] + 1 if getattr(history1, 'epoch', None) else INITIAL_EPOCHS
)

# `epochs` is the index of the final epoch, not a count, so add the fine-tune
# budget to the starting epoch to always run exactly FINE_TUNE_EPOCHS epochs.
history2 = model.fit(
    train_gen,
    epochs=fine_tune_start_epoch + FINE_TUNE_EPOCHS,
    initial_epoch=fine_tune_start_epoch,
    validation_data=val_gen,
    class_weight=class_weights,
    callbacks=callbacks
)

Epoch 13/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 561ms/step - accuracy: 0.2936 - loss: 1.5295
Epoch 13: val_accuracy did not improve from 0.61496
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 658ms/step - accuracy: 0.2936 - loss: 1.5292 - val_accuracy: 0.3571 - val_loss: 1.6636 - learning_rate: 1.0000e-05
Epoch 14/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 508ms/step - accuracy: 0.3359 - loss: 1.3623
Epoch 14: val_accuracy did not improve from 0.61496
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 559ms/step - accuracy: 0.3359 - loss: 1.3622 - val_accuracy: 0.3726 - val_loss: 1.5733 - learning_rate: 1.0000e-05
Epoch 15/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 502ms/step - accuracy: 0.3882 - loss: 1.2639
Epoch 15: val_accuracy did not improve from 0.61496
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 553ms/step - accuracy: 0.3882 - loss: 1.2