### IMPORTING LIBRARIES

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from PIL import Image

### Reading the Data

In [10]:
# Load metadata CSV
df = pd.read_csv('/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv')

# Define image folder paths
img_folder1 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1'
img_folder2 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2'


In [11]:
def get_image_path(image_id):
    filename = image_id + '.jpg'
    if os.path.exists(os.path.join(img_folder1, filename)):
        return os.path.join(img_folder1, filename)
    elif os.path.exists(os.path.join(img_folder2, filename)):
        return os.path.join(img_folder2, filename)
    else:
        return None

# Apply function to get image paths
df['image_path'] = df['image_id'].apply(get_image_path)

# Check if all images were found
missing_images = df[df['image_path'].isna()]
print(f"Images not found: {len(missing_images)}")

# Filter out rows with missing images
df = df.dropna(subset=['image_path'])


Images not found: 0


In [12]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...


### Preprocessing

In [13]:
# Encode labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['dx'])

# Convert labels to strings
df['label'] = df['label'].astype(str)

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

# Image data generator for data augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Create data generators
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_test_datagen.flow_from_dataframe(
    val_df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

test_generator = val_test_datagen.flow_from_dataframe(
    test_df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

Found 7010 validated image filenames belonging to 7 classes.
Found 1502 validated image filenames belonging to 7 classes.
Found 1503 validated image filenames belonging to 7 classes.


In [14]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path,label
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...,2


### Model

In [15]:
# Load the ResNet50 model with pre-trained weights
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model
base_model.trainable = False

# Add custom layers on top of the base model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/50


  self._warn_if_super_not_called()


[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 534ms/step - accuracy: 0.6462 - loss: 1.5491 - val_accuracy: 0.7457 - val_loss: 0.6900 - learning_rate: 0.0010
Epoch 2/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 502ms/step - accuracy: 0.7347 - loss: 0.7279 - val_accuracy: 0.7423 - val_loss: 0.6799 - learning_rate: 0.0010
Epoch 3/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 508ms/step - accuracy: 0.7442 - loss: 0.6846 - val_accuracy: 0.7710 - val_loss: 0.6233 - learning_rate: 0.0010
Epoch 4/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 505ms/step - accuracy: 0.7571 - loss: 0.6676 - val_accuracy: 0.7517 - val_loss: 0.6391 - learning_rate: 0.0010
Epoch 5/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 503ms/step - accuracy: 0.7476 - loss: 0.6710 - val_accuracy: 0.7756 - val_loss: 0.5992 - learning_rate: 0.0010
Epoch 6/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Model Evaluation

In [16]:
# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 221ms/step - accuracy: 0.8229 - loss: 0.4924
Validation Loss: 0.49036550521850586
Validation Accuracy: 0.82356858253479
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 274ms/step - accuracy: 0.8225 - loss: 0.5566
Test Loss: 0.5680029988288879
Test Accuracy: 0.8150365948677063
