In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [2]:
# === CONFIG ===
IMG_SIZE = 224
BATCH_SIZE = 32
DATA_DIR = "C:\Skin Classification\HAM10000_images"
CSV_PATH = "C:\Skin Classification\HAM10000_images\HAM10000_metadata.csv"

In [3]:
# === LOAD DATA ===
df = pd.read_csv(CSV_PATH)
df['image_id'] = df['image_id'] + ".jpg"
df['path'] = df['image_id'].apply(lambda x: os.path.join(DATA_DIR, x))

le = LabelEncoder()
df['label'] = le.fit_transform(df['dx'])
num_classes = len(le.classes_)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

In [4]:
df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,label
0,HAM_0000118,ISIC_0027419.jpg,bkl,histo,80.0,male,scalp,C:\Skin Classification\HAM10000_images\ISIC_00...,2
1,HAM_0000118,ISIC_0025030.jpg,bkl,histo,80.0,male,scalp,C:\Skin Classification\HAM10000_images\ISIC_00...,2
2,HAM_0002730,ISIC_0026769.jpg,bkl,histo,80.0,male,scalp,C:\Skin Classification\HAM10000_images\ISIC_00...,2
3,HAM_0002730,ISIC_0025661.jpg,bkl,histo,80.0,male,scalp,C:\Skin Classification\HAM10000_images\ISIC_00...,2
4,HAM_0001466,ISIC_0031633.jpg,bkl,histo,75.0,male,ear,C:\Skin Classification\HAM10000_images\ISIC_00...,2
...,...,...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084.jpg,akiec,histo,40.0,male,abdomen,C:\Skin Classification\HAM10000_images\ISIC_00...,0
10011,HAM_0002867,ISIC_0033550.jpg,akiec,histo,40.0,male,abdomen,C:\Skin Classification\HAM10000_images\ISIC_00...,0
10012,HAM_0002867,ISIC_0033536.jpg,akiec,histo,40.0,male,abdomen,C:\Skin Classification\HAM10000_images\ISIC_00...,0
10013,HAM_0000239,ISIC_0032854.jpg,akiec,histo,80.0,male,face,C:\Skin Classification\HAM10000_images\ISIC_00...,0


In [5]:
print(df['dx'].unique())

['bkl' 'nv' 'df' 'mel' 'vasc' 'bcc' 'akiec']


In [6]:
# === IMAGE DATA GENERATOR ===
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    shear_range=0.2
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_dataframe(
    train_df,
    x_col='path',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='raw',
    batch_size=BATCH_SIZE
)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    x_col='path',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='raw',
    batch_size=BATCH_SIZE
)

Found 8012 validated image filenames.
Found 2003 validated image filenames.


In [7]:
# === MODEL ===
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = True

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

In [8]:
# === CLASS WEIGHTS ===
y_train_labels = train_df['label'].values
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train_labels), y=y_train_labels)
class_weights_dict = dict(enumerate(class_weights))

In [9]:
# === COMPILE ===
model.compile(optimizer=Adam(1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# === TRAIN ===
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    class_weight=class_weights_dict
)

# === EVALUATE ===
val_loss, val_acc = model.evaluate(val_gen)
print(f"Validation Accuracy: {val_acc*100:.2f}%")

In [None]:
# Evaluate on test set
test_loss, test_acc = model.evaluate(test_gen)
print(f"Test Accuracy: {test_acc * 100:.2f}%")