In [124]:
import keras
import tensorflow
import h5py
import numpy as np
import os
import pandas as pd
import shutil
import tensorflow as tf
from keras.api.models import Sequential, Model
from keras.api.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization, LSTM, Input, GlobalAveragePooling2D
from keras.api.optimizers import Adam
from keras import regularizers
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint

In [125]:
# Short diagnosis code -> human-readable diagnosis name for the three
# classes this notebook works with. Its keys are also used as the row filter
# below, so the two can never drift apart.
label_map = {
    'bkl': 'Benign keratosis-like lesions',
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
}

# Load the HAM10000 metadata table and keep only rows whose `dx` code is one
# of the mapped diagnoses. `.copy()` detaches the filtered frame from the full
# table so that adding a column below is a plain assignment on an independent
# frame (no SettingWithCopyWarning / ambiguous chained assignment).
meta = pd.read_csv("Ham10k Original/HAM10000_metadata.csv")
meta = meta[meta['dx'].isin(list(label_map))].copy()

# Attach the readable diagnosis name next to the short code.
meta['diagnosis_full'] = meta['dx'].map(label_map)

# Show the filtered table and the per-class row counts.
print(meta)
print(meta['dx'].value_counts())
print(meta['diagnosis_full'].value_counts())

         lesion_id      image_id   dx    dx_type   age     sex localization  \
0      HAM_0000118  ISIC_0027419  bkl      histo  80.0    male        scalp   
1      HAM_0000118  ISIC_0025030  bkl      histo  80.0    male        scalp   
2      HAM_0002730  ISIC_0026769  bkl      histo  80.0    male        scalp   
3      HAM_0002730  ISIC_0025661  bkl      histo  80.0    male        scalp   
4      HAM_0001466  ISIC_0031633  bkl      histo  75.0    male          ear   
...            ...           ...  ...        ...   ...     ...          ...   
9683   HAM_0000102  ISIC_0031547   nv  consensus  20.0    male         back   
9684   HAM_0000102  ISIC_0032221   nv  consensus  20.0    male         back   
9685   HAM_0005314  ISIC_0030693   nv  consensus  40.0    male         neck   
9686   HAM_0003322  ISIC_0031649   nv  consensus  50.0  female         face   
10014  HAM_0003521  ISIC_0032258  mel      histo  70.0  female         back   

                      diagnosis_full  
0      Benig

In [126]:
# Down-sample the nevi ('nv') rows to a fixed random subset of 1100 images;
# the seed keeps the selection reproducible.
nevi_dataset = meta.loc[meta['dx'] == 'nv'].sample(n=1100, random_state=42)

# Melanoma ('mel') and benign keratosis ('bkl') rows are kept in full.
mel_dataset = meta.loc[meta['dx'] == 'mel']
bkl_dataset = meta.loc[meta['dx'] == 'bkl']

# Stack the three class subsets with a fresh 0..n-1 index, then shuffle the
# row order (frac=1 returns every row in a seeded random order).
model_dataset1 = pd.concat(
    [nevi_dataset, mel_dataset, bkl_dataset],
    ignore_index=True,
).sample(frac=1, random_state=42)

# Independent deep copy of the shuffled table.
model_dataset2 = model_dataset1.copy()
print(model_dataset1)


        lesion_id      image_id   dx    dx_type   age     sex  \
52    HAM_0000972  ISIC_0031605   nv  follow_up  60.0    male   
680   HAM_0000528  ISIC_0028782   nv  follow_up  45.0    male   
1376  HAM_0000179  ISIC_0033700  mel      histo  45.0    male   
1237  HAM_0001202  ISIC_0031517  mel      histo  70.0  female   
203   HAM_0007475  ISIC_0034207   nv      histo  50.0  female   
...           ...           ...  ...        ...   ...     ...   
1095  HAM_0003451  ISIC_0027298   nv      histo  70.0    male   
1130  HAM_0005557  ISIC_0028215  mel      histo  75.0  female   
1294  HAM_0005642  ISIC_0027659  mel      histo  70.0    male   
860   HAM_0003025  ISIC_0027440   nv  follow_up  35.0  female   
3174  HAM_0007427  ISIC_0033660  bkl  consensus  60.0  female   

         localization                 diagnosis_full  
52              chest               Melanocytic nevi  
680   lower extremity               Melanocytic nevi  
1376             back                       Melanoma  

In [127]:
# Build the path of an original (unprocessed) image from its id
def get_original_image_path(image_id):
    """Return the relative path of the original JPEG for ``image_id``.

    Parameters
    ----------
    image_id
        Image identifier as found in the metadata ``image_id`` column
        (e.g. ``"ISIC_0031605"``); it is interpolated into the file name.

    Returns
    -------
    str
        ``"Ham10k Original/HAM10000 All Image/<image_id>.jpg"``. The file is
        not checked for existence.
    """
    image_dir = "Ham10k Original/HAM10000 All Image"
    return f"{image_dir}/{image_id}.jpg"

# Get preprocessed image path
# def get_preprocessed_image_path(image_id):
#     return f"Preprocessed/{image_id}.jpg" # Segmented

# Derive the on-disk path of every original image from its image id and
# store it in a new `image_path` column.
model_dataset1['image_path'] = model_dataset1['image_id'].map(get_original_image_path)
# model_dataset2['image_path'] = model_dataset2['image_id'].apply(get_preprocessed_image_path) # Segmented


In [128]:

# Sanity check: show the path generated for the first row.
print(model_dataset1['image_path'].iloc[0])
# print(model_dataset2['image_path'].values[0])

Ham10k Original/HAM10000 All Image/ISIC_0031605.jpg


In [129]:
from sklearn.preprocessing import LabelEncoder

# Learn the diagnosis-name -> integer mapping from the dataset, then store
# the integer code of every row in `encoded_label`. The fitted encoder `le`
# is kept so the codes can be mapped back to names later.
le = LabelEncoder()
le.fit(model_dataset1['diagnosis_full'])
model_dataset1['encoded_label'] = le.transform(model_dataset1['diagnosis_full'])
# model_dataset2['encoded_label'] = le.fit_transform(model_dataset2['diagnosis_full'])
print(model_dataset1)


        lesion_id      image_id   dx    dx_type   age     sex  \
52    HAM_0000972  ISIC_0031605   nv  follow_up  60.0    male   
680   HAM_0000528  ISIC_0028782   nv  follow_up  45.0    male   
1376  HAM_0000179  ISIC_0033700  mel      histo  45.0    male   
1237  HAM_0001202  ISIC_0031517  mel      histo  70.0  female   
203   HAM_0007475  ISIC_0034207   nv      histo  50.0  female   
...           ...           ...  ...        ...   ...     ...   
1095  HAM_0003451  ISIC_0027298   nv      histo  70.0    male   
1130  HAM_0005557  ISIC_0028215  mel      histo  75.0  female   
1294  HAM_0005642  ISIC_0027659  mel      histo  70.0    male   
860   HAM_0003025  ISIC_0027440   nv  follow_up  35.0  female   
3174  HAM_0007427  ISIC_0033660  bkl  consensus  60.0  female   

         localization                 diagnosis_full  \
52              chest               Melanocytic nevi   
680   lower extremity               Melanocytic nevi   
1376             back                       Melanom

In [None]:
# combined_df = pd.concat([model_dataset1, model_dataset2], ignore_index=True)
# combined_df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Split the labelled image table into train (80%) and test (20%) subsets,
# stratified on the encoded label so both subsets keep the same class mix.
#
# The split is taken from `model_dataset1`: it is the only frame in this
# notebook that carries both `image_path` and `encoded_label`. The previous
# source, `combined_df`, is only assigned in a commented-out cell, so using it
# raised NameError on a fresh kernel.
#
# NOTE(review): the metadata contains several images per `lesion_id`; an
# image-level split like this one can place images of the same lesion in both
# subsets. Consider a group-aware split if that matters for the evaluation.
train_df, test_df = train_test_split(
    model_dataset1,
    test_size=0.2,
    stratify=model_dataset1['encoded_label'],
    random_state=42,
)

# The Keras dataframe iterator used later (class_mode='sparse') expects the
# label column to hold strings. `.assign` returns new frames, so nothing is
# written into a slice of `model_dataset1` (avoids SettingWithCopyWarning) and
# the column keeps its original position.
train_df = train_df.assign(encoded_label=train_df['encoded_label'].astype(str))
test_df = test_df.assign(encoded_label=test_df['encoded_label'].astype(str))

print(train_df)
print(test_df)

In [None]:
# print(train_df)
# print(test_df)

# test_orig_folder = "downsampled_data/test"
# train_orig_folder = "downsampled_data/train"

# test_pre_folder = "downsampled_data/test2"
# train_pre_folder = "downsampled_data/train2"

# os.makedirs(test_orig_folder, exist_ok=True)
# os.makedirs(train_orig_folder, exist_ok=True)
# os.makedirs(test_pre_folder, exist_ok=True)
# os.makedirs(train_pre_folder, exist_ok=True)


# for path in train_df['original_image_path']:
#     shutil.copy(path, test_orig_folder)

# for path in test_df['original_image_path']:
#     shutil.copy(path, train_orig_folder)
    
# for path in train_df['preprocessed_image_path']:
#     shutil.copy(path, test_pre_folder)

# for path in test_df['preprocessed_image_path']:
#     shutil.copy(path, train_pre_folder)


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images: scale pixel values by 1/255 and apply random augmentation
# (rotation, horizontal/vertical shifts, zoom and horizontal flips).
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=20,
    rescale=1./255,
)

# Test images: only the same 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)


In [None]:
# Options shared by both iterators: which columns hold the file path and the
# label, the size images are resized to, integer ("sparse") targets, and the
# batch size.
_flow_options = dict(
    x_col='image_path',
    y_col='encoded_label',
    target_size=(224, 224),
    class_mode='sparse',
    batch_size=32,
)

# Training batches are drawn in shuffled order.
train_generator = train_datagen.flow_from_dataframe(train_df, shuffle=True, **_flow_options)

# Test batches keep the dataframe order (shuffle=False).
test_generator = test_datagen.flow_from_dataframe(test_df, shuffle=False, **_flow_options)

In [None]:
# from sklearn.utils.class_weight import compute_class_weight
# import numpy as np

# class_weights = compute_class_weight(
#     class_weight='balanced',
#     classes=np.unique(train_df['encoded_label']),
#     y=train_df['encoded_label']
# )

# class_weights = dict(enumerate(class_weights))

In [None]:
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# Instantiate ResNet50V2 without its classification head, initialised from
# the ImageNet weights.
RES_base_model = ResNet50V2(include_top=False, weights='imagenet')

# Freeze every layer except the final 102, which stay trainable for
# fine-tuning.
for frozen_layer in RES_base_model.layers[:-102]:
    frozen_layer.trainable = False
for tuned_layer in RES_base_model.layers[-102:]:
    tuned_layer.trainable = True

# List the index and name of each layer that is trainable.
for position, base_layer in enumerate(RES_base_model.layers):
    if not base_layer.trainable:
        continue
    print(f"Layer {position}: {base_layer.name}")



In [None]:
# Classification head stacked on the base model's output: global average
# pooling, a 256-unit ReLU layer, then a 3-way softmax.
pooled_features = GlobalAveragePooling2D()(RES_base_model.output)
hidden_features = Dense(256, activation='relu')(pooled_features)
predictions = Dense(3, activation='softmax')(hidden_features)

# Full model: base-model input -> class probabilities.
RES_model = Model(inputs=RES_base_model.input, outputs=predictions)

# List the index and name of each trainable layer of the assembled model.
for position, model_layer in enumerate(RES_model.layers):
    if not model_layer.trainable:
        continue
    print(f"Layer {position}: {model_layer.name}")

In [None]:

# Compile the model (the layers to fine-tune were unfrozen before this point).
# Sparse categorical cross-entropy matches the integer class targets; accuracy
# is tracked as the metric.
RES_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [None]:

# Multiply the learning rate by 0.2 whenever val_loss has not improved for
# 5 epochs, never going below 1e-7; print a message on each reduction.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

# Save the full model (architecture + weights) each time val_loss reaches a
# new best. The file name records the epoch and the train/validation metrics.
checkpoint = ModelCheckpoint(
    filepath='model_epoch_{epoch:02d}_acc_{accuracy:.2f}_loss_{loss:.2f}_valacc_{val_accuracy:.2f}_valloss_{val_loss:.2f}.keras',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Train for up to 80 epochs.
# NOTE(review): the test generator is used as validation data here, so both
# the LR schedule and the checkpoint selection are driven by the same images
# that are later used for the final evaluation; a separate validation split
# would keep the test metrics independent.
RES_model.fit(
    train_generator,
    epochs=80,
    validation_data=test_generator,
    callbacks=[reduce_lr, checkpoint],
)

In [None]:
from keras.api.models import load_model
# Reload a saved checkpoint and score it on the test generator.
# NOTE(review): the checkpoint file name is hard-coded and embeds the epoch
# and metric values of one particular training run; after retraining, the
# saved files will most likely have different names.
RES_model = load_model("model_epoch_22_acc_0.98_loss_0.05_valacc_0.89_valloss_0.39.keras")

# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
results = RES_model.evaluate(test_generator)
print("Test Loss, Test Accuracy:", *results[:2])

# True class indices (in generator order) and the arg-max predicted class.
RES_y_true = test_generator.classes
RES_y_pred = RES_model.predict(test_generator).argmax(axis=1)


In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# True class indices come from the (unshuffled) test generator; predicted
# classes are the arg-max over the model's per-class probabilities.
RES_y_true = test_generator.classes
RES_y_pred_probs = RES_model.predict(test_generator)
RES_y_pred = np.argmax(RES_y_pred_probs, axis=1)

# Per-class precision / recall / F1.
# NOTE(review): these names are the generator's class keys, i.e. the
# stringified encoded labels; map them through the fitted LabelEncoder if
# human-readable diagnosis names are wanted in the report.
target_names = list(test_generator.class_indices.keys())  # class labels as strings
print("\nClassification Report:\n")
print(classification_report(RES_y_true, RES_y_pred, target_names=target_names))

# Confusion matrix of true vs. predicted classes.
cm = confusion_matrix(RES_y_true, RES_y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)

# Create the axes explicitly and hand them to disp.plot(). Without `ax`,
# disp.plot() opens its own figure, so a preceding plt.figure(figsize=...)
# only produced an extra empty figure and the requested size was ignored.
fig, ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation=45)
ax.set_title("Confusion Matrix")
plt.show()