In [34]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import pandas as pd

In [35]:
meta_df = pd.read_csv('./data/archive/HAM10000_metadata.csv', delimiter=',')

meta_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [36]:
data_df = pd.read_csv('./data/archive/hmnist_28_28_RGB.csv', delimiter=',')

In [37]:
data_df = pd.read_csv('./data/archive/hmnist_28_28_RGB.csv')

X = data_df.drop('label', axis=1)
y = data_df['label']

In [29]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
X_train = X_train.values
X_test = X_test.values

# Reshape the data
img_size = int(np.sqrt(X_train.shape[1] // 3))  # Assuming square RGB images
X_train = X_train.reshape(-1, img_size, img_size, 3)
X_test = X_test.reshape(-1, img_size, img_size, 3)

total_pixels = X_train.shape[1]
img_size = np.sqrt(total_pixels // 3)

# Normalize the data
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [31]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.1
)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weight_dict = dict(enumerate(class_weights))

In [33]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Squeeze and Excitation block
def squeeze_excite_block(input_tensor, ratio=16):
    channels = input_tensor.shape[-1]
    se = layers.GlobalAveragePooling2D()(input_tensor)
    se = layers.Reshape((1, 1, channels))(se)
    se = layers.Dense(channels // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = layers.Dense(channels, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)
    return layers.multiply([input_tensor, se])

# SE-ResNeXt block
def se_resnext_block(input_tensor, filters, cardinality=32):
    residual = input_tensor

    x = layers.Conv2D(filters, kernel_size=1, use_bias=False)(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Conv2D(filters, kernel_size=3, padding='same', groups=cardinality, use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    x = layers.Conv2D(filters * 2, kernel_size=1, use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    x = squeeze_excite_block(x)

    if input_tensor.shape[-1] != filters * 2:
        residual = layers.Conv2D(filters * 2, kernel_size=1, strides=1, use_bias=False)(input_tensor)
        residual = layers.BatchNormalization()(residual)

    x = layers.add([x, residual])
    x = layers.Activation('relu')(x)

    return x

# SE-ResNeXt-50 model
def se_resnext50(input_shape, num_classes):
    inputs = layers.Input(shape=input_shape)

    x = layers.Conv2D(64, kernel_size=7, strides=2, padding='same', use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x)

    x = se_resnext_block(x, 64)
    x = se_resnext_block(x, 64)
    x = se_resnext_block(x, 64)

    x = se_resnext_block(x, 128)
    x = se_resnext_block(x, 128)
    x = se_resnext_block(x, 128)
    x = se_resnext_block(x, 128)

    x = se_resnext_block(x, 256)
    x = se_resnext_block(x, 256)
    x = se_resnext_block(x, 256)
    x = se_resnext_block(x, 256)
    x = se_resnext_block(x, 256)
    x = se_resnext_block(x, 256)

    x = se_resnext_block(x, 512)
    x = se_resnext_block(x, 512)
    x = se_resnext_block(x, 512)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, x)
    return model

# Create and compile the model
input_shape = X_train.shape[1:]  # (height, width, channels)
num_classes = len(np.unique(y))
model = se_resnext50(input_shape, num_classes)

model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    steps_per_epoch=len(X_train) // 32,
    epochs=50,
    validation_data=(X_test, y_test),
    class_weight=class_weight_dict,
    learning_rate=0.001,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
    ]
)

# Evaluate the model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print(classification_report(y_test, y_pred_classes))
print(confusion_matrix(y_test, y_pred_classes))

# Save the model
model.save('se_resnext50_melanoma_model.h5')

Epoch 1/50


  self._warn_if_super_not_called()


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m875s[0m 3s/step - accuracy: 0.3041 - loss: 2.5316 - val_accuracy: 0.2706 - val_loss: 1.9016 - learning_rate: 0.0010
Epoch 2/50
[1m  1/250[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:47[0m 3s/step - accuracy: 0.3438 - loss: 0.9675

2024-09-07 20:59:42.289767: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - accuracy: 0.3438 - loss: 0.9675 - val_accuracy: 0.3015 - val_loss: 1.8998 - learning_rate: 0.0010
Epoch 3/50
[1m146/250[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m11:09[0m 6s/step - accuracy: 0.4029 - loss: 1.8777