In [None]:
%pip install --upgrade tensorflow keras scikit-learn opencv-python matplotlib keras_tuner keras_cv numpy seaborn

In [2]:
import os
# Configured here, ahead of the cell that imports TensorFlow.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow INFO and WARNING messages

In [None]:
import tensorflow as tf
import keras

# Report the TensorFlow version and the GPUs visible to it.
print(tf.__version__)
visible_gpus = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {visible_gpus}")

In [4]:
# Turn on memory growth for every detected GPU. With no GPUs the loop body
# never runs, so no explicit emptiness check is needed.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Report the failure but keep the notebook running.
    print(err)


In [5]:
from tensorflow.keras import mixed_precision

# Set the global mixed-precision policy to 'mixed_float16'.
mixed_precision.set_global_policy('mixed_float16')

In [None]:
import tensorflow as tf
import keras
import pandas as pd
import os
import keras_cv
import matplotlib.pyplot as plt

In [None]:
# Locations of the metadata CSV and the image folder.
csv_file = 'ham10000_data/HAM10000_metadata.csv'
img_dir = 'ham10000_data/images'
file_ext = '.jpg'

df = pd.read_csv(csv_file)

# One file path per metadata row, in the same order as the rows.
image_paths = df['image_id'].map(
    lambda image_id: os.path.join(img_dir, image_id + file_ext)
).tolist()

print(f"Number of images: {len(image_paths)}")
print(f"First 5 image paths: {image_paths[:5]}")

# Diagnosis codes in the 'dx' column, sorted so the class indices are stable.
classes = sorted(df['dx'].unique())
num_classes = len(classes)

print(f"Number of classes: {num_classes}")
print(f"Classes: {classes}")

# Map each diagnosis code to its position in `classes`, then encode every row.
class_to_idx = dict(zip(classes, range(num_classes)))
labels = df['dx'].map(class_to_idx).values

print(f"Class to Index Mapping: {class_to_idx}")
print(f"Labels: {labels}")

In [None]:
from IPython.display import display

# Show the first rows of the metadata frame.
display(df.head())

In [None]:
# Count missing values per metadata column.
df.isnull().sum()

In [None]:
import matplotlib.pyplot as plt

# Bar chart of how many rows carry each diagnosis code.
value_counts = df['dx'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(value_counts.index, value_counts.values)
ax.set_title('Distribution of Lesion Categories')
ax.set_xlabel('Lesion Category')
ax.set_ylabel('Frequency')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

In [11]:
# import matplotlib.pyplot as plt
# import random
# import tensorflow as tf

# n_samples_per_class = 5

# plt.figure(figsize=(15, len(classes) * 3))

# for class_index, class_name in enumerate(classes):
#     class_indices = [i for i, label in enumerate(labels) if label == class_index]

#     random_class_indices = random.sample(class_indices, min(n_samples_per_class, len(class_indices)))

#     for i, idx in enumerate(random_class_indices):
#         img_path = image_paths[idx]
#         label = labels[idx]

#         img = tf.io.read_file(img_path)
#         img = tf.image.decode_jpeg(img, channels=3)

#         # Display image
#         plt.subplot(len(classes), n_samples_per_class, class_index * n_samples_per_class + i + 1)
#         plt.imshow(img)
#         plt.title(f"Class: {class_name}")
#         plt.axis('off')

# plt.tight_layout()
# plt.show()


In [12]:
def load_and_preprocess_image(image_id, label):
    """Read one JPEG from disk and prepare it for the ResNet backbone.

    Args:
        image_id: Path of the image file to read (despite the name, the value
            is passed straight to ``tf.io.read_file``).
        label: Label associated with the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` has been decoded to three
        channels, resized to 224x224 and passed through the ResNet50
        ``preprocess_input`` function.
    """
    raw_bytes = tf.io.read_file(image_id)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))  # ResNet expects 224x224 images
    prepared = tf.keras.applications.resnet50.preprocess_input(resized)
    return prepared, label


In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# A single stratified 80/20 split; the fixed random_state makes it repeatable.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_index, val_index = next(sss.split(image_paths, labels))

# `image_paths` is a plain list, so it is indexed element by element;
# `labels` is an array and accepts the index arrays directly.
train_paths = [image_paths[position] for position in train_index]
val_paths = [image_paths[position] for position in val_index]
train_labels = labels[train_index]
val_labels = labels[val_index]

print(f"Training set size: {len(train_paths)}")
print(f"Validation set size: {len(val_paths)}")

train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))

In [14]:
batch_size = 4

# Training pipeline. Order matters:
#   map     -> decode and preprocess each image
#   cache   -> keep the preprocessed images so later epochs skip the decoding
#   shuffle -> runs AFTER the cache so each epoch draws a fresh order
#   batch / prefetch -> last, so batches are re-formed from the new order
# Caching after shuffle/batch (as was done previously) stores the first
# epoch's batches and replays exactly the same batches in every later epoch.
train_dataset = (
    train_dataset
    .map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(buffer_size=1000)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: no shuffling, so the element order stays fixed. The
# cache again sits right after the expensive map step and prefetch stays last.
val_dataset = (
    val_dataset
    .map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [15]:
# Random augmentation layers. The pipeline is defined here but not applied:
# the call that would use it inside build_model is commented out.
augmentation_layers = [
    keras.layers.RandomFlip('horizontal'),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomContrast(0.1),
    keras.layers.RandomTranslation(0.1, 0.1),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [16]:
import numpy as np

# Empirical class frequencies of the training split; build_model uses their
# logarithm to initialise the output-layer bias.
class_counts = np.bincount(train_labels)
total_counts = class_counts.sum()
class_prior = class_counts / total_counts


def build_model(hp):
    """Build and compile a frozen-backbone ResNet101 classifier for one trial.

    The ImageNet-pretrained ResNet101 is used as a fixed feature extractor;
    only the final Dense softmax layer is trainable. The output bias starts at
    ``log(class_prior)``.

    Args:
        hp: Hyperparameter container supplied by the tuner. This function
            registers ``learning_rate`` (choice of 1e-3 .. 1e-6) and
            ``weight_decay`` (float in [0, 0.01], step 0.001).

    Returns:
        A compiled ``keras.Model`` taking (224, 224, 3) inputs and producing
        ``num_classes`` softmax probabilities, trained with AdamW and sparse
        categorical cross-entropy.
    """
    # NOTE(review): a new MirroredStrategy is created on every call, i.e. once
    # per tuner trial. Consider creating it once and passing it to the tuner.
    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        base_model = keras.applications.resnet.ResNet101(
            include_top=False,
            weights='imagenet',
            input_shape=(224, 224, 3),
            pooling=None
        )

        # Freeze the backbone so only the classification head is trained.
        base_model.trainable = False

        inputs = keras.Input(shape=(224, 224, 3))
        # x = data_augmentation(inputs)
        # training=False keeps the backbone in inference mode.
        x = base_model(inputs, training=False)
        x = keras.layers.GlobalAveragePooling2D()(x)

        # # Tune Dropout Rate
        # dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.7, step=0.1)
        # if dropout_rate > 0.0:
        #     x = keras.layers.Dropout(dropout_rate)(x)

        # Start the output bias at the log class prior.
        output_bias = np.log(class_prior)

        # Output Layer
        # The notebook enables the global 'mixed_float16' policy, under which
        # this layer would otherwise compute its softmax in float16. Forcing
        # float32 here keeps the probabilities and the loss numerically stable.
        outputs = keras.layers.Dense(
            num_classes,
            activation='softmax',
            bias_initializer=keras.initializers.Constant(output_bias),
            dtype='float32',
            # kernel_regularizer=keras.regularizers.l2(
            #     hp.Float('l2_regularization', min_value=0.0, max_value=0.1, step=0.01)
            # )
        )(x)

        model = keras.Model(inputs, outputs)

        learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4, 1e-5, 1e-6])

        optimizer = keras.optimizers.AdamW(
            learning_rate=learning_rate,
            weight_decay=hp.Float('weight_decay', min_value=0.0, max_value=0.01, step=0.001)
        )

        # Conditionally select loss function
        # loss_choice = hp.Choice('loss_function', values=['sparse_categorical_crossentropy', 'focal_loss'])

        # if loss_choice == 'focal_loss':
        #     loss = CategoricalFocalLoss(alpha=0.25, gamma=2)
        #     class_weights = None
        # else:
        #     loss = 'sparse_categorical_crossentropy'
        #

        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        return model


In [None]:
from keras_tuner import RandomSearch

# Random search over the hyperparameters registered in build_model, ranking
# trials by validation accuracy.
search_config = {
    'objective': 'val_accuracy',
    'max_trials': 20,
    'executions_per_trial': 1,
    'directory': 'random_search',
    'project_name': 'hyperparameter_tuning-resnet101',
}
tuner = RandomSearch(build_model, **search_config)

tuner.search_space_summary()


In [None]:
# Run the search: every trial trains for 10 epochs and is scored on the
# validation dataset.
search_fit_kwargs = dict(validation_data=val_dataset, epochs=10)
tuner.search(train_dataset, **search_fit_kwargs)

In [None]:
# Take the single best hyperparameter set found by the search and report it.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

best_learning_rate = best_hps.get('learning_rate')
best_weight_decay = best_hps.get('weight_decay')

print(f"""
Hyperparameters:
Best Learning Rate: {best_learning_rate}
Best Weight Decay: {best_weight_decay}
""")



In [None]:
from keras.callbacks import ModelCheckpoint

# Rebuild a fresh model using the best hyperparameters from the search.
best_model = tuner.hypermodel.build(best_hps)

# Stop when val_loss has not improved by at least 1e-4 for 10 epochs and roll
# back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    min_delta=1e-4,
    restore_best_weights=True
)

# Cut the learning rate tenfold after 5 epochs without val_loss improvement,
# never going below 1e-7.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    min_lr=1e-7
)

# Keep the full model with the lowest val_loss on disk.
# NOTE(review): the file names say "resnet50" although build_model uses a
# ResNet101 backbone - confirm whether the names should be updated.
checkpoint = ModelCheckpoint(
    filepath='models/resnet50-ham10000.keras',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1
)

callbacks = [early_stopping, reduce_lr, checkpoint]

# 500 epochs is an upper bound; early stopping is expected to end training.
history = best_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=500,
    callbacks=callbacks
)

best_model.save('models/resnet50-ham10000-final.keras')

In [None]:
# Training curves: accuracy on the left panel, loss on the right one.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = [
    (accuracy_ax, 'accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Loss'),
]
for panel_ax, metric_key, metric_name in panels:
    panel_ax.plot(history.history[metric_key], label=f'Training {metric_name}')
    panel_ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(metric_name)
    panel_ax.legend()

plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf

def get_predictions(dataset, model=None):
    """Collect true and predicted class indices over a batched dataset.

    Results are accumulated in local lists, so calling the function again
    neither duplicates earlier results nor depends on module-level state.

    Args:
        dataset: Iterable of ``(images, labels)`` batches, where ``labels``
            exposes ``.numpy()``.
        model: Object with a Keras-style ``predict`` method. Defaults to the
            notebook's ``best_model`` when omitted.

    Returns:
        A ``(y_true, y_pred)`` tuple of 1-D NumPy arrays in dataset order.
    """
    if model is None:
        model = best_model

    true_labels = []
    predicted_labels = []
    for images, batch_labels in dataset:
        # verbose=0 avoids printing one progress bar per batch.
        probabilities = model.predict(images, verbose=0)
        true_labels.extend(batch_labels.numpy())
        predicted_labels.extend(np.argmax(probabilities, axis=1))
    return np.array(true_labels), np.array(predicted_labels)

# Get predictions for validation dataset
# y_true / y_pred are reused by the report and confusion-matrix cells below.
y_true, y_pred = get_predictions(val_dataset)

In [None]:
# Classification Report
# Per-class metrics on the validation predictions, labelled with the class names.
print("Classification Report:")
report_text = classification_report(y_true, y_pred, target_names=classes)
print(report_text)

In [None]:
# Confusion Matrix
# The heatmap is drawn with true classes on the y axis and predictions on the x axis.
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

In [None]:
# Per-class Accuracy Plot
# Diagonal entries (correct predictions) divided by the row totals, i.e. the
# fraction of each true class that was predicted correctly.
correct_per_class = cm.diagonal()
samples_per_class = cm.sum(axis=1)
class_accuracies = correct_per_class / samples_per_class

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=classes, y=class_accuracies, ax=ax)
ax.set_title('Per-class Accuracy')
ax.set_xlabel('Class')
ax.set_ylabel('Accuracy')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Class distribution in training set
# Computed directly from the labels produced by the stratified split. This
# avoids a full pass over the image dataset and leaves `train_labels` aligned
# with `train_paths` instead of overwriting it with shuffled-order labels.
# minlength keeps one entry per class so the result lines up with `classes`.
train_class_dist = np.bincount(train_labels, minlength=num_classes) / len(train_labels)

plt.figure(figsize=(10, 6))
sns.barplot(x=classes, y=train_class_dist)
plt.title('Class Distribution in Training Set')
plt.xlabel('Class')
plt.ylabel('Proportion')
plt.xticks(rotation=45)
plt.show()