In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import cv2
from keras.models import Sequential, load_model
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [19]:
# Import missing layers from Keras
from keras.layers import Conv2D, MaxPool2D, BatchNormalization, Flatten, Dense, Dropout

In [20]:
# Constants and Configuration
IMAGE_SIZE = (100, 100)
EPOCHS = 10
BATCH_SIZE = 32

In [21]:
# Separate Classes and Functions

class DataLoader:
    """Handles loading and preprocessing data from disk."""

    def __init__(self, metadata_path, img_dir1, img_dir2):
        self.metadata_path = metadata_path
        self.img_dir1 = img_dir1
        self.img_dir2 = img_dir2
        self.df_skin = None

    def load_metadata(self):
        self.df_skin = pd.read_csv(self.metadata_path)
        lesion_type_dict = {
            'nv': 'Melanocytic nevi',
            'mel': 'Melanoma',
            'bkl': 'Benign keratosis-like lesions ',
            'bcc': 'Basal cell carcinoma',
            'akiec': 'Actinic keratoses',
            'vasc': 'Vascular lesions',
            'df': 'Dermatofibroma'
        }
        lesion_ID_dict = {
            'nv': 0,
            'mel': 1,
            'bkl': 2,
            'bcc': 3,
            'akiec': 4,
            'vasc': 5,
            'df': 6
        }
        self.df_skin['lesion_type'] = self.df_skin['dx'].map(lesion_type_dict)
        self.df_skin['lesion_ID'] = self.df_skin['dx'].map(lesion_ID_dict)
        return self.df_skin

    def produce_new_img(self, img):
        """Produce augmented images by rotating and flipping the original image."""
        return [
            cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
            cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
            cv2.rotate(img, cv2.ROTATE_180),
            cv2.flip(img, 0),
            cv2.flip(img, 1)
        ]

    def load_images(self):
        X, y = [], []
        # Process images from folder 1 and 2
        for folder in [self.img_dir1, self.img_dir2]:
            for file in os.listdir(folder):
                image_path = os.path.join(folder, file)
                img_id = file.replace('.jpg', '')

                img = cv2.imread(image_path)
                img_resized = cv2.resize(img, IMAGE_SIZE)

                X.append(img_resized)
                
                # Fix the conversion to integer using iloc[0]
                label = int(self.df_skin[self.df_skin['image_id'] == img_id].lesion_ID.iloc[0])
                y.append(label)

                # Augment data for minority classes
                if label != 0:
                    augmented_imgs = self.produce_new_img(img_resized)
                    for aug_img in augmented_imgs:
                        X.append(aug_img)
                        y.append(label)
        
        return np.array(X), to_categorical(np.array(y), num_classes=7)


class ModelFactory:
    """Factory class to create model instances."""
    
    @staticmethod
    def create_cnn(input_shape, num_classes):
        model = Sequential([
            Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=input_shape),
            BatchNormalization(),
            MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
            Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
            Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            Conv2D(filters=384, kernel_size=(1, 1), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            Conv2D(filters=256, kernel_size=(1, 1), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
            Flatten(),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(num_classes, activation='softmax')
        ])
        return model


class ModelTrainer:
    """Handles training and evaluation of the model."""
    
    def __init__(self, model, class_weights, epochs, batch_size):
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.class_weights = class_weights

    def compile_model(self, learning_rate):
        optimizer = Adam(lr=learning_rate)
        self.model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    def train(self, X_train, y_train, X_val, y_val):
        datagen = ImageDataGenerator(zoom_range=0.2, horizontal_flip=True, shear_range=0.2)
        datagen.fit(X_train)

        early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
        model_checkpoint = ModelCheckpoint(filepath='model.h5', save_best_only=True, monitor='val_accuracy', verbose=1)

        history = self.model.fit(
            datagen.flow(X_train, y_train),
            epochs=self.epochs,
            batch_size=self.batch_size,
            shuffle=True,
            validation_data=(X_val, y_val),
            class_weight=self.class_weights,
            callbacks=[early_stopping, model_checkpoint]
        )

        return history

    def evaluate(self, X_test, y_test):
        scores = self.model.evaluate(X_test, y_test, verbose=1)
        print(f"Accuracy: {scores[1] * 100:.2f}%")
        return scores

    def save_model(self, model_path="model.h5"):
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

In [22]:
# Main Execution Code

# Load data
data_loader = DataLoader(
    metadata_path='./kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv',
    img_dir1='./kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1/',
    img_dir2='./kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2/'
)

df_skin = data_loader.load_metadata()
X, y = data_loader.load_images()

In [23]:
# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=50, stratify=y)

In [24]:
# Create model
model = ModelFactory.create_cnn(input_shape=(100, 100, 3), num_classes=7)

In [25]:
# Compute class weights for unbalanced data
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(np.argmax(y, axis=1)), y=np.argmax(y, axis=1))
class_weights_dict = dict(enumerate(class_weights))

In [26]:
# Train model
model_trainer = ModelTrainer(model=model, class_weights=class_weights_dict, epochs=EPOCHS, batch_size=BATCH_SIZE)
model_trainer.compile_model(learning_rate=0.0001)

history = model_trainer.train(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

  super().__init__(name, **kwargs)


Epoch 1/10


2024-10-03 22:35:41.327093: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 1: val_accuracy improved from -inf to 0.08908, saving model to model.h5
Epoch 2/10
Epoch 2: val_accuracy improved from 0.08908 to 0.27683, saving model to model.h5
Epoch 3/10
Epoch 3: val_accuracy improved from 0.27683 to 0.45181, saving model to model.h5
Epoch 4/10
Epoch 4: val_accuracy improved from 0.45181 to 0.57762, saving model to model.h5
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.57762
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.57762
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.57762
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.57762
Epoch 9/10
Epoch 9: val_accuracy improved from 0.57762 to 0.61766, saving model to model.h5
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.61766


In [27]:
# Evaluate model
model_trainer.evaluate(X_test=X_test, y_test=y_test)

Accuracy: 58.75%


[1.0716054439544678, 0.5875442028045654]

In [28]:
# Save model
model_trainer.save_model()

Model saved to model.h5
