### Loading data

In [4]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load and preprocess images
def load_images_from_folder(folder, label):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        if os.path.isfile(img_path):
            img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
            img_array = tf.keras.preprocessing.image.img_to_array(img)
            images.append(img_array)
            labels.append(label)
    return np.array(images), np.array(labels)

def load_dataset(base_path):
    benign_train, benign_labels_train = load_images_from_folder(os.path.join(base_path, 'train', 'benign'), 'benign')
    malignant_train, malignant_labels_train = load_images_from_folder(os.path.join(base_path, 'train', 'malignant'), 'malignant')
    
    benign_test, benign_labels_test = load_images_from_folder(os.path.join(base_path, 'test', 'benign'), 'benign')
    malignant_test, malignant_labels_test = load_images_from_folder(os.path.join(base_path, 'test', 'malignant'), 'malignant')
    
    X_train = np.concatenate((benign_train, malignant_train), axis=0)
    y_train = np.concatenate((benign_labels_train, malignant_labels_train), axis=0)
    
    X_test = np.concatenate((benign_test, malignant_test), axis=0)
    y_test = np.concatenate((benign_labels_test, malignant_labels_test), axis=0)
    
    return X_train, y_train, X_test, y_test

base_path = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/'
X_train, y_train, X_test, y_test = load_dataset(base_path)

### Imports and normalisation

In [5]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

X_train_normalized = X_train / 224.0
X_test_normalized = X_test / 224.0

### Declaration of CNN

In [6]:
def create_cnn_model():
    model = Sequential([
        Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(224, 224, 3)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        
        Conv2D(256, (3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        
        GlobalAveragePooling2D(),
        
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

### Cross Validation

In [None]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1234)
fold_no = 1
accuracies = []

for train_idx, val_idx in kfold.split(X_train_normalized, y_train_encoded):
    print(f"Training fold {fold_no}...")

    X_train_fold, X_val_fold = X_train_normalized[train_idx], X_train_normalized[val_idx]
    y_train_fold, y_val_fold = y_train_encoded[train_idx], y_train_encoded[val_idx]

    model = create_cnn_model()
    model.fit(X_train_fold, y_train_fold, epochs=50, batch_size=32, validation_data=(X_val_fold, y_val_fold), verbose=1)

    y_val_pred = (model.predict(X_val_fold) > 0.5).astype("int32")
    accuracy = accuracy_score(y_val_fold, y_val_pred)
    accuracies.append(accuracy)
    print(f'Fold {fold_no} Accuracy: {accuracy * 100:.2f}%')

    fold_no += 1

average_accuracy = np.mean(accuracies)
print(f'Average Accuracy: {average_accuracy * 100:.2f}%')

Training fold 1...


  super().__init__(


Epoch 1/50
[1m  2/241[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 83ms/step - accuracy: 0.5391 - loss: 0.7771   

I0000 00:00:1716474584.670408     161 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - accuracy: 0.8072 - loss: 0.4429

W0000 00:00:1716474607.979346     164 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 121ms/step - accuracy: 0.8074 - loss: 0.4427 - val_accuracy: 0.5206 - val_loss: 1.1820
Epoch 2/50
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 64ms/step - accuracy: 0.9038 - loss: 0.2440 - val_accuracy: 0.8480 - val_loss: 0.3448
Epoch 11/50
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 64ms/step - accuracy: 0.9076 - loss: 0.2395 - val_accuracy: 0.8417 - val_loss: 0.3719
Epoch 12/50
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 64ms/step - accuracy: 0.8992 - loss: 0.2466 - val_accuracy: 0.8662 - val_loss: 0.3101
Epoch 13/50
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 64ms/step - accuracy: 0.9102 - loss: 0.2291 - val_accuracy: 0.9094 - val_loss: 0.2482
Epoch 14/50
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 64ms/step - accuracy: 0.9102 - loss: 0.2216 - val_accuracy: 0.9073 - val_loss: 0.2309
Epoch 15/50
[1m241/24

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[1m215/241[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m1s[0m 60ms/step - accuracy: 0.9341 - loss: 0.1729

### Training, classification on validation set

In [None]:
final_model = create_cnn_model()
final_model.fit(X_train_normalized, y_train_encoded, epochs=50, batch_size=32, validation_data=(X_test_normalized, y_test_encoded), verbose=1)

y_test_pred = (final_model.predict(X_test_normalized) > 0.5).astype("int32")
final_accuracy = accuracy_score(y_test_encoded, y_test_pred)
print(f'Test Set Accuracy: {final_accuracy * 100:.2f}%')