In [1]:
#importing libraries

import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
import os
import cv2
import matplotlib.pyplot as plt
from keras.utils import to_categorical


In [2]:
#Loading Image Data:

# Image geometry and label-space settings shared by every cell below.
width, height = 64, 64
channels = 3
num_classes = 7
# Shape of a single image as fed to the networks.
input_shape = (width, height, channels)


In [3]:
# HAM10000 metadata; the loading cell looks up each training image's diagnosis
# code (`dx`) by its `image_id`.
metadata = pd.read_csv('/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv')
# Drop the columns this notebook never reads. The result was previously bound
# to a misspelled name (`metadara`), so the drop never reached `metadata`.
metadata = metadata.drop(columns=['age', 'sex', 'dx_type', 'localization'])

# Ground truth for the ISIC 2018 Task 3 test images, keyed by the `image`
# column (presumably one indicator column per class -- confirm against the CSV).
test_labels_df = pd.read_csv('/kaggle/input/isic-18/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth.csv')

# Diagnosis code -> integer class index used for the training labels.
class_dict = {'bkl':0, 'nv': 1,'df':2, 'mel':3, 'vasc':4, 'bcc':5, 'akiec':6 }

# Upper-cased diagnosis codes in class-index order (dicts preserve insertion
# order); used to select the test ground-truth columns in that same order.
label_order = [code.upper() for code in class_dict]

In [None]:
# temp = np.asarray(test_labels_df[test_labels_df['image'] == image_id][label_order].iloc[0])

# np.asarray(temp[].iloc[0])
# 'BKL', 'NV', 'DF', 'MEL', 'VASC', 'BCC', 'AKIEC'
# .to_dict(orient='records')[0]

In [4]:
def _iter_jpgs(root):
    """Yield (image_id, path) for every file under `root` whose name ends in '.jpg'."""
    suffix = '.jpg'
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            # endswith (not `in`) so names that merely contain ".jpg" are skipped
            if filename.endswith(suffix):
                yield filename[:-len(suffix)], os.path.join(dirname, filename)


def _load_flat_image(img_path):
    """Read one image, resize it to (width, height) and return it as a 1-row uint8 DataFrame.

    Raises:
        IOError: if OpenCV cannot read the file.
    """
    img = cv2.imread(img_path, 1)  # flag 1: load as a 3-channel colour image
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (width, height))
    return pd.DataFrame(img.reshape(1, width * height * channels), dtype='uint8')


train_data_flatten = []
train_output_labels = []
test_data_flatten = []
test_output_labels = []

# Build the id -> label lookups once instead of scanning the whole frame for
# every image. drop_duplicates keeps the first row per id, matching `.iloc[0]`.
dx_by_image_id = metadata.drop_duplicates('image_id').set_index('image_id')['dx'].to_dict()
test_labels_by_image = test_labels_df.drop_duplicates('image').set_index('image')[label_order]

#Training Data
train_roots = (
    '/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_1/',
    '/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_2/',
)
for train_root in train_roots:
    for img_id, img_path in _iter_jpgs(train_root):
        train_data_flatten.append(_load_flat_image(img_path))
        # Integer class index of this image's diagnosis code
        train_output_labels.append(class_dict[dx_by_image_id[img_id]])

#Test Data
for img_id, img_path in _iter_jpgs('/kaggle/input/isic2018-testset/ISIC2018_Task3_Test_Input/'):
    test_data_flatten.append(_load_flat_image(img_path))
    # Ground-truth row for this image, columns in `label_order` order
    test_output_labels.append(test_labels_by_image.loc[img_id].values)

In [5]:
# Training set: stack the one-row frames into a single frame and scale the
# uint8 pixel values by 1/255.
print(len(train_data_flatten))
train_x = pd.concat(train_data_flatten, axis=0).div(255)
print(train_x.shape)

# Test set: same stacking and scaling.
print(len(test_data_flatten))
test_x = pd.concat(test_data_flatten, axis=0).div(255)
print(test_x.shape)


10015
(10015, 12288)
1512
(1512, 12288)


# Data Augmentation: SMOTE oversampling of the minority classes

In [6]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()
# X_train_resampled = train_x.copy()
# y_train_resampled = np.asarray(train_output_labels).copy()
# X_train_resampled, y_train_resampled = smote.fit_resample(train_x, np.asarray(train_output_labels))
train_x, train_output_labels = smote.fit_resample(train_x, np.asarray(train_output_labels))

In [None]:
#Splitting the data in train, val and test
y = np.asarray(train_output_labels)
y_test = [np.where(x == 1.0)[0][0] for x in test_output_labels]
x_train, x_val, y_train, y_val = train_test_split(train_x, y, test_size=0.20, random_state=0)

y_train_c = to_categorical(y_train, num_classes)
y_val_c = to_categorical(y_val, num_classes)
y_test_c = np.asarray(test_output_labels)

In [None]:
# Turn each flattened row back into a (width, height, channels) image so the
# frames become 4-D arrays, as required by the Keras models below.
x_train = x_train.to_numpy().reshape((len(x_train), width, height, channels))
x_val = x_val.to_numpy().reshape((len(x_val), width, height, channels))
x_test = test_x.to_numpy().reshape((len(test_x), width, height, channels))

# **VGG Based CNN**

In [None]:
#Creating the CNN based on VGGNet
# Two conv-conv-pool stages (64 then 128 filters) followed by two 1024-unit
# dense layers with dropout and a softmax output. The input shape and output
# width come from the shared constants instead of repeated literals, so the
# model cannot drift out of sync with the one-hot label width.
cnn_model = Sequential([
    Conv2D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=None, padding='same'),
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=None, padding='same'),
    Flatten(),
    Dense(1024, kernel_initializer='glorot_uniform', activation='relu'),
    Dropout(0.5),
    Dense(1024, kernel_initializer='glorot_uniform', activation='relu'),
    Dropout(0.5),
    Dense(num_classes, kernel_initializer='glorot_uniform', activation='softmax'),
])

In [None]:
# SGD-with-momentum optimizer used to train the custom CNN.
opt = SGD(
    momentum=0.9,
    learning_rate=0.01,
)

In [None]:
# Compile the custom CNN, then train it on the in-memory arrays while
# evaluating on the held-out validation split after every epoch.
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
history = cnn_model.fit(
    x_train,
    np.asarray(y_train_c),
    validation_data=(x_val, y_val_c),
    epochs=74,
    batch_size=256,
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch for the custom CNN.
fig, ax = plt.subplots()

for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)

ax.set(title='Loss over Epochs', xlabel='Epoch', ylabel='Loss')

ax.legend()
plt.show()

In [None]:
# Per-class probabilities from the custom CNN for every test image.
cnn_pred = cnn_model.predict(x=x_test)

In [None]:
# Predicted class = index of the highest probability in each row.
# np.argmax replaces the hand-written scan, which also rebound the name `max`
# and so shadowed the builtin for every cell executed afterwards.
cnn_final_pred = np.argmax(cnn_pred, axis=1)

In [None]:
# Overall test accuracy of the custom CNN (displayed as the cell output).
accuracy_score(y_true=y_test, y_pred=cnn_final_pred)

In [None]:
# Per-class precision / recall / F1 of the custom CNN on the test set.
cnn_report = classification_report(y_test, cnn_final_pred, output_dict=False)
print(cnn_report)

# RESNET50

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, vgg19
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Set up data generators for training and validation sets
# train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
# train_generator = train_datagen.flow_from_directory(
#     directory='/kaggle/input/skin-cancer-mnist-ham10000/',
#     target_size=input_shape[:2],
#     batch_size=32,
#     class_mode='categorical',
#     subset='training'
# )
# val_generator = train_datagen.flow_from_directory(
#     directory='path/to/train/dataset',
#     target_size=input_shape[:2],
#     batch_size=32,
#     class_mode='categorical',
#     subset='validation'
# )

In [None]:
# Load the ResNet50 convolutional base (no classification head) from a local weights file
# NOTE(review): inputs here are pixel values divided by 255; the pretrained
# weights presumably expect the matching keras `preprocess_input` -- confirm.
base_model = ResNet50(weights='/kaggle/input/resnet-package/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', 
                      include_top=False, 
                      input_shape=input_shape)

# Freeze the layers in the base model so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add a new classifier on top of the base model: flatten -> 512-unit dense -> softmax
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=x)

# Compile the model with Adam optimizer and categorical crossentropy loss.
# `learning_rate` replaces the deprecated `lr` keyword (rejected by recent
# Keras releases) and matches the SGD configuration used for the custom CNN.
model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the in-memory arrays (no data generators are used)
model.fit(
    x_train,
    np.asarray(y_train_c),
    validation_data=(x_val,np.asarray(y_val_c)),
    epochs=74
)

# Save the trained model
# model.save('resnet50_skin_cancer.h5')

In [None]:
# Per-class probabilities from the ResNet50-based model for every test image.
pred = model.predict(x=x_test)

In [None]:
# Predicted class = index of the highest probability in each row.
# np.argmax replaces the hand-written scan, which also rebound the name `max`
# and so shadowed the builtin for every cell executed afterwards.
final_pred = np.argmax(pred, axis=1)

In [None]:
# Overall test accuracy of the ResNet50-based model (displayed as the cell output).
accuracy_score(y_true=y_test, y_pred=final_pred)

In [None]:
# Per-class precision / recall / F1 of the ResNet50-based model on the test set.
resnet_report = classification_report(y_test, final_pred, output_dict=False)
print(resnet_report)

# **VGG19**

In [None]:
# Load the VGG19 convolutional base (no classification head) from a local weights file
# NOTE(review): inputs here are pixel values divided by 255; the pretrained
# weights presumably expect the matching keras `preprocess_input` -- confirm.
vgg_base_model = vgg19.VGG19(weights='/kaggle/input/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', 
                      include_top=False, 
                      input_shape=input_shape)

# Freeze the layers in the base model so only the new head is trained
for layer in vgg_base_model.layers:
    layer.trainable = False

# Add a new classifier on top of the base model: flatten -> 512-unit dense -> softmax
x = Flatten()(vgg_base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(num_classes, activation='softmax')(x)
vgg_model = Model(inputs=vgg_base_model.input, outputs=x)

# Compile the model with Adam optimizer and categorical crossentropy loss.
# `learning_rate` replaces the deprecated `lr` keyword (rejected by recent
# Keras releases) and matches the SGD configuration used for the custom CNN.
vgg_model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the in-memory arrays (no data generators are used)
vgg_model.fit(
    x_train,
    np.asarray(y_train_c),
    validation_data=(x_val,np.asarray(y_val_c)),
    epochs=74
)

# Save the trained model
# vgg_model.save('vgg19_skin_cancer.h5')

In [None]:
# Per-class probabilities from the frozen-base VGG19 model for every test image.
vgg_pred = vgg_model.predict(x=x_test)

In [None]:
# Predicted class = index of the highest probability in each row.
# np.argmax replaces the hand-written scan, which also rebound the name `max`
# and so shadowed the builtin for every cell executed afterwards.
vgg_final_pred = np.argmax(vgg_pred, axis=1)

In [None]:
# Overall test accuracy of the frozen-base VGG19 model (displayed as the cell output).
accuracy_score(y_true=y_test, y_pred=vgg_final_pred)

In [None]:
# Per-class precision / recall / F1 of the frozen-base VGG19 model on the test set.
vgg_report = classification_report(y_test, vgg_final_pred, output_dict=False)
print(vgg_report)

# **ENSEMBLING**

In [None]:
# Soft-voting ensemble: sum the class probabilities of the three models that
# have been trained by this point (custom CNN, ResNet50, VGG19) and take the
# arg-max per image.
# The previous version re-ran `cnn_model.predict` under the name `vgg_m_pred`,
# so the CNN was counted twice; the fine-tuned VGG19 is only built in a later
# cell and cannot be used here on a top-to-bottom run.
# NOTE(review): if the fine-tuned VGG19 was meant to be a member, move this
# cell below its training cell and add its predictions to the sum.
total_pred = cnn_pred + pred + vgg_pred

# np.argmax replaces the hand-written scan that rebound the builtin `max`.
ensemble_final_pred = np.argmax(total_pred, axis=1)
print(accuracy_score(y_test, ensemble_final_pred))
print("\n\n")
print(classification_report(y_test, ensemble_final_pred, output_dict = False))

# **VGG with Training multiple Layers**

In [None]:
# Load the VGG19 convolutional base (no classification head) from a local weights file
vgg_m_base_model = vgg19.VGG19(weights='/kaggle/input/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', 
                      include_top=False, 
                      input_shape=input_shape)

# Leave every layer of the base model trainable (full fine-tuning), unlike the
# frozen-base VGG19 model above
for layer in vgg_m_base_model.layers:
    layer.trainable = True

# Add a new classifier on top of the base model: flatten -> 512-unit dense -> softmax
x = Flatten()(vgg_m_base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(num_classes, activation='softmax')(x)
vgg_m_model = Model(inputs=vgg_m_base_model.input, outputs=x)

# Compile the model with Adam optimizer and categorical crossentropy loss.
# `learning_rate` replaces the deprecated `lr` keyword (rejected by recent
# Keras releases) and matches the SGD configuration used for the custom CNN.
# NOTE(review): 0.01 is a large Adam step for fine-tuning pretrained layers --
# confirm this value is intended.
vgg_m_model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the in-memory arrays (no data generators are used)
vgg_m_model.fit(
    x_train,
    np.asarray(y_train_c),
    validation_data=(x_val,np.asarray(y_val_c)),
    epochs=25
)

# Save the trained model
# vgg_m_model.save('vgg19_finetuned_skin_cancer.h5')

In [None]:
# Evaluate the fine-tuned VGG19 on the test set. This previously called
# `cnn_model.predict`, so the metrics printed here were the custom CNN's.
vgg_m_pred = vgg_m_model.predict(x_test)

# Predicted class = index of the highest probability in each row; np.argmax
# replaces the hand-written scan that rebound the builtin `max`.
vgg_m_final_pred = np.argmax(vgg_m_pred, axis=1)
print(accuracy_score(y_test, vgg_m_final_pred))
print("\n\n")
print(classification_report(y_test, vgg_m_final_pred, output_dict = False))