# CNN for skin lesion classification dataset
Implementation of a Convolutional Neural Network (CNN), designed and evaluated for a skin lesion classification task. The provided dataset consists of 900 skin lesion images with binary classification labels *(malignant/benign)*.


In [None]:
import os
from google.colab import drive

# Mount Google Drive and switch into the assignment folder so the relative
# data paths used in the rest of the notebook resolve against it.
drive.mount('/content/gdrive')
# Absolute path: the relative form only resolved while the working directory
# was still /content, so re-running this cell after the chdir failed.
root_path = '/content/gdrive/My Drive/KU MIA/Handin 4'
os.chdir(root_path)
os.getcwd()

## Preparing train and test sets


### Image data

In [None]:
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt

# Show a single training image as a quick check that the data path resolves.
sample_image_path = "Skin lesion data/TrainData/imagesTrain/ISIC_0000001.jpg"
lesionImg = Image.open(sample_image_path)
plt.imshow(lesionImg)

In [3]:
import os
import glob

# Directories holding the train and test images, relative to the working
# directory set at the top of the notebook.
# NOTE(review): the test folder is located under "TrainData" — confirm this
# matches the actual layout of the dataset on disk.
TRAIN_MASK_ROOT = "Skin lesion data/TrainData/imagesTrain"
TEST_MASK_ROOT = "Skin lesion data/TrainData/imagesTest"

In [None]:
# Number of directory entries per split; every entry is counted as one case.
n_train_cases, n_test_cases = (
    len(os.listdir(f"{split_dir}/"))
    for split_dir in (TRAIN_MASK_ROOT, TEST_MASK_ROOT)
)
for split_name, n_cases in (("train", n_train_cases), ("test", n_test_cases)):
    print(f"Found {n_cases} {split_name} cases")

In [5]:
# File names of each split, sorted in place so the image order is reproducible.
train_img = os.listdir(f"{TRAIN_MASK_ROOT}/")
train_img.sort()
test_img = os.listdir(f"{TEST_MASK_ROOT}/")
test_img.sort()

In [6]:
def get_image_as_array(image_name, root_path, size=(256, 256)):
    """Load one image as a normalised single-channel array.

    The image is converted to 8-bit grayscale and resampled to ``size`` with
    Pillow before it is turned into a NumPy array. The previous version
    called ``ndarray.resize``, which does not resample: it reinterprets the
    flat pixel buffer and keeps only its first ``256 * 256`` values, so the
    result was a scrambled fragment of the image rather than a downscaled
    copy.

    Args:
        image_name: File name of the image inside ``root_path``.
        root_path: Directory that contains the image.
        size: Target ``(width, height)`` in pixels, passed to
            ``Image.resize``.

    Returns:
        A ``float64`` array of shape ``(height, width)`` with values scaled
        to the range ``[0, 1]``.
    """
    # The context manager closes the underlying file once pixels are read.
    with Image.open(f"{root_path}/{image_name}") as img:
        gray = img.convert("L").resize(size)
    return np.asarray(gray, dtype=np.float64) / 255

In [None]:
# Load every image of a split and stack the results along a new leading axis.
im_array_training = np.stack(
    tuple(get_image_as_array(file_name, TRAIN_MASK_ROOT) for file_name in train_img),
    axis=0,
)
im_array_testing = np.stack(
    tuple(get_image_as_array(file_name, TEST_MASK_ROOT) for file_name in test_img),
    axis=0,
)

print("Completed loading data")

In [None]:
# The number of stacked images must equal the number of directory entries.
assert len(im_array_training) == n_train_cases, "missing train img"
assert len(im_array_testing) == n_test_cases, "missing test img"

train_img_shape = im_array_training.shape
test_img_shape = im_array_testing.shape
print(f"Loaded train images to array of shape {train_img_shape}")
print(f"Loaded test images to array of shape {test_img_shape}")

### Classification labels 


In [9]:
import numpy as np
import pandas as pd

# Ground-truth CSV, read without a header row; column 1 holds the class label.
LABELS_ROOT = "Skin lesion data/SkinLesionTraining_GroundTruth.csv"

labels_data = pd.read_csv(LABELS_ROOT, header=None).to_numpy()

# Rows 0-799 are used as train labels and rows 800-899 as test labels.
label_column = labels_data[:, 1]
train_labels = label_column[:800]
test_labels = label_column[800:900]

In [None]:
# Each split must have exactly one label per image found on disk.
assert len(train_labels) == n_train_cases, "missing train labels"
assert len(test_labels) == n_test_cases, "missing test labels"

train_labels_shape = train_labels.shape
test_labels_shape = test_labels.shape
print(f"Loaded train labels to array of shape {train_labels_shape}")
print(f"Loaded test labels to array of shape {test_labels_shape}")

In [11]:
# Encode labels as integers using ONE shared, sorted class list. Encoding each
# split with its own np.unique call assigns codes independently, so a split
# that lacks one of the classes would silently receive a different mapping.
label_classes = np.unique(np.concatenate([train_labels, test_labels]))
# Every label is present in `label_classes`, so searchsorted returns its index
# in the sorted class list, i.e. the same code np.unique's inverse would give.
train_labels_num = np.searchsorted(label_classes, train_labels)
test_labels_num = np.searchsorted(label_classes, test_labels)

# Train CNN

In [12]:
import numpy as np 
import os

import skimage.io as io
import skimage.transform as trans

from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras import backend as keras

In [14]:
def cnn(pretrained_weights=None, input_size=(256, 256, 1), lr=1e-4):
    """Build and compile a small CNN for binary image classification.

    The network is three conv(64 filters, 3x3, ReLU, L2-regularised) +
    2x2 max-pool stages, followed by a 256-unit dense layer and a single
    sigmoid output unit. It is compiled with Adam, binary cross-entropy loss
    and the accuracy metric.

    Args:
        pretrained_weights: Optional path to a weights file; when truthy the
            weights are loaded into the compiled model.
        input_size: Shape of one input sample, ``(height, width, channels)``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = Input(input_size)

    # Three identical conv -> pool stages; each pooling halves height/width.
    features = inputs
    for _ in range(3):
        features = Conv2D(64, 3, activation='relu', padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer='l2')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    flattened = Flatten()(features)
    hidden = Dense(256, activation='relu')(flattened)
    outputs = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=inputs, outputs=outputs)

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model

In [15]:
# Training hyper-parameters used when building and fitting the model.
par_batch_size = 10
par_epochs = 50
# Fraction of the training data held out for validation during fit.
par_validation_split = 0.15
par_learning_rate = 0.0001
# NOTE(review): same value as par_validation_split and not referenced in the
# visible cells — confirm whether it is still needed.
validationSplit = 0.15

In [None]:
# Cast to float64 (`np.float` was removed in NumPy 1.24) and append a channel
# axis so the array matches the model's (256, 256, 1) input shape.
im_array_training = np.asarray(im_array_training, dtype=np.float64)
# Only add the axis while the array is still 3-D, so re-running this cell
# does not keep appending dimensions.
if im_array_training.ndim == 3:
    im_array_training = np.expand_dims(im_array_training, axis=3)
print(f"Converted arrays to shape {im_array_training.shape} for inputs")

In [None]:
# One target per row, matching the model's (None, 1) sigmoid output. Using -1
# instead of a hard-coded 800 keeps this valid for any number of training
# cases, and the reshape is safe to re-run.
train_labels_num = train_labels_num.reshape((-1, 1))
print(f"Converted arrays to shape {train_labels_num.shape} for targets.")

In [None]:
# Build and compile the CNN for 256x256 single-channel inputs with the
# learning rate chosen in the parameter cell.
model = cnn(input_size = (256, 256, 1), lr=par_learning_rate)

In [19]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 256, 256, 64)      640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 64)      36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 64, 64, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 64, 64)        36928 

In [None]:
# Final shape check of inputs and targets right before training.
for shape_label, shaped_array in (('train img shape:', im_array_training),
                                  ('train labels shape:', train_labels_num)):
    print(shape_label, shaped_array.shape)

In [None]:
# Train the model; a fraction of the training data is held out for validation
# and the per-epoch metrics are kept in `history`.
fit_options = {
    "batch_size": par_batch_size,
    "epochs": par_epochs,
    "validation_split": par_validation_split,
}
history = model.fit(x=im_array_training, y=train_labels_num, **fit_options)

In [22]:
# Persist the trained model to an HDF5 file in the working directory.
model.save('resultCNN_lesion.hdf5')

In [None]:
# List the metric names recorded during training (used for the plots below).
print(history.history.keys())

In [None]:
# Summarize training history: one wide figure per metric, comparing the
# training curve with the validation curve over the epochs.
history_plots = (
    ('accuracy', 'val_accuracy', 'model accuracy'),
    ('loss', 'val_loss', 'model loss'),
)
for train_key, val_key, plot_title in history_plots:
    plt.figure(figsize=(16, 4))
    for curve_key in (train_key, val_key):
        plt.plot(history.history[curve_key])
    plt.title(plot_title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

# Prediction and evaluation

In [None]:
from tensorflow.keras.models import *

# Reload the saved model and run a quick prediction on the first ten test
# images, adding the channel axis the network expects.
model = load_model('resultCNN_lesion.hdf5')
first_ten_inputs = np.expand_dims(np.asarray(im_array_testing[:10]), axis=3)
results = model.predict(first_ten_inputs)

In [None]:
# Predict on the full test set, adding the channel axis the network expects.
all_test_inputs = np.expand_dims(np.asarray(im_array_testing), axis=3)
all_results = model.predict(all_test_inputs)

In [None]:
# Compare the prediction array shape with the test image array shape.
print(all_results.shape, im_array_testing.shape)

In [28]:
# Binarise the sigmoid outputs: values strictly above 0.5 become 1, the rest 0.
all_results = (all_results > 0.5).astype(int)

In [None]:
import pandas as pd

# Side-by-side table of true labels and predicted labels for inspection.
# The predictions have shape (N, 1); flatten them explicitly so the column
# assignment does not depend on pandas accepting a 2-D array as one column.
df = pd.DataFrame()
df['target'] = test_labels_num
df['pred'] = np.asarray(all_results).ravel()
df.head(10)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix with rows = true class and columns = predicted class.
# `labels=[0, 1]` pins the matrix to 2x2 even if one class never occurs, so
# the fixed cm[i, j] indexing in the metrics cell cannot go out of bounds.
# The (N, 1) predictions are flattened to the 1-D form the function expects.
cm = confusion_matrix(test_labels_num, all_results.astype(int).ravel(), labels=[0, 1])
print(cm)

In [None]:
# Derive accuracy, sensitivity and specificity from the 2x2 confusion matrix.
# Layout (rows = true class, columns = predicted class, ordered by label):
#   cm[0, 0] = TN   cm[0, 1] = FP
#   cm[1, 0] = FN   cm[1, 1] = TP
# NOTE(review): assumes label 1 is the positive (malignant) class — confirm
# against the label encoding used for `test_labels_num`.
total = cm.sum()
accuracy1 = (cm[0, 0] + cm[1, 1]) / total
print ('Accuracy : ', accuracy1)

# Sensitivity (true positive rate) = TP / (TP + FN): uses the positive row.
# The two metrics were previously swapped.
sensitivity1 = cm[1, 1] / (cm[1, 0] + cm[1, 1])
print('Sensitivity : ', sensitivity1 )

# Specificity (true negative rate) = TN / (TN + FP): uses the negative row.
specificity1 = cm[0, 0] / (cm[0, 0] + cm[0, 1])
print('Specificity : ', specificity1)