In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use("ggplot")

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from glob import glob
import seaborn as sns
import pprint as pp

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils import resample

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils import np_utils

import itertools

import cv2
from PIL import Image

In [5]:
# Root folder of the HAM10000 dataset, relative to the working directory.
base_skin_dir = os.path.join('..', 'input/skin-cancer-mnist-ham10000')

# Index every JPEG found one directory below the dataset root by its image id
# (the file name without its extension).
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
)

# Diagnosis code (the `dx` column) -> human-readable lesion name.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
)

In [6]:
# Load the HAM10000 metadata table.
data = pd.read_csv(os.path.join(base_skin_dir, 'HAM10000_metadata.csv'))

# Drop the three excluded diagnosis codes in a single pass. The explicit
# .copy() makes `data` an independent frame, so the column assignments below
# do not write into a slice of the raw table (SettingWithCopyWarning).
data = data[~data['dx'].isin(['akiec', 'vasc', 'df'])].copy()

# Attach the image path on disk and the readable lesion name; ids/codes that
# are missing from the lookup dictionaries map to None.
data['path'] = data['image_id'].map(imageid_path_dict.get)
data['cell_type'] = data['dx'].map(lesion_type_dict.get)
# Integer class label derived from the lesion name.
data['cell_type_idx'] = pd.Categorical(data['cell_type']).codes

data.head()

In [7]:
# First five image paths among the rows labelled with class index 0.
bcc_path = data.loc[data['cell_type_idx'] == 0, 'path'].head(5)

In [8]:
# Convert the selected paths to a NumPy array so they can be indexed by position.
bcc_path = np.array(bcc_path)

In [9]:
# Show the sampled paths as the cell output.
bcc_path

In [10]:
# First five image paths among the rows labelled with class index 1, as a NumPy array.
bkl_path = np.array(data.loc[data['cell_type_idx'] == 1, 'path'].head(5))

In [11]:
# First five image paths among the rows labelled with class index 2, as a NumPy array.
nv_path = np.array(data.loc[data['cell_type_idx'] == 2, 'path'].head(5))

In [12]:
# First five image paths among the rows labelled with class index 3, as a NumPy array.
ml_path = np.array(data.loc[data['cell_type_idx'] == 3, 'path'].head(5))

In [13]:
# Keep only the first 1000 rows diagnosed as 'nv'.
data_1 = data[data['dx'] == 'nv'].head(1000)

In [14]:
# Replace the full set of 'nv' rows with the subset held in `data_1`.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent and, with ignore_index=True, produces the same
# fresh 0..n-1 index.
data = pd.concat([data[data['dx'] != 'nv'], data_1], ignore_index=True)

In [15]:
# Preview one sample image. The path is built from `base_skin_dir` instead of
# repeating the dataset root as a literal, so it follows any change to it.
Image.open(os.path.join(base_skin_dir, 'HAM10000_images_part_1', 'ISIC_0028155.jpg'))

In [16]:
# Bar chart of the number of rows per lesion type.
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
data['cell_type'].value_counts().plot.bar(ax=ax1)

# Balancing the dataset

In [18]:
def balanced_dataset(df, n_samples=7000, random_state=123):
    """Return a frame in which every class has the same number of rows.

    Each distinct value of ``df['cell_type_idx']`` is resampled *with
    replacement* to exactly ``n_samples`` rows, and the per-class samples are
    concatenated in order of first appearance. Original index labels are kept,
    so the result's index contains duplicates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``cell_type_idx`` column.
    n_samples : int, default 7000
        Number of rows drawn for each class.
    random_state : int, default 123
        Seed passed to ``resample`` for every class.

    Returns
    -------
    pandas.DataFrame
        The balanced frame; an empty frame with ``df``'s columns when ``df``
        has no rows.
    """
    # Collect the per-class samples first and concatenate once, instead of
    # growing a frame inside the loop starting from an empty DataFrame.
    per_class = [
        resample(df[df['cell_type_idx'] == cat],
                 replace=True,
                 n_samples=n_samples,
                 random_state=random_state)
        for cat in df['cell_type_idx'].unique()
    ]

    if not per_class:
        # pd.concat rejects an empty list; hand back an empty frame instead.
        return df.iloc[0:0].copy()

    return pd.concat(per_class)

# Load Images

In [19]:
def load_img_data(size, df, balanced=False):
    """Load the images listed in ``df['path']`` into a single float32 array.

    Parameters
    ----------
    size : int
        Side length, in pixels, of the square every image is resized to.
    df : pandas.DataFrame
        Must contain ``path`` (image file path) and ``cell_type_idx`` (label)
        columns.
    balanced : bool, default False
        When True, ``df`` is first passed through ``balanced_dataset``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Images of shape ``(n, size, size, 3)`` in RGB channel order with
        values scaled to ``[0, 1]``, and the matching ``cell_type_idx`` values.

    Raises
    ------
    FileNotFoundError
        If a path is missing or the file cannot be decoded.
    """
    if balanced:
        df = balanced_dataset(df)

    imgs = []
    for path in tqdm(list(df['path'])):
        # cv2.imread reports an unreadable file by returning None rather than
        # raising, and a path that was never resolved is not a string at all;
        # fail here with the offending path instead of deep inside cv2.resize.
        img = cv2.imread(path) if isinstance(path, str) else None
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path!r}")

        # OpenCV decodes colour images as BGR. Convert to RGB so the arrays
        # match both the ImageNet-pretrained backbone and images opened with
        # PIL elsewhere in the notebook.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (size, size))  # dsize is (width, height)
        imgs.append(img.astype(np.float32) / 255.)

    if imgs:
        imgs = np.stack(imgs, axis=0)
    else:
        # np.stack rejects an empty list; keep the documented 4-D shape.
        imgs = np.empty((0, size, size, 3), dtype=np.float32)
    print(imgs.shape)

    return imgs, df['cell_type_idx'].values

In [20]:
# Load every image at 128x128 together with its integer class label.
imgs, target = load_img_data(size=128, df=data, balanced=False)

In [21]:
# Hold out 15% of the images as the test set. A fixed seed keeps the split
# reproducible and `stratify` keeps the class proportions equal across subsets.
x_train, x_test, y_train, y_test = train_test_split(
    imgs, target, test_size=0.15, stratify=target, random_state=42)

# Carve the validation set out of the *training* portion only. Splitting the
# full `imgs`/`target` arrays again here would put test images back into the
# training and validation sets and leak them into model fitting/selection.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.05, stratify=y_train, random_state=42)

# Bundle of all six arrays, indexed by position in later cells.
train_val_test = (x_train, y_train, x_val, y_val, x_test, y_test)

[x_train.shape, x_val.shape, x_test.shape]

In [31]:
# MobileNetV2 with ImageNet weights and without its classification head.
mv2 = tf.keras.applications.MobileNetV2(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet',
)

In [32]:
# Classifier: MobileNetV2 features followed by two regularised dense blocks
# and a softmax layer with one unit per distinct label in `target`.
model = tf.keras.models.Sequential([
    mv2,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.46),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.46),
    tf.keras.layers.Dense(len(set(target)), activation='softmax'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [33]:
# Integer labels -> sparse categorical cross-entropy. `learning_rate` is the
# supported argument name; the old `lr` alias is deprecated and rejected by
# newer Keras releases.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [51]:
# File that receives the checkpoint with the lowest validation loss so far.
best_weights = "mobile_net_v2_weights.hdf5"

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=best_weights,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

In [48]:
# Train for 25 epochs, validating on the (x_val, y_val) pair stored at
# positions 2 and 3 of `train_val_test`; the checkpoint callback runs each epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    callbacks=[checkpoint],
    validation_data=train_val_test[2:4],
)

In [49]:
# Highest validation accuracy recorded across all epochs.
val_accuracy_per_epoch = history.history["val_accuracy"]
best_acc = max(val_accuracy_per_epoch)
print('Max validation accuracy:', best_acc)

In [52]:
# Restore the best checkpoint. Reuse `best_weights` (the path handed to the
# ModelCheckpoint callback) so the file name is defined in one place only.
model.load_weights(best_weights)

In [56]:
# Take the x-axis length from the recorded history rather than a hard-coded
# 25, so the plot still works if the number of epochs is changed or training
# stops early (mismatched x/y lengths make plt.plot raise).
epochs = len(history.history["loss"])
epoch_axis = np.arange(0, epochs)

plt.figure(figsize=(12, 8))
# (history key, legend label) for each curve, in drawing order.
for history_key, curve_label in [("loss", "train_loss"),
                                 ("val_loss", "val_loss"),
                                 ("accuracy", "train_acc"),
                                 ("val_accuracy", "val_acc")]:
    plt.plot(epoch_axis, history.history[history_key], label=curve_label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.show()

In [61]:
# Open the second sampled path, resize to 128x128, scale pixels to [0, 1]
# and add a leading batch axis so the array can be fed to the model.
image = np.array(Image.open(bcc_path[1]).resize((128, 128))) / 255.0
image = image.reshape(1, 128, 128, 3)

In [62]:
# Softmax class probabilities for the single prepared image.
model.predict(image)

In [63]:
# Positions 4 and 5 of `train_val_test` hold the test images and test labels.
test_score = model.evaluate(*train_val_test[4:], verbose=0)
print('Test set score:', test_score)

# Per-class probabilities for every test image.
predictions = model.predict(train_val_test[4], batch_size=32)

In [65]:
   def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Turn the predicted probabilities into class indices (highest probability
# wins) and compare them with the true test labels.
Y_pred = predictions
Y_pred_classes = Y_pred.argmax(axis=1)
confusion_mtx = confusion_matrix(y_true=y_test, y_pred=Y_pred_classes)

plot_confusion_matrix(cm=confusion_mtx, classes=range(4))