In [1]:
import shutil
import random
random.seed(1)

run = True
if run:
    # Create train/val/test split
    with open("data/ISIC_2019_Training_GroundTruth.csv") as file:
        next(file)  # Skip header
        for line in file:
            # Parse line for image path and whether benign/malignant
            tokens = line.split(",")
            image = tokens[0] + ".jpg"
            melanoma = int(float(tokens[1]))

            # Decide whether sample is train/val/test
            rand = random.random()
            if rand < 0.8:
                loc = "data/train/"
            elif rand < 0.9:
                loc = "data/val/"
            else:
                loc = "data/test/"

            if melanoma == 0:
                loc += "benign/"
            else:
                loc += "malignant/"

            # Copy to folder
            src = "data/ISIC_2019_Training_Input/" + image
            dst = loc + image
            shutil.copy(src, dst)

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory



In [3]:
BATCH_SIZE = 32
IMG_SIZE = (160, 160)
train_dataset = image_dataset_from_directory("data/train", shuffle=True, batch_size=BATCH_SIZE, image_size=IMG_SIZE)
validation_dataset = image_dataset_from_directory("data/val", shuffle=True, batch_size=BATCH_SIZE, image_size=IMG_SIZE)

Found 20225 files belonging to 2 classes.
Found 2551 files belonging to 2 classes.


In [7]:
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_dataset = validation_dataset.take(val_batches // 5)
validation_dataset = validation_dataset.skip(val_batches // 5)

In [8]:
print('Number of validation batches: %d' % tf.data.experimental.cardinality(validation_dataset))
print('Number of test batches: %d' % tf.data.experimental.cardinality(test_dataset))

Number of validation batches: 64
Number of test batches: 16


In [9]:
AUTOTUNE = tf.data.AUTOTUNE

train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

In [10]:
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip('horizontal'),
  tf.keras.layers.RandomRotation(0.2),
])

In [12]:
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [13]:
rescale = tf.keras.layers.Rescaling(1./127.5, offset=-1)

In [14]:
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [15]:
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)

(32, 5, 5, 1280)


In [16]:
base_model.trainable = False

In [17]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 160, 160, 3) 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 80, 80, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 80, 80, 32)   128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 80, 80, 32)   0           bn_Conv1[0][0]                   
_______________________________________________________________________________

In [18]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

(32, 1280)


In [19]:
prediction_layer = tf.keras.layers.Dense(1)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

(32, 1)


In [20]:
inputs = tf.keras.Input(shape=(160, 160, 3))
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)

In [21]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [22]:
len(model.trainable_variables)

2

In [23]:
initial_epochs = 1

loss0, accuracy0 = model.evaluate(validation_dataset)



In [24]:
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

initial loss: 0.53
initial accuracy: 0.82


In [26]:
history = model.fit(train_dataset,
                    epochs=initial_epochs,
                    validation_data=validation_dataset)



In [29]:
base_model.trainable = True

In [30]:
print("Number of layers in the base model: ", len(base_model.layers))
fine_tune_at = 100
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False

Number of layers in the base model:  154


In [31]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=['accuracy'])

In [32]:
len(model.trainable_variables)

56

In [33]:
fine_tune_epochs = 1
total_epochs =  initial_epochs + fine_tune_epochs

history_fine = model.fit(train_dataset,
                         epochs=total_epochs,
                         initial_epoch=history.epoch[-1],
                         validation_data=validation_dataset)

Epoch 1/2
Epoch 2/2


In [36]:
loss, accuracy = model.evaluate(test_dataset)
print('Test accuracy :', accuracy)

Test accuracy : 0.853515625


In [38]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report

preds = []
labels = []

for image_batch, label_batch in test_dataset.as_numpy_iterator():
    # Apply a sigmoid since our model returns logits
    predictions = model.predict_on_batch(image_batch).flatten()
    predictions = tf.nn.sigmoid(predictions)
    predictions = tf.where(predictions < 0.5, 0, 1)
    preds.extend(predictions.numpy().tolist())
    labels.extend(label_batch.tolist())

print(confusion_matrix(labels, preds))
print(precision_score(labels, preds))
print(recall_score(labels, preds))
print(f1_score(labels, preds))
print(classification_report(labels, preds))

[[417   1]
 [ 78  16]]
0.9411764705882353
0.1702127659574468
0.2882882882882883
              precision    recall  f1-score   support

           0       0.84      1.00      0.91       418
           1       0.94      0.17      0.29        94

    accuracy                           0.85       512
   macro avg       0.89      0.58      0.60       512
weighted avg       0.86      0.85      0.80       512



In [85]:
model.save("my_model")

INFO:tensorflow:Assets written to: my_model\assets


