In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import pandas as pd
from tensorflow.keras.applications import ResNet50V2

tf.__version__

'2.3.0-dev20200522'

In [2]:
PATH = 'data'

os.listdir(PATH)

['jpeg224',
 'sample_submission.csv',
 'subset.csv',
 'subset_clean.csv',
 'test.csv',
 'test_clean.csv',
 'train.csv',
 'train_clean.csv',
 'train_clean_split.csv',
 'train_split.csv',
 'val_clean_split.csv',
 'val_split.csv']

In [3]:
#train = pd.read_csv(f'{PATH}/train.csv')
train = pd.read_csv(f'{PATH}/subset.csv')
val = pd.read_csv(f'{PATH}/val_split.csv')

train.shape, val.shape

((2220, 8), (10932, 8))

In [4]:
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_0533349,IP_5208504,female,45.0,lower extremity,unknown,benign,0
1,ISIC_8814612,IP_0414408,male,50.0,torso,unknown,benign,0
2,ISIC_6515241,IP_6245507,male,45.0,lower extremity,unknown,benign,0
3,ISIC_5075261,IP_2117218,male,40.0,upper extremity,unknown,benign,0
4,ISIC_2624460,IP_1969685,male,50.0,torso,unknown,benign,0


In [5]:
BATCH_SIZE = 64
AUTO = tf.data.experimental.AUTOTUNE

def decode(filename, label):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    return img, label

def augment(img, label):
    # augmentation
    #img = tf.image.flip_left_right(img)
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_flip_up_down(img)
    return img, label

def load_ds(df):
    options = tf.data.Options()
    options.experimental_deterministic = False
    imgs, labels = df["image_name"].values, df["target"].values
    imgs = [f'{PATH}/jpeg224/train/{img}.jpg' for img in imgs]
    ds = tf.data.Dataset.from_tensor_slices((imgs, labels))
    ds = ds.with_options(options)
    ds = ds.map(decode, num_parallel_calls=AUTO)
    ds = ds.cache()
    ds = ds.map(augment, num_parallel_calls=AUTO)
    ds = ds.shuffle(2048)
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(buffer_size=AUTO)
    return ds

In [6]:
train_ds = load_ds(train)
val_ds = load_ds(val)

In [7]:
IMAGE_SIZE = (224, 224, 3)

encoder = ResNet50V2(
    include_top=False,
    input_shape=IMAGE_SIZE,
    weights='imagenet'
)
encoder.trainable = False

inputs = keras.Input(shape=IMAGE_SIZE)
x = keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
x = encoder(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
outputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
rescaling (Rescaling)        (None, 224, 224, 3)       0         
_________________________________________________________________
resnet50v2 (Functional)      (None, 7, 7, 2048)        23564800  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2049      
Total params: 23,566,849
Trainable params: 2,049
Non-trainable params: 23,564,800
_________________________________________________________________


In [8]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(name="auc")]
)

In [9]:
filepath = './checkpoints/checkpoint'
cb = tf.keras.callbacks.ModelCheckpoint(
    filepath = filepath,
    monitor="val_auc",
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode="max"
)

In [10]:
model.fit(
    train_ds, 
    epochs=10, 
    validation_data=val_ds, 
    validation_steps=10,
    callbacks=[cb]
)

Epoch 1/10
Epoch 00001: val_auc improved from -inf to 0.41231, saving model to ./checkpoints\checkpoint
Epoch 2/10
Epoch 00002: val_auc improved from 0.41231 to 0.43204, saving model to ./checkpoints\checkpoint
Epoch 3/10
Epoch 00003: val_auc improved from 0.43204 to 0.55793, saving model to ./checkpoints\checkpoint
Epoch 4/10
Epoch 00004: val_auc improved from 0.55793 to 0.65794, saving model to ./checkpoints\checkpoint
Epoch 5/10
Epoch 00005: val_auc improved from 0.65794 to 0.74915, saving model to ./checkpoints\checkpoint
Epoch 6/10
Epoch 00006: val_auc improved from 0.74915 to 0.79628, saving model to ./checkpoints\checkpoint
Epoch 7/10
Epoch 00007: val_auc did not improve from 0.79628
Epoch 8/10
Epoch 00008: val_auc improved from 0.79628 to 0.81781, saving model to ./checkpoints\checkpoint
Epoch 9/10
Epoch 00009: val_auc did not improve from 0.81781
Epoch 10/10
Epoch 00010: val_auc did not improve from 0.81781


<tensorflow.python.keras.callbacks.History at 0x235eb64dd88>

In [11]:
model.load_weights(filepath)
model.save("model.h5")

In [12]:
model.evaluate(val_ds)



[0.0781068503856659, 0.7815940976142883]

In [13]:
m = tf.keras.metrics.AUC()
aucs = []
for imgs, labels in val_ds:
    imgs_lr = tf.image.flip_left_right(imgs)    
    imgs_ud = tf.image.flip_up_down(imgs)
    preds = (model.predict(imgs) + model.predict(imgs_lr) + model.predict(imgs_ud)) / 3
    _ = m.update_state(labels, preds)
    aucs.append(m.result().numpy())

In [14]:
import numpy as np

np.mean(aucs)

0.77226335

In [15]:
test = pd.read_csv(f'{PATH}/test.csv')

test.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity


In [16]:
def decode_test(filename):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    return img

def load_test_ds(df):
    imgs = df["image_name"].values
    imgs = [f'{PATH}/jpeg224/test/{img}.jpg' for img in imgs]
    ds = tf.data.Dataset.from_tensor_slices(imgs)
    ds = ds.map(decode_test)
    ds = ds.batch(BATCH_SIZE)
    return ds

In [17]:
test_ds = load_test_ds(test)

In [18]:
preds = []
# TTAx3
for imgs in test_ds:
    imgs_lr = tf.image.flip_left_right(imgs)    
    imgs_ud = tf.image.flip_up_down(imgs)
    _preds = (model.predict(imgs) + model.predict(imgs_lr) + model.predict(imgs_ud)) / 3
    preds += _preds.ravel().tolist()

In [19]:
preds

[0.001971568213775754,
 0.002981459489092231,
 0.019012002274394035,
 0.0012822275748476386,
 0.004484205041080713,
 0.01675780862569809,
 0.009379414841532707,
 0.4928835332393646,
 0.04189993441104889,
 0.03770130127668381,
 0.012979435734450817,
 0.0014842894161120057,
 0.030318059027194977,
 0.0038964441046118736,
 0.05177859589457512,
 0.012068033218383789,
 0.004757618065923452,
 0.0043905009515583515,
 0.016407674178481102,
 0.09448408335447311,
 0.013995363377034664,
 0.00235938117839396,
 0.022054597735404968,
 0.009006153792142868,
 0.06566380709409714,
 0.014822444878518581,
 0.0022171877790242434,
 0.016176514327526093,
 0.0029272325336933136,
 0.006256499793380499,
 0.014300706796348095,
 0.008656014688313007,
 0.083595871925354,
 0.04811738803982735,
 0.04390553757548332,
 0.004414091352373362,
 0.010540011338889599,
 0.051752861589193344,
 0.0024811953771859407,
 0.0073437620885670185,
 0.0033128447830677032,
 0.035892054438591,
 0.07691255211830139,
 0.00293913856148719

In [20]:
submission = pd.DataFrame({'image_name': test['image_name'].values, 'target': preds})

submission

Unnamed: 0,image_name,target
0,ISIC_0052060,0.001972
1,ISIC_0052349,0.002981
2,ISIC_0058510,0.019012
3,ISIC_0073313,0.001282
4,ISIC_0073502,0.004484
...,...,...
10977,ISIC_9992485,0.016382
10978,ISIC_9996992,0.030199
10979,ISIC_9997917,0.032493
10980,ISIC_9998234,0.002046


In [21]:
submission.to_csv('submission.csv', index=False)