In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
import os
import pandas as pd
from tensorflow.keras.applications import ResNet50V2
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np

tf.__version__

'2.3.0-dev20200522'

In [2]:
PATH = 'data'

os.listdir(PATH)

['jpeg224',
 'sample_submission.csv',
 'subset.csv',
 'test.csv',
 'train.csv',
 'train_split.csv',
 'val_split.csv']

In [3]:
train = pd.read_csv(f'{PATH}/train.csv')

train.shape

(33126, 8)

In [4]:
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [5]:
BATCH_SIZE = 64
AUTO = tf.data.experimental.AUTOTUNE

def decode(filename, label):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    return img, label

def load_ds(df):
    options = tf.data.Options()
    options.experimental_deterministic = False
    imgs, labels = df["image_name"].values, df["target"].values
    imgs = [f'{PATH}/jpeg224/train/{img}.jpg' for img in imgs]
    ds = tf.data.Dataset.from_tensor_slices((imgs, labels))
    ds = ds.with_options(options)
    ds = ds.map(decode, num_parallel_calls=AUTO)
    ds = ds.cache()
    ds = ds.shuffle(2048)
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(buffer_size=AUTO)
    return ds

In [6]:
FOLDS = 3
aucs = []
skf = StratifiedKFold(n_splits=FOLDS, random_state=42, shuffle=True)
for f, (train_index,val_index) in enumerate(skf.split(X=np.zeros(len(train)), y=train['target'])):
    print("Fold: ", f+1)
    
    train_fold = train.iloc[train_index]
    _, subset = train_test_split(train_fold, random_state=42, test_size=0.1, shuffle=True, stratify=train_fold['target'])
    val_fold = train.iloc[val_index]
    
    # DESCOMENTAR PARA UTILIZAR TODO EL DATASET !!!
    #train_ds = load_ds(train_fold)
    train_ds = load_ds(subset)
    val_ds = load_ds(val_fold)
    
    # PROBAD A ENTRENAR TAMBIÉN RESNET A VER SI DA MEJOR !!!
    IMAGE_SIZE = (224, 224, 3)
    encoder = ResNet50V2(
        include_top=False,
        input_shape=IMAGE_SIZE,
        weights='imagenet'
    )
    encoder.trainable = False
    inputs = keras.Input(shape=IMAGE_SIZE)
    x = keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
    x = encoder(x, training=False)
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inputs, outputs)
    #model.summary()
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[keras.metrics.AUC(name="auc")]
    )
    
    filepath = './checkpoints/checkpoint'
    cb = tf.keras.callbacks.ModelCheckpoint(
        filepath = filepath,
        monitor="val_auc",
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode="max"
    )
    
    model.fit(
        train_ds, 
        epochs=10, 
        validation_data=val_ds, 
        validation_steps=10, # COMENTAR PARA HACER BIEN LA VALIDACIÓN !!!
        callbacks=[cb]
    )
    
    model.load_weights(filepath)
    model.save(f"model_fold{f+1}.h5")
    
    _, auc = model.evaluate(val_ds)
    aucs.append(auc)

Fold:  1
Epoch 1/10
Epoch 00001: val_auc improved from -inf to 0.42846, saving model to ./checkpoints\checkpoint
Epoch 2/10
Epoch 00002: val_auc did not improve from 0.42846
Epoch 3/10
Epoch 00003: val_auc did not improve from 0.42846
Epoch 4/10
Epoch 00004: val_auc improved from 0.42846 to 0.52633, saving model to ./checkpoints\checkpoint
Epoch 5/10
Epoch 00005: val_auc improved from 0.52633 to 0.64246, saving model to ./checkpoints\checkpoint
Epoch 6/10
Epoch 00006: val_auc did not improve from 0.64246
Epoch 7/10
Epoch 00007: val_auc improved from 0.64246 to 0.73899, saving model to ./checkpoints\checkpoint
Epoch 8/10
Epoch 00008: val_auc improved from 0.73899 to 0.77990, saving model to ./checkpoints\checkpoint
Epoch 9/10
Epoch 00009: val_auc did not improve from 0.77990
Epoch 10/10
Epoch 00010: val_auc did not improve from 0.77990
Fold:  2
Epoch 1/10
Epoch 00001: val_auc improved from -inf to 0.38723, saving model to ./checkpoints\checkpoint
Epoch 2/10
Epoch 00002: val_auc did not 

In [7]:
aucs

[0.7450290322303772, 0.7576072812080383, 0.7205542325973511]

In [8]:
np.mean(aucs), np.std(aucs)

(0.7410635153452555, 0.01538453977127963)

In [9]:
test = pd.read_csv(f'{PATH}/test.csv')

test.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity


In [10]:
def decode_test(filename):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    return img

def load_test_ds(df):
    imgs = df["image_name"].values
    imgs = [f'{PATH}/jpeg224/test/{img}.jpg' for img in imgs]
    ds = tf.data.Dataset.from_tensor_slices(imgs)
    ds = ds.map(decode_test, num_parallel_calls=AUTO)
    ds = ds.batch(BATCH_SIZE)
    return ds

In [11]:
test_ds = load_test_ds(test)

In [12]:
preds = []
for f in range(1, FOLDS+1):
    print(f"Folds {f}")
    model_fold = load_model(f"model_fold{f}.h5")
    probas = model_fold.predict(test_ds)
    preds.append(probas)

Folds 1
Folds 2
Folds 3


In [13]:
preds

[array([[0.05757857],
        [0.01011392],
        [0.01120207],
        ...,
        [0.02039555],
        [0.00633859],
        [0.03073774]], dtype=float32),
 array([[0.00089891],
        [0.00300865],
        [0.01120145],
        ...,
        [0.07141326],
        [0.00105115],
        [0.00647851]], dtype=float32),
 array([[0.00727905],
        [0.01598136],
        [0.01208515],
        ...,
        [0.07996412],
        [0.00636409],
        [0.09287672]], dtype=float32)]

In [14]:
preds_mean = 
preds_mean

array([[0.02191885],
       [0.00970131],
       [0.01149622],
       ...,
       [0.05725764],
       [0.00458461],
       [0.04336432]], dtype=float32)

In [15]:
submission = pd.DataFrame({'image_name': test['image_name'].values, 'target': preds_mean.ravel()})

submission

Unnamed: 0,image_name,target
0,ISIC_0052060,0.021919
1,ISIC_0052349,0.009701
2,ISIC_0058510,0.011496
3,ISIC_0073313,0.003990
4,ISIC_0073502,0.003079
...,...,...
10977,ISIC_9992485,0.009039
10978,ISIC_9996992,0.058055
10979,ISIC_9997917,0.057258
10980,ISIC_9998234,0.004585


In [16]:
submission.to_csv('submission.csv', index=False)