In [23]:
import tensorflow as tf
from tensorflow import keras
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.applications import ResNet50V2
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import load_model
from_tensorflow.keras import layers

tf.__version__

SyntaxError: invalid syntax (3456464010.py, line 11)

In [2]:
PATH = 'data'

os.listdir(PATH)

['archive (12).zip',
 'jpeg224',
 'sample_submission.csv',
 'test.csv',
 'train.csv',
 'train_split.csv',
 'val_split.csv',
 'subset.csv']

In [22]:
train = pd.read_csv(f'{PATH}/subset.csv')

#val = pd.read_csv(f'{PATH}/val_split.csv')

test = pd.read_csv(f'{PATH}/test.csv')

train.shape,# val.shape, test.shape

((2220, 8),)

In [4]:
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_0533349,IP_5208504,female,45.0,lower extremity,unknown,benign,0
1,ISIC_8814612,IP_0414408,male,50.0,torso,unknown,benign,0
2,ISIC_6515241,IP_6245507,male,45.0,lower extremity,unknown,benign,0
3,ISIC_5075261,IP_2117218,male,40.0,upper extremity,unknown,benign,0
4,ISIC_2624460,IP_1969685,male,50.0,torso,unknown,benign,0


In [6]:
BATCH_SIZE = 64

AUTO = tf.data.experimental.AUTOTUNE

def decode(name, label):
    img = tf.io.read_file(name)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) 
    return img, label


def load_ds(df):
    options = tf.data.Options()
    options.experimental_deterministic = False
    imgs, labels = df["image_name"].values, df["target"].values
    imgs = [f'{PATH}/jpeg224//train/{name}.jpg' for name in imgs]
    ds = tf.data.Dataset.from_tensor_slices((imgs, labels))
    ds = ds.with_options(options)
    ds = ds.map(decode, num_parallel_calls=AUTO)
    ds = ds.cache()
    ds = ds.shuffle(2048)
    ds = ds.batch(BATCH_SIZE)
    #ds = ds.prefetch(buffer_size=AUTO)
    return ds

In [16]:
FOLDS = 3
aucs = []

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)
for f, (train_idx, val_idx) in enumerate(skf.split(X=np.zeros(len(train)), y=train['target'])):
    print("Fold: ", f+1)

    train_fold = train.iloc[train_idx]
    val_fold = train.iloc[val_idx]   

    train_ds = load_ds(train_fold)
    val_ds = load_ds(val_fold)

    IMAGE_SIZE = (224, 224, 3)

    encoder = ResNet50V2(
        include_top=False,
        input_shape=IMAGE_SIZE,
        weights='imagenet'
    )

    encoder.trainable = False

    inputs = tf.keras.Input(shape=IMAGE_SIZE)
    x = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
    x = encoder(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs, outputs)
    #model.summary()
    
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=[keras.metrics.AUC(name='auc')]
    )
    
    filepath = './checkpoints/checkpoints'
    cb = tf.keras.callbacks.ModelCheckpoint(
        filepath = filepath,
        monitor = "val_auc",
        verbose = 0,
        save_best_only = True,
        save_weights_only = True,
        mode = "max"
    )
    
    model.fit(
        train_ds, 
        epochs=10, 
        validation_data=val_ds, 
        #validation_steps=10,
        callbacks = [cb]
    )
    
    model.load_weights(filepath)
    model.save(f"model_fold {f+1}.h5")
    
    _, auc = model.evaluate(val_ds)
    aucs.append(auc)
    

Fold:  1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold:  2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold:  3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
aucs

[0.6150143146514893, 0.7447360157966614, 0.6810920238494873]

In [18]:
np.mean(aucs), np.std(aucs)

(0.6802807847658793, 0.052961769347567536)

In [19]:
BATCH_SIZE = 256

def decode_test(name):
    img = tf.io.read_file(name)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) 
    return img


def load_test_ds(df):
    imgs = df["image_name"].values
    imgs = [f'{PATH}/jpeg224/test/{name}.jpg' for name in imgs]
    ds = tf.data.Dataset.from_tensor_slices(imgs)
    ds = ds.map(decode_test, num_parallel_calls=AUTO)
    ds = ds.batch(BATCH_SIZE)
    return ds

In [20]:
test_ds = load_test_ds(test)

In [21]:
preds = []
for f in range(1, FOLDS+1):
    print(f"Fold: {f}")
    model_fold = tf.keras.models.load_model(f"model_fold {f}.h5")
    probas = model_fold.predict(test_ds)
    preds.append(probas)

Fold: 1
Fold: 2
Fold: 3


In [24]:
preds

[array([[0.02105139],
        [0.00125025],
        [0.02116091],
        ...,
        [0.02618294],
        [0.00495866],
        [0.04358496]], dtype=float32),
 array([[0.00426918],
        [0.02951675],
        [0.02794935],
        ...,
        [0.02703756],
        [0.00152806],
        [0.05563672]], dtype=float32),
 array([[0.01446082],
        [0.00740031],
        [0.01693306],
        ...,
        [0.02398477],
        [0.00716682],
        [0.03772961]], dtype=float32)]

In [25]:
preds_mean = np.mean(preds, axis=0)
preds_mean

array([[0.01326046],
       [0.01272244],
       [0.02201444],
       ...,
       [0.02573509],
       [0.00455118],
       [0.04565043]], dtype=float32)

In [26]:
submission = pd.DataFrame({
    'image_name': test['image_name'].values,
    'target': preds_mean.ravel()
})

submission.head()

Unnamed: 0,image_name,target
0,ISIC_0052060,0.01326
1,ISIC_0052349,0.012722
2,ISIC_0058510,0.022014
3,ISIC_0073313,0.003465
4,ISIC_0073502,0.007031


In [27]:
submission.to_csv('submission.csv', index=False)