In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import image
from matplotlib import pyplot
import os
import cv2
import random
import concurrent.futures
import time
import sklearn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
from sklearn.metrics import roc_auc_score
import datetime

# Record the TensorFlow version in the notebook output for reproducibility.
print(tf.__version__)


In [None]:
# Read every column as a string; the label columns are converted to int later.
TRAIN_CSV = "../input/ranzcr-clip-catheter-line-classification/train.csv"
train = pd.read_csv(TRAIN_CSV, dtype=str)

# Image file name for each study: the StudyInstanceUID with a ".jpg" suffix.
train['image_names'] = train['StudyInstanceUID'].map(lambda uid: uid + ".jpg")
train.head()

In [None]:
# Target columns, in the order they appear in each label vector.
LABEL_COLUMNS = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# One list of 11 ints per row of `train` (the CSV was read with dtype=str,
# so every entry is converted with int()).
labels = [[int(train[column][row]) for column in LABEL_COLUMNS]
          for row in range(len(train))]

In [None]:
img_names = train["image_names"]

def myfunc():
    """Return the constant 0.5.

    Previously passed to ``random.shuffle`` as its ``random`` argument. That
    argument no longer exists in Python 3.11+, and a constant "random" source
    produces one fixed permutation rather than a shuffle. Kept only so that
    any other cell referring to ``myfunc`` keeps working.
    """
    return 0.5

# Fixed seed so the train/validation partition is the same on every run.
SHUFFLE_SEED = 1000

# NOTE: `labels` is rebound to the shuffled order below while `img_names` is
# re-read from `train`; re-running only this cell would mis-pair names and
# labels. Re-run the label-building cell first.
c = list(zip(img_names, labels))
# A private, seeded generator gives a reproducible shuffle without touching
# the global `random` state.
random.Random(SHUFFLE_SEED).shuffle(c)
img_names, labels = zip(*c)

In [None]:
IMSIZE = 256

def read_img(image):
    """Load the image file at path `image` as RGB, resized to IMSIZE x IMSIZE.

    Returns whatever ``tf.keras.preprocessing.image.load_img`` returns.
    """
    return tf.keras.preprocessing.image.load_img(
        image,
        color_mode='rgb',
        target_size=(IMSIZE, IMSIZE),
    )
def prepare_dataset(namelist, labels, path):
    """Load a set of images from disk and pair them with their label tensor.

    Args:
        namelist: iterable of image file names, relative to `path`.
        labels: label vectors for the images; converted to an int8 tensor.
        path: directory that contains the image files.

    Returns:
        (imgs, labels): a tensor built by stacking every loaded image, and
        the int8 label tensor.
    """
    started_at = time.time()
    label_tensor = tf.cast(tf.convert_to_tensor(np.array(labels)), tf.int8)

    file_paths = [os.path.join(path, name) for name in namelist]
    total = len(file_paths)

    loaded = []
    # Files are read on 8 worker threads; executor.map yields results in
    # input order, so images stay aligned with their labels.
    with concurrent.futures.ThreadPoolExecutor(max_workers = 8) as executor:
        for count, picture in enumerate(executor.map(read_img, file_paths), start=1):
            print("\rFetching: [{}/{}]".format(count, total), end="", flush=True)
            loaded.append(picture)
        image_tensor = tf.convert_to_tensor(np.stack(loaded))

    print("\nExecution time: ",time.time() - started_at, "s")
    return image_tensor, label_tensor

In [None]:
# Materialise the image tensors on the CPU host.
with tf.device('/cpu:0'):
    path = '../input/ranzcr-clip-catheter-line-classification/train'
    OFFSET = 10240
    TRAIN_SIZE = 6400
    VAL_SIZE = 2560

    # Training rows are [OFFSET, OFFSET + TRAIN_SIZE); validation rows are the
    # VAL_SIZE rows that follow immediately after.
    train_rows = slice(OFFSET, OFFSET + TRAIN_SIZE)
    val_rows = slice(OFFSET + TRAIN_SIZE, OFFSET + VAL_SIZE + TRAIN_SIZE)

    train_images, train_labels = prepare_dataset(img_names[train_rows], labels[train_rows], path)
    val_images, val_labels = prepare_dataset(img_names[val_rows], labels[val_rows], path)

In [None]:
# Sanity-check the shapes of the loaded tensors. The second pair is the
# validation split (val_images / val_labels), so it is labelled as such.
print("Training Image tensor shape", train_images.shape)
print("Training Labels tensor shape", train_labels.shape)
print("Validation Image tensor shape", val_images.shape)
print("Validation Labels tensor shape", val_labels.shape)

In [None]:
# Resolve the TPU cluster, connect to it and initialise the TPU system,
# then list the logical TPU devices that became available.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)

tpu_devices = tf.config.list_logical_devices('TPU')
print("All devices: ", tpu_devices)

In [None]:
SEED = 1000

# Random augmentation layers, applied to whole batches inside the tf.data
# pipeline (see `preprocess`).
# NOTE(review): Keras documents the RandomRotation factor as a fraction of
# 2*pi, so 3.142/2 allows rotations far beyond +/-90 degrees. If +/-90 degrees
# was intended the factor would be 0.25 - confirm before changing.
random_rotation = tf.keras.layers.experimental.preprocessing.RandomRotation(3.142/2, seed=SEED)
random_flip = tf.keras.layers.experimental.preprocessing.RandomFlip(mode="horizontal_and_vertical", seed=SEED)
random_zoom = tf.keras.layers.experimental.preprocessing.RandomZoom((0, 0.25), seed=SEED)
random_translate = tf.keras.layers.experimental.preprocessing.RandomTranslation((-0, 0.25), (-0, 0.25), seed=SEED)

def preprocess(imgs, label):
    """Apply rotation, flip, zoom and translation augmentation to a batch.

    Args:
        imgs: batch of images.
        label: labels for the batch; returned unchanged.

    Returns:
        (augmented images, label)
    """
    imgs = random_rotation.call(imgs)
    imgs = random_flip.call(imgs)
    imgs = random_zoom.call(imgs)
    imgs = random_translate.call(imgs)
    return imgs, label

def normalize(imgs, label):
    """Cast images to float16 and divide by 255; labels pass through."""
    return tf.cast(imgs, tf.float16)/255, label

strategy = tf.distribute.experimental.TPUStrategy(resolver)
with tf.device('/cpu:0'):
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    TRAIN_BATCH_SIZE = 16 * strategy.num_replicas_in_sync
    VAL_BATCH_SIZE = 8 * strategy.num_replicas_in_sync

    # No .cache() here: the source tensors are already in memory, and caching
    # after .repeat() never finalises (the stream is infinite), so the cache
    # would grow without bound while storing already-augmented batches.
    train_dataset = (
        tf.data.Dataset.from_tensor_slices((tf.cast(train_images, tf.uint8),
                                            tf.cast(train_labels, tf.uint8)))
        .shuffle(TRAIN_SIZE)
        .repeat()
        .batch(TRAIN_BATCH_SIZE)
        .map(preprocess, num_parallel_calls=AUTOTUNE)
    )
    # Free the eager copies; re-running this cell requires re-running the
    # loading cell first.
    del train_images, train_labels

    # Validation stream: no shuffling and no augmentation.
    val_dataset = (
        tf.data.Dataset.from_tensor_slices((tf.cast(val_images, tf.uint8),
                                            tf.cast(val_labels, tf.uint8)))
        .repeat()
        .batch(VAL_BATCH_SIZE)
    )
    del val_images, val_labels

    train_dataset = train_dataset.map(normalize, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
    val_dataset = val_dataset.map(normalize, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

In [None]:
# Collect the ground-truth label rows of the validation batches, stopping
# once VAL_SIZE/VAL_BATCH_SIZE batches have been consumed (val_dataset
# repeats indefinitely, so the loop has to break out itself).
y_test = []
max_batches = VAL_SIZE/VAL_BATCH_SIZE
i = 0
for i, (x, y) in enumerate(val_dataset, start=1):
    if i > max_batches:
        break
    print('\r',i, end='')
    y_test.extend(list(row.numpy()) for row in y)
y_test = np.array(y_test)

def roc_auc_plot(y_test, y_score):
    """Plot per-class, micro-average and macro-average ROC curves.

    Args:
        y_test: 2-D array of true binary labels, one column per class.
        y_score: 2-D array of scores with the same layout; the number of
            classes is taken from ``y_score.shape[1]``.

    Returns:
        dict mapping each class index, plus the keys "micro" and "macro",
        to the corresponding area under the ROC curve.
    """
    line_width = 1
    n_classes = y_score.shape[1]
    class_ids = range(n_classes)

    fpr, tpr, roc_auc = {}, {}, {}

    # One ROC curve (and its area) per class column.
    for cls in class_ids:
        fpr[cls], tpr[cls], _ = roc_curve(y_test[:, cls], y_score[:, cls])
        roc_auc[cls] = auc(fpr[cls], tpr[cls])

    # Micro-average: pool every (label, score) pair into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro-average: interpolate each class curve onto the union of all
    # false-positive rates, then take the unweighted mean of the TPRs.
    all_fpr = np.unique(np.concatenate([fpr[cls] for cls in class_ids]))
    mean_tpr = sum(np.interp(all_fpr, fpr[cls], tpr[cls]) for cls in class_ids) / n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    plt.figure()

    # The two averaged curves, drawn as thick dotted lines.
    average_styles = (
        ("micro", 'micro-average ROC curve (area = {0:0.2f})', 'deeppink'),
        ("macro", 'macro-average ROC curve (area = {0:0.2f})', 'navy'),
    )
    for key, template, colour in average_styles:
        plt.plot(fpr[key], tpr[key],
                 label=template.format(roc_auc[key]),
                 color=colour, linestyle=':', linewidth=4)

    # Per-class curves, cycling through three colours.
    palette = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for cls in class_ids:
        plt.plot(fpr[cls], tpr[cls], color=next(palette), lw=line_width,
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(cls, roc_auc[cls]))

    # Chance diagonal and axis decoration.
    plt.plot([0, 1], [0, 1], 'k--', lw=line_width)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return roc_auc

In [None]:
%load_ext tensorboard

IMSIZE = 256
with strategy.scope():
    # ImageNet-initialised InceptionResNetV2 backbone without its classifier
    # head; global max pooling turns the feature map into a vector.
    base_model = tf.keras.applications.InceptionResNetV2(
        include_top=False,
        weights='imagenet',
        pooling='max',
    )

    # Fine-tune everything except the first 150 layers, which stay frozen.
    base_model.trainable = True
    for frozen_layer in base_model.layers[:150]:
        frozen_layer.trainable = False

    # Augmentation is applied in the tf.data pipeline (see `preprocess`),
    # not inside the model.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(IMSIZE, IMSIZE, 3)))
    model.add(base_model)
    # 11 independent sigmoid outputs, one per label column.
    model.add(tf.keras.layers.Dense(11, activation='sigmoid'))

In [None]:
class myCallback_EarlyStopping(tf.keras.callbacks.Callback):
    """Evaluate ROC AUC on the validation stream after every epoch.

    Saves the model when the macro- or micro-average AUC exceeds 0.75, and
    stops training (saving './best_macro.h5') once the macro-average AUC
    exceeds 0.85.

    Relies on the notebook globals `val_dataset`, `VAL_SIZE`,
    `VAL_BATCH_SIZE`, `y_test` and `roc_auc_plot`.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation set, plot ROC curves and checkpoint.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dict supplied by Keras (unused).
        """
        print("Predict: ", end='')
        # batch_size is not passed: val_dataset is already batched.
        y_score = self.model.predict(val_dataset,
                                     steps=int(VAL_SIZE/VAL_BATCH_SIZE),
                                     verbose=1)
        # ROC curves need the continuous sigmoid outputs. Thresholding them
        # to 0/1 first collapses each curve to a single operating point and
        # distorts the AUC, so the raw scores are passed through.
        roc = roc_auc_plot(y_test, np.asarray(y_score))

        if roc['macro'] > 0.75:
            self.model.save('./macro.h5')
        if roc['micro'] > 0.75:
            self.model.save('./micro.h5')
        # Independent check: as an `elif` of the micro test this branch could
        # only run when micro <= 0.75, so early stopping almost never fired.
        if roc['macro'] > 0.85:
            print("\n Validation Macro-Avg AUC of 85% has been reached!")
            self.model.save('./best_macro.h5')
            self.model.stop_training = True
        
# --- Callbacks -------------------------------------------------------------
callback_EarlyStopping = myCallback_EarlyStopping()

# TensorBoard logs go to a directory stamped with the current time.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "./logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Keep only the full model with the highest validation AUC.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='./mobilenetv2-1.h5',
    monitor='val_auc',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Multiply the learning rate by 0.8 after 5 epochs without val_loss improvement.
lr_decay_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.8,
    patience=5,
    verbose=1,
)

# --- Distributed views of the datasets -------------------------------------
dist_train_dataset = strategy.experimental_distribute_dataset(train_dataset)
dist_val_dataset = strategy.experimental_distribute_dataset(val_dataset)

# --- Compile ---------------------------------------------------------------
# The metric is named 'auc', which is what the checkpoint's 'val_auc' monitors.
metric_auc = tf.keras.metrics.AUC(num_thresholds=200, multi_label=True, name='auc')
optimizer = tf.keras.optimizers.Adam(0.0001)
loss_fn = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['acc', metric_auc])
#%tensorboard --logdir ./logs/fit
#%tensorboard --logdir ./logs/fit


In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Both datasets repeat indefinitely, so the number of steps per epoch and per
# validation pass is given explicitly. `tensorboard_callback` is not part of
# this list.
training_callbacks = [model_checkpoint_callback, callback_EarlyStopping, lr_decay_plateau]
history = model.fit(
    train_dataset,
    epochs=100,
    steps_per_epoch=int(TRAIN_SIZE/TRAIN_BATCH_SIZE),
    validation_data=val_dataset,
    validation_steps=int(VAL_SIZE/VAL_BATCH_SIZE),
    callbacks=training_callbacks,
)

In [None]:
# Save the model as it stands after training. The file name says
# "mobilenetv2", but the backbone built above is InceptionResNetV2.
model.save('./mobilenetv2.h5')

In [None]:
# Rebind `model` to the checkpoint at './mobilenetv2-1.h5' (the path given to
# ModelCheckpoint), loading it inside the TPU strategy scope.
with strategy.scope():
    model = tf.keras.models.load_model('./mobilenetv2-1.h5', compile=True)

In [None]:
# Distributed view of the validation dataset. The predict cell below passes
# `val_dataset` itself rather than this object.
dist_val_dataset = strategy.experimental_distribute_dataset(val_dataset)


In [None]:
# Predict on int(VAL_SIZE/VAL_BATCH_SIZE) batches of the validation stream;
# the result is stored in y_score.
y_score=model.predict(val_dataset, batch_size=VAL_BATCH_SIZE, steps = int(VAL_SIZE/VAL_BATCH_SIZE), verbose=1)

In [None]:
# Re-collect the ground-truth label rows of the validation batches, stopping
# once VAL_SIZE/VAL_BATCH_SIZE batches have been consumed (val_dataset
# repeats indefinitely, so the loop has to break out itself).
y_test = []
max_batches = VAL_SIZE/VAL_BATCH_SIZE
i = 0
for i, (x, y) in enumerate(val_dataset, start=1):
    if i > max_batches:
        break
    print('\r',i, end='')
    y_test.extend(list(row.numpy()) for row in y)
        

In [None]:
THRESH = 0.5

# Keep y_score as the continuous model outputs: the ROC computation further
# down needs non-thresholded scores. Overwriting it with 0/1 decisions
# collapses every ROC curve to a single operating point and distorts the AUC.
y_score = np.asarray(y_score)
y_test = np.array(y_test)

# Hard 0/1 decisions at THRESH, kept separately as a list of per-sample lists.
y_pred = (y_score >= THRESH).astype(int).tolist()

In [None]:
# Number of rows in y_score.
len(y_score)

In [None]:
# Plot the ROC curves and keep the returned dict of AUC values in `roc`.
roc = roc_auc_plot(y_test, y_score)

In [None]:
# Inspect the type of the macro-average AUC entry.
type(roc['macro'])

In [None]:
# Save whatever `model` currently refers to; if the cells ran in order this
# is the checkpoint reloaded from './mobilenetv2-1.h5'.
model.save('./mobilenetv2.h5')

In [None]:
def roc_auc_plot(y_test, y_score):
    """Plot per-class, micro-average and macro-average ROC curves.

    This cell repeats the definition of ``roc_auc_plot`` that appears earlier
    in the notebook; running it rebinds the name.

    Args:
        y_test: 2-D array of true binary labels, one column per class.
        y_score: 2-D array of scores with the same layout; the number of
            classes is taken from ``y_score.shape[1]``.

    Returns:
        dict mapping each class index, plus the keys "micro" and "macro",
        to the corresponding area under the ROC curve.
    """
    line_width = 1
    n_classes = y_score.shape[1]
    class_ids = range(n_classes)

    fpr, tpr, roc_auc = {}, {}, {}

    # One ROC curve (and its area) per class column.
    for cls in class_ids:
        fpr[cls], tpr[cls], _ = roc_curve(y_test[:, cls], y_score[:, cls])
        roc_auc[cls] = auc(fpr[cls], tpr[cls])

    # Micro-average: pool every (label, score) pair into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro-average: interpolate each class curve onto the union of all
    # false-positive rates, then take the unweighted mean of the TPRs.
    all_fpr = np.unique(np.concatenate([fpr[cls] for cls in class_ids]))
    mean_tpr = sum(np.interp(all_fpr, fpr[cls], tpr[cls]) for cls in class_ids) / n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    plt.figure()

    # The two averaged curves, drawn as thick dotted lines.
    average_styles = (
        ("micro", 'micro-average ROC curve (area = {0:0.2f})', 'deeppink'),
        ("macro", 'macro-average ROC curve (area = {0:0.2f})', 'navy'),
    )
    for key, template, colour in average_styles:
        plt.plot(fpr[key], tpr[key],
                 label=template.format(roc_auc[key]),
                 color=colour, linestyle=':', linewidth=4)

    # Per-class curves, cycling through three colours.
    palette = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for cls in class_ids:
        plt.plot(fpr[cls], tpr[cls], color=next(palette), lw=line_width,
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(cls, roc_auc[cls]))

    # Chance diagonal and axis decoration.
    plt.plot([0, 1], [0, 1], 'k--', lw=line_width)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return roc_auc

In [None]:
# AUC stored under key 0, i.e. the first class column.
roc[0]

In [None]:
# Final save of the current `model` to './mobilenetv2.h5'.
model.save('./mobilenetv2.h5')