In [None]:
!pip install /kaggle/input/kerasapplications -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

import tensorflow as tf
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import re
import tensorflow_addons as tfa
import efficientnet.tfkeras as efn

In [None]:
# Root of the competition dataset; every other path below is derived from it.
root_dir = "../input/ranzcr-clip-catheter-line-classification"  
# Directory holding the training images (read further down as "<id>.jpg").
images_train_dir = os.path.join(root_dir, "train")
# NOTE: despite the "_dir" suffix, the next two names are paths to CSV files.
train_dir = os.path.join(root_dir, 'train.csv')
train_annotations_dir = os.path.join(root_dir, 'train_annotations.csv')

In [None]:
# Load the training table and report its row count before previewing it.
train_df = pd.read_csv(train_dir)
print(f"there are: {train_df.shape[0]} datapoints")
display(train_df.head(10))

Check how many unique patients appear in the training set.

In [None]:
# Distinct values of the PatientID column (a NaN, if present, counts as one value).
print(f"There are: {len(train_df['PatientID'].unique())} Unique patients")

In [None]:
# Bar chart of the column sums for every column except the first and the last.
# Only those columns are summed: the original reduced the whole frame and
# sliced afterwards, which also reduced the two end columns that the slice
# discards (presumably string identifiers -- verify against train.csv).
label_columns = train_df.columns[1:-1]
label_counts = train_df[label_columns].sum(axis = 0)

_ = plt.figure(figsize = (7,7))
# Plain arrays are passed so the plot does not depend on index alignment.
sns.barplot(x = list(label_counts.index), y = label_counts.to_numpy())
plt.xticks(rotation = 60)
plt.xlabel("Label")
plt.ylabel("Column sum")
plt.title("Label counts in the training set")
plt.show()

In [None]:
# Load the annotation table; the preview loop further down reads its column 0
# (image file stem) and column 2 (annotation string) by position.
annotations_pd = pd.read_csv(train_annotations_dir)
display(annotations_pd.head(10))

In [None]:
def draw_circle(coordinate, image):
    """Draw one marker circle on ``image`` and return the result.

    Args:
        coordinate: Circle centre, passed straight to ``cv2.circle``.
        image: Image to draw on.

    Returns:
        Whatever ``cv2.circle`` returns for the given image.

    NOTE(review): OpenCV drawing calls generally modify the input array in
    place -- pass a copy if the untouched image is still needed (confirm).
    """
    marker_radius = 10
    marker_colour = (255, 0, 0)
    marker_thickness = 30
    return cv2.circle(image, coordinate, marker_radius, marker_colour, marker_thickness)

def annotate_image(image, annotations):
    """Draw a marker for every coordinate pair found in ``annotations``.

    Args:
        image: Image handed to ``draw_circle`` for each pair.
        annotations: String of integers separated by whitespace, commas
            and/or square brackets (e.g. ``"[[1, 2], [3, 4]]"``).

    Returns:
        The image returned by the last ``draw_circle`` call, or ``image``
        itself when no complete pair is present.
    """
    # Strip brackets and commas so only whitespace-separated numbers remain.
    tokens = re.sub(r'[\[\],]', '', annotations).split()

    # Consecutive tokens form one pair; zip() drops a trailing unpaired token.
    for first_token, second_token in zip(tokens[0::2], tokens[1::2]):
        image = draw_circle((int(first_token), int(second_token)), image)

    return image

In [None]:
# Show the first 8 annotation rows as side-by-side original / annotated images.
for i in range(8):
    
    # Column 0 of the annotation table supplies the image file stem (positional
    # lookup, so this depends on the CSV column order).
    image_dir = os.path.join(images_train_dir, str(annotations_pd.iloc[i,0]) + ".jpg")
    # NOTE(review): cv2.imread is not checked for a failed read here -- confirm
    # the file always exists.
    image = cv2.imread(image_dir, cv2.IMREAD_COLOR)
    _ = plt.figure(figsize =(10,10))
    plt.subplot(1,2,1)
    plt.imshow(image)
    plt.title("Original Image")
    # Column 2 holds the annotation string parsed by annotate_image().
    image = annotate_image(image, annotations_pd.iloc[i,2])
    plt.subplot(1,2,2)
    plt.imshow(image)
    plt.title("Annotated Image")
    plt.show()

In [None]:
# Fixed seed applied to both NumPy's and TensorFlow's global random state.
seed = 456
# Batch size used by the training/validation pipeline built in get_ds().
batch_size = 2
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Directory that is globbed for the training "*.tfrec" files further down.
train_dir_tfr = os.path.join(root_dir, "train_tfrecords")

In [None]:
#functions to read the TFRecords

# Side length (pixels) every image is resized to; also the model input size.
image_size = 750
# Lets tf.data choose parallelism / prefetch buffer sizes.
autotune = tf.data.experimental.AUTOTUNE

# Parsing schema for one training record: eleven int64 label fields, the
# study identifier string, and the encoded image bytes.
feature_map = {
        'ETT - Abnormal' : tf.io.FixedLenFeature([], tf.int64),
        'ETT - Borderline' : tf.io.FixedLenFeature([], tf.int64),
        'ETT - Normal' : tf.io.FixedLenFeature([], tf.int64),
        "NGT - Abnormal" : tf.io.FixedLenFeature([], tf.int64),
        'NGT - Borderline' : tf.io.FixedLenFeature([], tf.int64),
        'NGT - Incompletely Imaged' : tf.io.FixedLenFeature([], tf.int64),
        'NGT - Normal' : tf.io.FixedLenFeature([], tf.int64),
        'CVC - Abnormal' : tf.io.FixedLenFeature([], tf.int64),
        'CVC - Borderline': tf.io.FixedLenFeature([], tf.int64),
        'CVC - Normal': tf.io.FixedLenFeature([], tf.int64),
        'StudyInstanceUID' : tf.io.FixedLenFeature([], tf.string),
        'Swan Ganz Catheter Present' : tf.io.FixedLenFeature([], tf.int64),
        'image' : tf.io.FixedLenFeature([], tf.string)
        }

def read_tfr(example):
    """Parse one serialized training record into an ``(image, labels)`` pair.

    The image is decoded, expanded to three channels, resized to
    ``image_size`` x ``image_size``, optionally augmented, and divided by 255.
    The labels are the eleven label fields stacked in a fixed order.

    Relies on the module-level names ``feature_map``, ``image_size`` and
    ``augm``; ``augm`` is assigned by ``get_ds`` before this function is
    mapped over a dataset.

    NOTE(review): augmentation runs before the division by 255, so
    ``augment`` sees values on the pre-scaling range -- confirm that its
    brightness/contrast parameters are meant for that range.
    """
    # Order of the stacked label vector; the model's 11 outputs follow it.
    label_keys = (
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        "NGT - Abnormal",
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    )

    parsed = tf.io.parse_single_example(example, feature_map)

    pixels = tf.io.decode_jpeg(parsed['image'])
    pixels = tf.image.grayscale_to_rgb(pixels)
    pixels = tf.image.resize(pixels, (image_size, image_size))

    if augm:
        pixels = augment(pixels)

    pixels = pixels / 255

    labels = tf.stack([parsed[key] for key in label_keys])

    return pixels, labels


def load_ds(filenames, aug):
    """Build an unbatched dataset of parsed training examples.

    Args:
        filenames: TFRecord file path(s) to read.
        aug: Accepted for call compatibility but not used here; whether
            augmentation happens is decided inside ``read_tfr``.

    Returns:
        A ``tf.data`` dataset produced by mapping ``read_tfr`` over the records.
    """
    raw_records = tf.data.TFRecordDataset(filenames)
    return raw_records.map(read_tfr, num_parallel_calls = autotune)

def augment(image):
    """Randomly augment one image; the image is returned unchanged otherwise.

    When a uniform draw from [0, 1) exceeds 0.5 the image goes through random
    left/right and up/down flips, random brightness and contrast changes, and
    a random translation with offsets drawn from [-20, 20).

    Args:
        image: A single image tensor.

    Returns:
        The augmented (or untouched) image tensor.
    """
    # Scalar draw: the branch condition must be a single boolean. The original
    # drew a (1, 1) tensor, whereas tf.cond documents its predicate as a scalar.
    decider = tf.random.uniform(shape = (), minval = 0, maxval = 1)

    if decider > 0.5:
        # One (dx, dy) offset, kept as a 1x2 matrix as in the original call.
        dx_dy = tf.random.uniform(shape = (1,2), minval = -20, maxval = 20)
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.random_brightness(image, 0.5)
        # NOTE(review): both contrast bounds are below 1 -- confirm intended.
        image = tf.image.random_contrast(image, 0.2, 0.5)

        image = tfa.image.translate(image, dx_dy)

    return image

def class_func(images, label):
    """Return ``label`` and ignore ``images``."""
    return label
    
    
def get_ds(filenames, aug):
    """Build the batched, endlessly repeating training/validation pipeline.

    Args:
        filenames: TFRecord file path(s) to read.
        aug: Whether ``read_tfr`` should augment images. The value is
            published through the module-level ``augm`` flag, which
            ``read_tfr`` reads.

    Returns:
        A dataset batched by the module-level ``batch_size``, repeated
        without end and prefetched.
    """
    global augm
    augm = aug

    examples = load_ds(filenames, augm)

    # Shuffling (buffer 512) and caching were tried between batch() and
    # repeat() and are currently disabled.
    return examples.batch(batch_size).repeat().prefetch(autotune)
    

In [None]:
#splitting the dataset
# Fraction of the TFRecord *files* (not samples) held out for validation.
validation_split = 0.2
# Sorted so the train/validation membership is the same on every run; the
# glob call's result order is not something this notebook can rely on.
TFR_fnames = sorted(tf.io.gfile.glob(train_dir_tfr + '/*.tfrec'))
# The first n_valid_files files are validation, the remainder training.
n_valid_files = int(len(TFR_fnames) * validation_split)
TFR_fnames_train = TFR_fnames[n_valid_files:]
TFR_fnames_valid = TFR_fnames[:n_valid_files]

In [None]:
# Training pipeline with augmentation switched on, validation pipeline without.
# Both repeat without end (see get_ds), so any consumer must bound the steps.
train_ds = get_ds(TFR_fnames_train, True)
valid_ds = get_ds(TFR_fnames_valid, False)

In [None]:
# creating the model
def create_model(base_model):
    """Attach a pooled 11-way sigmoid head to ``base_model`` and compile it.

    Args:
        base_model: Callable backbone applied to an
            ``(image_size, image_size, 3)`` input.

    Returns:
        The compiled ``tf.keras.Model`` (binary cross-entropy loss, Adam with
        learning rate 9e-6, multi-label AUC metric). The model summary is
        printed as a side effect.
    """
    image_input = tf.keras.Input(shape = (image_size, image_size, 3,))
    backbone_out = base_model(image_input)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(backbone_out)
    # An intermediate Dense(1024, relu) layer was tried here and is disabled.
    predictions = tf.keras.layers.Dense(11, activation = 'sigmoid')(pooled)

    model = tf.keras.Model(inputs = image_input, outputs = predictions)
    model.summary()

    model.compile(
        loss = 'binary_crossentropy',
        optimizer = tf.keras.optimizers.Adam(9e-6),
        metrics = [tf.keras.metrics.AUC(multi_label=True)],
    )

    return model

# EfficientNet-B7 backbone without its classification head, initialised from a
# local weights file and sized for image_size x image_size RGB inputs.
EfficientNet = efn.EfficientNetB7(
        include_top = False,
        weights = '../input/efficientnet-b7-no-top-keras/efficientnet-b7_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5',
        input_shape = (image_size, image_size,3))


# Wrap the backbone with the 11-output head and compile (prints the summary).
efficient = create_model(EfficientNet)

# Overwrite the freshly built model's weights with a saved checkpoint.
efficient.load_weights("../input/catheter-models-trained-with-tpu/efficientB7.h5")


In [None]:
# Count records by iterating each split's TFRecord files once.
train_samples = sum(1 for _ in tf.data.TFRecordDataset(TFR_fnames_train))
valid_samples = sum(1 for _ in tf.data.TFRecordDataset(TFR_fnames_valid))
print(f"There are: {train_samples} train samples and {valid_samples} validation samples")

In [None]:
#epochs = 15

#callbacks = [tf.keras.callbacks.EarlyStopping(monitor = 'val_auc',
#                                              mode = 'max',
#                                             patience = 1  ,
#                                             restore_best_weights = True)]

#efficient.fit(train_ds, epochs = epochs,
#             validation_data = valid_ds, steps_per_epoch = train_samples//batch_size,
#             validation_steps = valid_samples // batch_size, callbacks = callbacks)

In [None]:
#callbacks = [tf.keras.callbacks.EarlyStopping(monitor = 'val_auc_1',
                                             # mode = 'max',
                                             #patience = 1  ,
                                             #restore_best_weights = True)]

#resnet50.fit(train_ds, epochs = epochs,
#             validation_data = valid_ds, steps_per_epoch = train_samples//batch_size,
#             validation_steps = valid_samples // batch_size, callbacks = callbacks)

In [None]:
# Drop the references to the training/validation pipelines; they are not used
# from here on.
train_ds = None
valid_ds = None
test_dir = "../input/ranzcr-clip-catheter-line-classification/test_tfrecords"
TFR_fnames_test = tf.io.gfile.glob(test_dir + '/*.tfrec')

# Parsing schema for one test record: identifier string and encoded image
# bytes only (no label fields).
test_feature_map = {
    "StudyInstanceUID" : tf.io.FixedLenFeature([], tf.string),
    "image" : tf.io.FixedLenFeature([], tf.string)
    }

def read_tfr(example):
    """Parse one serialized test record into a model-ready image tensor.

    The image is decoded, resized to ``image_size`` x ``image_size``,
    expanded to three channels and divided by 255. No labels are returned.

    NOTE: this definition reuses the name of the training-time ``read_tfr``
    defined earlier in the notebook and replaces it once this cell has run.
    """
    parsed = tf.io.parse_single_example(example, test_feature_map)

    decoded = tf.io.decode_jpeg(parsed['image'])
    resized = tf.image.resize(decoded, (image_size, image_size))
    rgb = tf.image.grayscale_to_rgb(resized)

    return rgb / 255



def load_ds(filenames):
    """Build an unbatched dataset of preprocessed test images.

    Args:
        filenames: TFRecord file path(s) to read.

    Returns:
        A ``tf.data`` dataset produced by mapping ``read_tfr`` over the records.

    NOTE: replaces the two-argument training-time ``load_ds`` defined earlier.
    """
    raw_records = tf.data.TFRecordDataset(filenames)
    return raw_records.map(read_tfr, num_parallel_calls = autotune)
    
    
def get_ds(filenames):
    """Build the batched test-image pipeline (batches of 4, prefetched).

    Args:
        filenames: TFRecord file path(s) to read.

    Returns:
        A finite dataset of image batches; unlike the training-time
        ``get_ds`` it replaces, it does not repeat.
    """
    test_images = load_ds(filenames)
    return test_images.batch(4).prefetch(autotune)

def read_ids(example):
    """Parse one serialized test record and return its ``StudyInstanceUID``."""
    parsed = tf.io.parse_single_example(example, test_feature_map)
    return parsed['StudyInstanceUID']

def load_ds_ids(filenames):
    """Build an unbatched dataset of test-record identifiers.

    Args:
        filenames: TFRecord file path(s) to read.

    Returns:
        A ``tf.data`` dataset produced by mapping ``read_ids`` over the records.
    """
    raw_records = tf.data.TFRecordDataset(filenames)
    return raw_records.map(read_ids, num_parallel_calls = autotune)
    
    
def get_ds_ids(filenames):
    """Build the identifier pipeline, batched by the total test-sample count.

    Reads the module-level ``test_samples`` at call time, so that name must
    already be assigned when this function is invoked.

    Args:
        filenames: TFRecord file path(s) to read.

    Returns:
        A prefetched dataset of identifier batches of size ``test_samples``.
    """
    identifiers = load_ds_ids(filenames)
    return identifiers.batch(test_samples).prefetch(autotune)

# Count the test records once; get_ds_ids() reads this module-level value to
# size its batch, so it has to be assigned before get_ds_ids() is called.
test_samples = sum(1 for _ in tf.data.TFRecordDataset(TFR_fnames_test))
# Images and identifiers are read by two separate pipelines over the same files.
test_ds = get_ds(TFR_fnames_test)
test_ids = get_ds_ids(TFR_fnames_test)

In [None]:
# test_ds is already batched by get_ds(), so no batch_size is passed: Keras
# documents that batch_size should not be specified for dataset inputs.
results1 = efficient.predict(test_ds)
#results2 = resnet50.predict(test_ds, batch_size = 4)
#results = (results1 + results2) / 2
# One row per test image, one column per model output.
results = pd.DataFrame(results1)

In [None]:
#getting the ids of the test_results
# test_ids is batched by the total number of test samples, so its first batch
# holds every identifier. Each element arrives as bytes; decode it explicitly
# instead of slicing the "b'...'" repr, which breaks on escaped characters.
raw_ids = next(iter(test_ids)).numpy()
ids = pd.Series([raw_id.decode("utf-8") for raw_id in raw_ids])

In [None]:
# Submission header: the identifier column followed by the eleven labels, in
# the same order in which the training-time read_tfr stacks them.
columns = ['StudyInstanceUID', 'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
           "NGT - Abnormal", 'NGT - Borderline','NGT - Incompletely Imaged',
           'NGT - Normal', 'CVC - Abnormal','CVC - Borderline',
           'CVC - Normal','Swan Ganz Catheter Present']

In [None]:
# ids and results come from two separate passes over the test files, and
# concat(axis=1) aligns on the index: a length mismatch would silently pad
# rows with NaN instead of failing, so check it up front.
assert len(ids) == len(results), (
    f"got {len(ids)} ids but {len(results)} prediction rows"
)
results_df = pd.concat([ids, results], axis = 1)
results_df.columns = columns 
display(results_df.head(10))
results_df.to_csv('submission.csv', index = False)