In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import os
import re
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Root directory of the competition data; every CSV and TFRecord path below is
# built by string concatenation onto this prefix.
# NOTE(review): despite the "GCS" in the name, the value is a relative local
# path, not a gs:// URL.
GCS_DS_PATH = "../input/ranzcr-clip-catheter-line-classification"

In [None]:
# Label table: StudyInstanceUID is moved out of the columns and becomes the index.
train_df = pd.read_csv(GCS_DS_PATH + "/train.csv").set_index("StudyInstanceUID")

# Annotation table, keyed by StudyInstanceUID in the same way.
train_annot_df = pd.read_csv(GCS_DS_PATH + "/train_annotations.csv").set_index("StudyInstanceUID")

In [None]:
# Preview the first rows of the label table (indexed by StudyInstanceUID).
train_df.head()

In [None]:
# Preview the first rows of the annotation table (indexed by StudyInstanceUID).
train_annot_df.head()

In [None]:
# Label columns: every column of train_df except the trailing one
# (presumably PatientID -- confirm against train.csv).
classes = train_df.columns[:-1].tolist()

# Names of the form "<device> - <status>" are grouped by their status part.
# The final entry of `classes` is left out of this parsing.
_class_status = [(name, name.split(" - ")[1]) for name in classes[:-1]]
classes_normal = [name for name, status in _class_status if status == "Normal"]
classes_abnormal = [name for name, status in _class_status if status == "Abnormal"]
classes_borderline = [name for name, status in _class_status if status == "Borderline"]

# Column-wise sum of each label over the whole training table.
classes_count = train_df[classes].sum(axis=0)
num_classes = len(classes_count)


print("Number of Classes: {}".format(num_classes))
classes_count

In [None]:
# Per-class weight keyed by class position:
#   weight[i] = (sum of all label counts) / (num_classes * count of class i)
# so labels with a smaller column sum receive a larger weight.
ls = list(classes_count.values)
tot_samples = sum(ls)

class_weights = {
    idx: tot_samples / (num_classes * count)
    for idx, count in zip(range(num_classes), ls)
}

class_weights

In [None]:
# Distinct PatientID values found in the label table.
patient_ids = train_df["PatientID"].unique()

# How many rows of train_df belong to each patient.
patientwise_count = train_df["PatientID"].value_counts()
num_patients = patientwise_count.shape[0]

print("Number of patients: ", num_patients)
patientwise_count

In [None]:
# Try to resolve a TPU; a ValueError here is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU resolved: fall back to whatever strategy TensorFlow currently provides.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Target size handed to tf.image.resize in decode_image; every image is
# resized to this before being fed to the model.
IMAGE_SIZE = [600,600]
# Sentinel passed as num_parallel_reads / num_parallel_calls / prefetch size
# so that tf.data picks those values itself.
AUTO = tf.data.experimental.AUTOTUNE


# All .tfrec shards found in the test_tfrecords directory of the dataset.
TEST_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + '/test_tfrecords/*.tfrec')

In [None]:
def decode_image(image_data):
    """Decode JPEG bytes into a float32, 3-channel image resized to IMAGE_SIZE.

    Args:
        image_data: scalar string tensor holding the encoded JPEG.

    Returns:
        The decoded image, cast to float32, divided by 255 and then resized
        to the module-level IMAGE_SIZE.
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.image.resize(scaled, list(IMAGE_SIZE))


def read_labeled_tfrecord(example):
    """Parse one serialized labeled record into an (image, label) pair.

    Args:
        example: a serialized tf.train.Example with a StudyInstanceUID, a
            JPEG-encoded "image" and one int64 feature per label.

    Returns:
        (image, label): the decoded image from decode_image and a list of 11
        float32 scalars, one per label, in the order of `label_names` below.
    """
    # The order here fixes the order of the returned label vector.
    label_names = [
        "ETT - Abnormal",
        "ETT - Borderline",
        "ETT - Normal",
        "NGT - Abnormal",
        "NGT - Borderline",
        "NGT - Incompletely Imaged",
        "NGT - Normal",
        "CVC - Abnormal",
        "CVC - Borderline",
        "CVC - Normal",
        "Swan Ganz Catheter Present",
    ]

    feature_spec = {
        "StudyInstanceUID": tf.io.FixedLenFeature([], tf.string),
        "image": tf.io.FixedLenFeature([], tf.string),
    }
    for name in label_names:
        feature_spec[name] = tf.io.FixedLenFeature([], tf.int64)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed['image'])
    label = [tf.cast(parsed[name], tf.float32) for name in label_names]
    return image, label


def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled record into an (image, id) pair.

    Args:
        example: a serialized tf.train.Example holding a StudyInstanceUID and
            a JPEG-encoded "image", both as string features.

    Returns:
        (image, idnum): the decoded image from decode_image and the
        StudyInstanceUID as a scalar string tensor.
    """
    string_feature = tf.io.FixedLenFeature([], tf.string)
    parsed = tf.io.parse_single_example(
        example,
        {"StudyInstanceUID": string_feature, "image": string_feature},
    )
    return decode_image(parsed['image']), parsed['StudyInstanceUID']

def load_dataset(filenames, labeled=True, ordered=False):
    """Read TFRecord files and parse every record into model-ready tensors.

    Args:
        filenames: TFRecord paths to read.
        labeled: choose read_labeled_tfrecord (True) or
            read_unlabeled_tfrecord (False) as the per-record parser.
        ordered: when False, deterministic ordering is switched off on the
            dataset options; when True the options are left at their defaults.

    Returns:
        An unbatched tf.data.Dataset of parsed records.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    return records.with_options(options).map(parse_fn, num_parallel_calls=AUTO)

In [None]:
def data_augment(image, label):
    """Randomly mirror the image left-to-right; the second element is passed through unchanged."""
    flipped = tf.image.random_flip_left_right(image)
    return flipped, label


def get_test_dataset(ordered=False, augment=False):
    """Build the batched, prefetched dataset of unlabeled test images.

    Args:
        ordered: forwarded to load_dataset; when False the records may be
            yielded in a non-deterministic order. Each element carries its own
            StudyInstanceUID alongside the image, so ids and images stay paired.
        augment: when True, run data_augment (a random left-right flip) on
            every image. Off by default: a single random flip at inference
            time is not averaged over anything, so it only makes the
            predictions differ from run to run.

    Returns:
        A tf.data.Dataset yielding (image_batch, id_batch) pairs.
    """
    dataset = load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
    if augment:
        dataset = dataset.map(data_augment, num_parallel_calls=AUTO)
    # BATCH_SIZE is a module-level name looked up when this function is called,
    # so it must be defined before the first call.
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO)
    return dataset

def count_data_items(filenames):
    """Sum the per-file item counts encoded in TFRecord filenames.

    Each filename is expected to contain "-<count>." (for example
    "00-1881.tfrec" holds 1881 items); the first such occurrence is used.

    Args:
        filenames: iterable of file paths/names.

    Returns:
        The total count as a plain int (0 for an empty iterable).

    Raises:
        ValueError: if a filename contains no "-<digits>." segment.
    """
    # Compile once instead of once per filename; require at least one digit so
    # an empty capture can never reach int().
    count_pattern = re.compile(r"-([0-9]+)\.")

    total = 0
    for filename in filenames:
        match = count_pattern.search(filename)
        if match is None:
            raise ValueError(
                "Cannot read item count from filename: {!r}".format(filename)
            )
        total += int(match.group(1))
    return total


# Total number of test images, derived from the counts embedded in the
# TFRecord filenames (no records are actually read here).
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)
print('Dataset: {} unlabeled test images'.format(NUM_TEST_IMAGES))

In [None]:
# Global batch size: 16 images per replica of the active distribution strategy.
# get_test_dataset reads this name, so it has to be set before the call below.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

# Build the test pipeline with its default arguments and show the dataset's repr.
test_ds = get_test_dataset()
print("Test:", test_ds)

In [None]:
# Install packages from local directories under /kaggle/input (quiet mode);
# the second install skips dependency resolution via --no-deps.
!pip install /kaggle/input/kerasapplications -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps
# NOTE(review): `efn` is not referenced in the visible cells; presumably the
# import is needed so load_model can resolve EfficientNet layers -- confirm.
import efficientnet.tfkeras as efn

In [None]:
# Load the saved Keras model from the attached input directory.
# NOTE(review): this runs outside strategy.scope(); confirm whether the model
# is meant to be placed on the TPU replicas.
model = tf.keras.models.load_model("../input/ranzcr-clip-tpu/model.h5")

In [None]:
# Run the model batch by batch, keeping one id and one prediction row per image.
test_ids = []
test_pred = []
images_done = 0
for image_batch, uid_batch in test_ds:
    batch_scores = model.predict(image_batch)
    for uid, scores in zip(uid_batch, batch_scores):
        images_done += 1
        if images_done % 500 == 0:
            print(str(images_done), "Test Images Done")
        # Convert the id tensor to a plain Python str right away.
        test_ids.append(np.array(uid).astype("str").tolist())
        test_pred.append(scores)

# One row per test image, one column per class, indexed by StudyInstanceUID.
test_df = pd.DataFrame(
    test_pred,
    index=test_ids,
    columns=classes,
)
test_df.index.name = "StudyInstanceUID"
test_df.to_csv("./submission.csv")