In [None]:
import os

import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [None]:
def auto_select_accelerator():
    """
    Pick a tf.distribute strategy: a TPU strategy when a TPU cluster can be
    resolved, otherwise whatever tf.distribute.get_strategy() returns.

    The TPU path resolves the cluster, connects to it, initialises the TPU
    system and builds a TPUStrategy. If any of those steps raises ValueError,
    the function falls back to tf.distribute.get_strategy().

    Returns:
        The selected strategy object. Its replica count is printed before
        returning.

    Reference: 
        * https://www.kaggle.com/mgornergoogle/getting-started-with-100-flowers-on-tpu
        * https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # tf.distribute.experimental.TPUStrategy is deprecated in favour of
        # tf.distribute.TPUStrategy; keep the experimental alias only as a
        # fallback for TensorFlow builds that do not ship the stable name.
        tpu_strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if tpu_strategy_cls is None:
            tpu_strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = tpu_strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    
    return strategy


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """
    Build a function that reads an image file and returns it as a float tensor.

    The returned function reads the file at `path`, decodes it with three
    channels, casts it to float32, divides by 255 and resizes it with
    tf.image.resize.

    Args:
        with_labels: when True the returned function takes (path, label) and
            returns (image, label); otherwise it takes only `path` and
            returns the image.
        target_size: size passed to tf.image.resize.
        ext: image format, one of 'png', 'jpg' or 'jpeg'. Matching is
            case-insensitive and a leading dot is ignored.

    Returns:
        `decode_with_labels` if `with_labels` is true, else `decode`.

    Raises:
        ValueError: if `ext` is not a supported format. This is raised when
            the decoder is built rather than when it first runs.
    """
    # Validate up front so a bad extension fails at build time instead of
    # inside the dataset pipeline, after the file-read op.
    fmt = str(ext).lower().lstrip('.')
    if fmt not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)
        if fmt == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img
    
    def decode_with_labels(path, label):
        return decode(path), label
    
    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """
    Build the augmentation function used by the dataset pipeline.

    The augmentation applies a random left/right flip followed by a random
    up/down flip.

    Args:
        with_labels: when True the returned function takes (img, label) and
            passes the label through untouched; otherwise it takes only img.

    Returns:
        `augment_with_labels` if `with_labels` is true, else `augment`.
    """
    def augment(img):
        # Horizontal flip first, then vertical.
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    def augment_with_labels(img, label):
        augmented = augment(img)
        return augmented, label

    if with_labels:
        return augment_with_labels
    return augment


def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024, 
                  cache_dir=""):
    """
    Build a batched, prefetched tf.data.Dataset from image paths.

    Pipeline order: slice -> decode -> cache -> augment -> repeat -> shuffle
    -> batch -> prefetch. Each optional stage is skipped when its flag is
    falsy.

    Args:
        paths: image paths, sliced with Dataset.from_tensor_slices.
        labels: optional labels sliced alongside `paths`. When None the
            dataset yields images only.
        bsize: batch size passed to Dataset.batch.
        cache: truthy to cache the decoded elements.
        decode_fn: function mapped over the slices; defaults to
            build_decoder(labels is not None).
        augment_fn: function mapped after caching; defaults to
            build_augmenter(labels is not None). Only used when `augment`
            is truthy.
        augment: truthy to apply `augment_fn`.
        repeat: truthy to repeat the dataset indefinitely.
        shuffle: buffer size passed to Dataset.shuffle; a falsy value
            disables shuffling.
        cache_dir: passed to Dataset.cache as its filename argument. When
            non-empty and caching is enabled, the directory is created first.

    Returns:
        The assembled dataset.
    """
    # Use the same truthiness test that enables caching further down, so a
    # truthy non-bool `cache` (e.g. 1) also gets its directory created.
    if cache and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)
    
    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)
    
    # The augmenter is only needed when augmentation is actually applied.
    if augment and augment_fn is None:
        augment_fn = build_augmenter(labels is not None)
    
    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)
    
    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        # Cache before augmentation so the random flips are re-drawn on
        # every pass instead of being frozen into the cache.
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()
    if shuffle:
        # NOTE(review): shuffling after repeat lets elements from adjacent
        # epochs mix in the buffer; kept as in the original pipeline.
        dset = dset.shuffle(shuffle)
    dset = dset.batch(bsize).prefetch(AUTO)
    
    return dset

TRAIN

In [None]:
# COMPETITION_NAME = "ranzcr-clip-catheter-line-classification"
# strategy = auto_select_accelerator()
# BATCH_SIZE = strategy.num_replicas_in_sync * 16
# GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

In [None]:
# load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
# df = pd.read_csv(load_dir + 'train.csv')

# # paths = load_dir + "train/" + df['StudyInstanceUID'] + '.jpg'
# paths = GCS_DS_PATH + "/train/" + df['StudyInstanceUID'] + '.jpg'

# sub_df = pd.read_csv(load_dir + 'sample_submission.csv')

# # test_paths = load_dir + "test/" + sub_df['StudyInstanceUID'] + '.jpg'
# test_paths = GCS_DS_PATH + "/test/" + sub_df['StudyInstanceUID'] + '.jpg'

# # Get the multi-labels
# label_cols = sub_df.columns[1:]
# labels = df[label_cols].values

# n_labels = labels.shape[1]

In [None]:
# (train_paths, valid_paths, train_labels, valid_labels) = train_test_split(paths, labels, test_size=0.1, random_state=42)

In [None]:
# # Build the tensorflow datasets
# IMSIZES = (224, 240, 260, 300, 380, 456, 528, 600)
# im_size = IMSIZES[7]

# decoder = build_decoder(with_labels=True, target_size=(im_size, im_size))
# test_decoder = build_decoder(with_labels=False, target_size=(im_size, im_size))

# train_dataset = build_dataset(
#     train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
# )

# valid_dataset = build_dataset(
#     valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
#     repeat=False, shuffle=False, augment=False
# )

# test_dataset = build_dataset(
#     test_paths, cache=False, bsize=BATCH_SIZE, decode_fn=test_decoder,
#     repeat=False, shuffle=False, augment=False
# )

In [None]:
# from tensorflow.keras.optimizers import RMSprop, Adam, SGD
# from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# from keras.models import Sequential
# from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization

# with strategy.scope():
#     base = tf.keras.applications.EfficientNetB7(weights='imagenet',
#                                             include_top=False, 
#                                             input_shape=(im_size, im_size, 3),
#                                             drop_connect_rate=0.5)

#     model = Sequential()

#     model.add(base)

#     model.add(GlobalAveragePooling2D())
#     model.add(Dense(n_labels, activation = 'sigmoid'))

#     model.compile(loss='binary_crossentropy', optimizer='adam', 
#                   metrics= [tf.keras.metrics.AUC(multi_label=True)])
#     model.summary()

In [None]:
# from tensorflow.keras.optimizers import RMSprop, Adam, SGD
# from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# checkpoint = ModelCheckpoint("bestmodel_tpu.h5",
#                                 save_best_only=True,
#                                 monitor='val_auc',
#                                 mode='max',
#                                 verbose=1)

# reduce_lr = ReduceLROnPlateau(monitor='val_auc',
#                               factor=0.2,
#                               patience=3,
#                               min_lr=1e-6,
#                               mode='max',
#                               verbose=1)

In [None]:
# steps_per_epoch = train_paths.shape[0] // BATCH_SIZE
# EPOCHS = 10

In [None]:
# history = model.fit(
#             train_dataset, 
#             epochs=10,
#             verbose=1,
#             callbacks=[checkpoint, reduce_lr],
#             steps_per_epoch=steps_per_epoch,
#             validation_data=valid_dataset)

# model.save('model_tpu.h5')

In [None]:
# COMPETITION_NAME = "ranzcr-clip-catheter-line-classification"
# strategy = auto_select_accelerator()
# BATCH_SIZE = strategy.num_replicas_in_sync * 16

In [None]:
# IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600)

# load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
# sub_df = pd.read_csv(load_dir + 'sample_submission.csv')
# test_paths = load_dir + "test/" + sub_df['StudyInstanceUID'] + '.jpg'

# # Get the multi-labels
# label_cols = sub_df.columns[1:]

# test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[7], IMSIZE[7]))
# dtest = build_dataset(
#     test_paths, bsize=BATCH_SIZE, repeat=False, 
#     shuffle=False, augment=False, cache=False,
#     decode_fn=test_decoder
# )

In [None]:
# with strategy.scope():
#     model = tf.keras.models.load_model('../input/modeltpu2/bestmodel_tpu (2).h5')

In [None]:
# sub_df[label_cols] = model.predict(dtest, verbose=1)
# sub_df.to_csv('submission.csv', index=False)

# sub_df

In [None]:
# Inference: load the saved model, predict every test image listed in the
# sample submission, and write submission.csv.
import cv2

model = tf.keras.models.load_model('../input/modeltpu2/bestmodel_tpu (2).h5')

sub = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
test_dir = '../input/ranzcr-clip-catheter-line-classification/test/'

BS = 16
IMG_SIZE = 600
SIZE = (IMG_SIZE,IMG_SIZE)

classes = ['ETT - Abnormal',
         'ETT - Borderline',
         'ETT - Normal',
         'NGT - Abnormal',
         'NGT - Borderline',
         'NGT - Incompletely Imaged',
         'NGT - Normal',
         'CVC - Abnormal',
         'CVC - Borderline',
         'CVC - Normal',
         'Swan Ganz Catheter Present']


def _load_test_image(img_name):
    """Read one test image and return it as a float32 array of shape (IMG_SIZE, IMG_SIZE, 3)."""
    path = test_dir + img_name + '.jpg'
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read test image: {path}")
    # cv2.imread yields BGR, while build_decoder in this notebook decodes
    # with tf.image.decode_jpeg (RGB); convert so the channel order matches.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, SIZE)
    return img.astype(np.float32) / 255.0


results = []
image_names = sub['StudyInstanceUID'].tolist()

# Predict BS images per model call instead of one call per image.
for start in range(0, len(image_names), BS):
    batch = np.stack([_load_test_image(name) for name in image_names[start:start + BS]])
    results.extend(model.predict_on_batch(batch))

pred_df = pd.DataFrame(columns=classes,data=results, index=sub.index)
pred_df = pd.concat([sub['StudyInstanceUID'],pred_df],axis=1)
pred_df.to_csv('submission.csv',index=False)