In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input mount and print the full path of every file found.
# NOTE(review): one line is printed per file, so the output grows with the dataset size.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name to get a full path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
from tensorflow.keras.layers.experimental import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from keras.layers import Dense, Flatten, Activation, Conv2D, MaxPooling2D, Dropout, Conv2D,MaxPooling2D,GlobalAveragePooling2D,BatchNormalization
from tensorflow.keras import Model
from keras.applications import ResNet50 
from tensorflow.keras.applications import ResNet152
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_v3 import InceptionV3
from keras.models import Sequential
from keras import optimizers
from keras.optimizers import Adam,RMSprop
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint
import tensorflow_hub as hub

In [None]:
# Align the TensorFlow version used by the TPU with the one loaded in this kernel.
print("update TPU server tensorflow version...")

# IPython shell escape: installs the client package at runtime. It has to run
# before the `cloud_tpu_client` import below.
!pip install cloud-tpu-client
import tensorflow as tf 
from cloud_tpu_client import Client
# Show the kernel's TensorFlow version, which is the version requested below.
print(tf.__version__)
# Request that the TPU be configured for the kernel's TensorFlow version.
# NOTE(review): restart_type='ifNeeded' presumably restarts the TPU runtime only
# when the versions differ - confirm against the cloud-tpu-client documentation.
Client().configure_tpu_version(tf.__version__, restart_type='ifNeeded')

In [None]:
# Side length, in pixels, of the square images produced by the decoder; it is
# passed as target_size=(img_size, img_size) when the datasets are built.
img_size = 224


In [None]:
def auto_select_accelerator():
    """Choose a tf.distribute strategy for the available hardware and seed the RNGs.

    A TPU cluster is resolved first. If resolution raises ValueError, the
    default strategy returned by tf.distribute.get_strategy() is used instead.
    Before returning, NumPy and TensorFlow are seeded with 1024 and the
    PYTHONHASHSEED / TF_DETERMINISTIC_OPS environment variables are set.

    Returns:
        The selected tf.distribute strategy object.
    """
    # No arguments are needed when the TPU_NAME environment variable is set,
    # which is always the case on Kaggle.
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', resolver.master())
    except ValueError:
        resolver = None

    if not resolver:
        # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
        strategy = tf.distribute.get_strategy()
    else:
        # Connect to the TPU workers and initialise them before building the strategy.
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)

    print("REPLICAS: ", strategy.num_replicas_in_sync)

    # Seed every random source the notebook relies on with one fixed value.
    seed = 1024
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'

    return strategy


def build_decoder(with_labels=True, target_size=(224, 224), ext='jpg'):
    """Create a function that turns an image path into a resized float image.

    Args:
        with_labels: when True the returned function takes ``(path, label)`` and
            returns ``(image, label)``; otherwise it takes and returns one value.
        target_size: size passed to ``tf.image.resize``.
        ext: image format, one of 'png', 'jpg' or 'jpeg'. The value is only
            checked when the returned function is called, where any other
            value raises ValueError.

    Returns:
        The decoding callable described above.
    """
    def decode(path):
        raw = tf.io.read_file(path)

        # Pick the decoder that matches the configured extension; always 3 channels.
        if ext in ['jpg', 'jpeg']:
            pixels = tf.image.decode_jpeg(raw, channels=3)
        elif ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        else:
            raise ValueError("Image extension not supported")

        # Cast to float32 and divide by 255 before resizing.
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    if not with_labels:
        return decode

    def decode_with_labels(path, label):
        # The label passes through untouched; only the path is decoded.
        return decode(path), label

    return decode_with_labels


def build_augmenter(with_labels=True):
    """Create a function that applies random flips and colour jitter to an image.

    Args:
        with_labels: when True the returned function takes ``(img, label)`` and
            returns ``(augmented_img, label)``; otherwise it takes and returns
            only the image.

    Returns:
        The augmentation callable described above.
    """
    def augment(img):
        # Geometric jitter: random horizontal and vertical flips.
        flipped = tf.image.random_flip_up_down(
            tf.image.random_flip_left_right(img)
        )

        # Colour jitter, applied in the order hue -> saturation -> contrast -> brightness.
        jittered = tf.image.random_hue(flipped, 0.01)
        jittered = tf.image.random_saturation(jittered, 0.70, 1.30)
        jittered = tf.image.random_contrast(jittered, 0.80, 1.20)
        return tf.image.random_brightness(jittered, 0.10)

    if not with_labels:
        return augment

    def augment_with_labels(img, label):
        # Only the image is perturbed; the label is returned as given.
        return augment(img), label

    return augment_with_labels


def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=2048, 
                  cache_dir=""):
    """Assemble a batched, prefetched tf.data pipeline from image paths.

    Stages are applied in this order: decode -> cache -> augment -> repeat ->
    shuffle -> batch -> prefetch. Each optional stage is skipped when its
    flag is falsy.

    Args:
        paths: image paths to slice into dataset elements.
        labels: optional labels sliced together with ``paths``; when None the
            dataset yields images only.
        bsize: batch size passed to ``Dataset.batch``.
        cache: whether to cache the decoded images.
        decode_fn: decoding function; defaults to ``build_decoder(...)`` with
            ``with_labels`` set according to whether labels were given.
        augment_fn: augmentation function; defaults to ``build_augmenter(...)``
            chosen the same way.
        augment: whether to apply ``augment_fn``.
        repeat: whether to repeat the dataset indefinitely.
        shuffle: shuffle buffer size, or a falsy value to disable shuffling.
        cache_dir: value passed to ``Dataset.cache``. When it is non-empty and
            ``cache`` is exactly True, the directory is created first.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    has_labels = labels is not None

    if cache is True and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    # Fall back to the default decoder / augmenter matching the label mode.
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE

    source = (paths, labels) if has_labels else paths
    dset = tf.data.Dataset.from_tensor_slices(source).map(
        decode_fn, num_parallel_calls=AUTO
    )

    # Caching happens before augmentation, so only decoded images are cached.
    if cache:
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()
    if shuffle:
        dset = dset.shuffle(shuffle)

    return dset.batch(bsize).prefetch(AUTO)

In [None]:
# Competition identifier handed to KaggleDatasets to look up the data's GCS path.
COMPETITION_NAME = "ranzcr-clip-catheter-line-classification"

# Pick the distribution strategy (this call also seeds the RNGs).
strategy = auto_select_accelerator()

# The batch size scales with the replica count: 4 x num_replicas_in_sync.
BATCH_SIZE = 4 * strategy.num_replicas_in_sync

# GCS location of the competition files; the image paths are built on top of it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

print(f'batch size {BATCH_SIZE}')

# **Create Dataset.**

In [None]:
# The CSV metadata is read from the local input mount.
path = '/kaggle/input/ranzcr-clip-catheter-line-classification/'
train_df = pd.read_csv(f'{path}train.csv')
sample_submissions_df = pd.read_csv(f'{path}sample_submission.csv')

# Image locations are GCS paths of the form <GCS_DS_PATH>/<split>/<StudyInstanceUID>.jpg.
train_images = f'{GCS_DS_PATH}/train/' + train_df['StudyInstanceUID'] + '.jpg'
test_images = f'{GCS_DS_PATH}/test/' + sample_submissions_df['StudyInstanceUID'] + '.jpg'

# Get the multi-labels: every sample-submission column after the first is a
# label column, and the same columns are taken from the training frame.
label_columns = sample_submissions_df.columns[1:]
labels = train_df[label_columns].to_numpy()

# **Split Training Data.**

In [None]:
# Hold out 10% of the training rows for validation; the fixed random_state
# keeps the shuffled split the same on every run.
train_img, valid_img, train_labels, valid_labels = train_test_split(
    train_images,
    labels,
    test_size=0.10,
    shuffle=True,
    random_state=42,
)

# **Build TensorFlow Dataset.**

In [None]:
# Build the Tensorflow Train and Validation datasets.

# One labelled decoder shared by both pipelines, resizing to img_size x img_size.
decoder = build_decoder(with_labels=True, target_size=(img_size, img_size))

# Training pipeline: relies on build_dataset's defaults for cache, augment,
# repeat and shuffle.
train_data = build_dataset(
    train_img,
    train_labels,
    bsize=BATCH_SIZE,
    decode_fn=decoder,
)

# Validation pipeline: a single pass in original order, with no augmentation.
valid_data = build_dataset(
    valid_img,
    valid_labels,
    bsize=BATCH_SIZE,
    decode_fn=decoder,
    augment=False,
    repeat=False,
    shuffle=False,
)

# **Visualizing Data.**

In [None]:
# Visualize training data with augmentation.
import matplotlib.pyplot as plt

# Pull exactly one augmented batch. The previous `data, _ = train_data.take(2)`
# materialised two batches only to unpack and discard the second.
images, _batch_labels = next(iter(train_data))

# Contrast and brightness jitter can push values slightly outside [0, 1];
# clip so imshow shows the images without range warnings.
images = np.clip(images.numpy(), 0.0, 1.0)

fig, axes = plt.subplots(4, 4, figsize=(12, 12))
axes = axes.flatten()

# Hide every axis up front so unused cells stay blank when the batch holds
# fewer than 16 images (BATCH_SIZE is 4 per replica).
for ax in axes:
    ax.axis('off')

# zip stops at the shorter sequence, so at most 16 images are drawn.
for img, ax in zip(images, axes):
    ax.imshow(img)

plt.tight_layout()
plt.show()

# **Building Model: Transfer learning with BiT-M R101x3.**

In [None]:
# GCS location of the pre-trained BiT backbones (dataset 'big-transfer-models-without-top').
MODELPATH = KaggleDatasets().get_gcs_path('big-transfer-models-without-top')

# The backbone is intentionally NOT loaded in this cell. The model-building cell
# loads it with hub.KerasLayer inside strategy.scope(), so loading it here as
# well would only repeat that load and leave an unused copy behind.
#
# Backbone variants the author listed as alternatives under MODELPATH:
#   bit_m-r50x1_1, bit_m-r50x3_1, bit_m-r101x1_1,
#   bit_m-r101x3_1 (the one used by the model cell), bit_m-r152x4_1

In [None]:
# Build and compile the classifier inside the strategy scope so that its
# variables are created under the selected distribution strategy.
with strategy.scope():
    # Width of the classification head: one sigmoid unit per label column,
    # derived from the data instead of a hard-coded 11.
    num_labels = len(label_columns)

    # The input resolution follows img_size, the same value the decoder resizes to.
    inputs = tf.keras.layers.Input(shape=(img_size, img_size, 3))

    # Load the pre-trained BiT backbone from GCS.
    MODELPATH = KaggleDatasets().get_gcs_path('big-transfer-models-without-top')
    module = hub.KerasLayer(f'{MODELPATH}/bit_m-r101x3_1/')

    # Fine-tune the whole backbone rather than training only the new head.
    back_bone = module
    back_bone.trainable = True

    # `logits` holds the backbone output; the Dense layer below is the actual
    # classification head. It is kept in float32 via dtype='float32'.
    logits = back_bone(inputs)
    outputs = tf.keras.layers.Dense(num_labels, activation='sigmoid', dtype='float32')(logits)
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name='Dr_Kudzayi_bit_m-r101x3_1_ranzcr_clip_catheter_possition_model')

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.03, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=[
                      # The explicit name keeps the log keys 'auc' / 'val_auc'
                      # stable when this cell is re-run; an unnamed metric gets
                      # auto-numbered ('auc_1', ...), which breaks the callbacks
                      # and plots that look up those keys.
                      # multi_label=True computes AUC per label and averages it,
                      # instead of pooling all label predictions into one curve.
                      tf.keras.metrics.AUC(name='auc',
                                           multi_label=True,
                                           num_labels=num_labels)
                  ]
                 )

    model.summary()

In [None]:
# One epoch should cover the training split once. Count the rows of train_img
# (the split passed to build_dataset), not train_images, which still includes
# the 10% held out for validation.
STEPS_PER_EPOCH = len(train_img) // BATCH_SIZE

In [None]:
# Keep the checkpoint with the lowest validation loss. save_weights_only is left
# at its default, so the whole model (not only the weights) is written to the file.
# NOTE(review): this callback tracks val_loss while the two below track val_auc -
# confirm that the difference is intended.
check_point = ModelCheckpoint('BiT_m_r101x3_1_RANZCR_Model_Best_Weights_TPU.h5',
                              monitor='val_loss',
                              save_best_only=True,
                              mode='min',
                              verbose=1
                             )

# Reduce the learning rate by 10x when validation AUC has not improved by at
# least 1e-3 for 2 epochs, never going below 1e-7.
reduce_learning_rate = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc",
                                                            patience=2,
                                                            factor=0.1,
                                                            min_delta=1e-3,
                                                            min_lr=1e-7,
                                                            mode='max',
                                                            verbose=1
                                                           )

# Stop after 6 epochs without a 1e-3 gain in validation AUC and roll the model
# back to its best weights.
early_stop = EarlyStopping(monitor='val_auc',
                           min_delta=1e-3,
                           patience=6,
                           mode='max',
                           restore_best_weights=True,
                           verbose=1)

# early_stop was previously defined but never registered, so training always ran
# for all epochs; it is now part of the callback list.
callbacks_list = [check_point, reduce_learning_rate, early_stop]

# Upper bound on the number of epochs; early stopping may end training sooner.
initial_epochs = 40

# Train Model.
history = model.fit(train_data,
                    validation_data=valid_data,
                    epochs=initial_epochs,
                    steps_per_epoch=STEPS_PER_EPOCH,
                    callbacks=callbacks_list
                   )

# **Learning Curves.**

In [None]:
# Plot Training Loss vs Validation Loss and Training AUC vs Validation AUC.
def plot_history(history):
    """Plot training/validation loss and AUC curves side by side.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict must
            contain the keys 'loss', 'val_loss', 'auc' and 'val_auc'.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    curves = history.history
    fig, (ax_loss, ax_auc) = plt.subplots(1, 2, figsize=(18, 7))

    # Left panel: loss curves.
    ax_loss.plot(curves['loss'], label='Training Loss')
    ax_loss.plot(curves['val_loss'], label='Validation Loss')
    ax_loss.grid(False)
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss Magnitude')
    # The title previously read "Training Loss vs Training Loss".
    ax_loss.set_title('Training Loss vs Validation Loss')
    ax_loss.legend()

    # Right panel: AUC curves.
    ax_auc.plot(curves['auc'], label='Training AUC')
    ax_auc.plot(curves['val_auc'], label='Validation AUC')
    ax_auc.grid(False)
    ax_auc.set_xlabel('Epochs')
    # The y-axis was previously mislabelled "Loss Magnitude".
    ax_auc.set_ylabel('AUC')
    ax_auc.set_title('Training AUC vs Validation AUC')
    ax_auc.legend(loc='lower right')

    plt.show()
    
# Draw the loss and AUC curves recorded in `history` by model.fit.
plot_history(history)