In [None]:
!pip install efficientnet -q

In [None]:
import os

import efficientnet.tfkeras as efficientnet
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf


def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one is reachable.

    The function tries to resolve, connect to and initialise a TPU cluster.
    If any of those steps raises ``ValueError`` it falls back to
    ``tf.distribute.get_strategy()``.

    Returns:
        The selected strategy object. Its ``num_replicas_in_sync`` value is
        printed before returning.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # Prefer the stable ``tf.distribute.TPUStrategy``; the ``experimental``
        # alias is deprecated and only used when the stable name is missing.
        strategy_cls = getattr(tf.distribute, 'TPUStrategy', None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        # No TPU could be set up: use whatever strategy is currently active.
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Build a function that reads an image file and returns a resized float tensor.

    Args:
        with_labels: When true, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes ``path`` and
            returns ``image``.
        target_size: Size passed to ``tf.image.resize``.
        ext: Image format, one of ``'png'``, ``'jpg'`` or ``'jpeg'``. Matching
            ignores case and a leading dot.

    Returns:
        The decoding function described above. The image is decoded with
        3 channels, cast to ``tf.float32``, divided by 255.0 and resized.

    Raises:
        ValueError: If ``ext`` is not a supported format. This is checked
            here, at build time, so a bad value fails at the call site
            instead of later when the dataset pipeline is assembled.
    """
    fmt = str(ext).lower().lstrip('.')
    if fmt not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")
    is_png = fmt == 'png'

    def decode(path):
        file_bytes = tf.io.read_file(path)
        if is_png:
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Build a random augmentation function for use with ``Dataset.map``.

    The augmentation applies, in order: random horizontal flip, random
    vertical flip, random brightness (max delta 0.025) and random contrast
    (factor between 0.975 and 1.025).

    Args:
        with_labels: When true, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)``; otherwise it takes and
            returns only the image.

    Returns:
        The augmentation function described above.
    """
    def augment(img):
        flipped = tf.image.random_flip_up_down(
            tf.image.random_flip_left_right(img))
        brightened = tf.image.random_brightness(flipped, 0.025)
        return tf.image.random_contrast(brightened, 0.975, 1.025)

    if not with_labels:
        return augment

    def augment_with_labels(img, label):
        # The label passes through untouched; only the image is perturbed.
        return augment(img), label

    return augment_with_labels


def build_dataset(
    paths, labels=None, bsize=32, cache=True, decode_fn=None, augment_fn=None,
    augment=True, repeat=True, shuffle=1024, cache_dir=''):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Pipeline order: decode -> cache -> augment -> shuffle -> repeat -> batch
    -> prefetch. Shuffling happens before repeating so the shuffle buffer does
    not mix examples from different passes over the data.

    Args:
        paths: Image paths passed to ``Dataset.from_tensor_slices``.
        labels: Optional labels sliced together with ``paths``. When ``None``
            the dataset yields images only.
        bsize: Batch size.
        cache: When truthy, decoded images are cached via ``Dataset.cache``.
        decode_fn: Decoding function; defaults to ``build_decoder`` with
            labels enabled exactly when ``labels`` is given.
        augment_fn: Augmentation function; defaults to ``build_augmenter``
            with labels enabled exactly when ``labels`` is given.
        augment: When truthy, ``augment_fn`` is mapped after the cache so
            cached images are re-augmented on every pass.
        repeat: When truthy, the dataset repeats indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Passed to ``Dataset.cache``. When non-empty and caching is
            enabled the directory is created first.

    Returns:
        The resulting dataset object.
    """
    if cache and cache_dir != '':
        os.makedirs(cache_dir, exist_ok=True)

    has_labels = labels is not None
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = (paths, labels) if has_labels else paths

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if shuffle:
        dset = dset.shuffle(shuffle)
    if repeat:
        dset = dset.repeat()

    return dset.batch(bsize).prefetch(AUTO)

def load_csv(params, file_path, key, sampling_rate=1.0, ext='jpg', random_state=None):
    """Read a CSV indexed by its first column and add a ``path`` column.

    Each path is ``<params['gcs_path']>/<key>/<index value>.<ext>``.

    Args:
        params: Mapping that provides ``'gcs_path'``.
        file_path: CSV location (anything ``pandas.read_csv`` accepts).
        key: Sub-directory name inserted between the base path and the file name.
        sampling_rate: When below 1.0, that fraction of rows is sampled at
            random before paths are built.
        ext: File extension appended to each index value (without the dot).
        random_state: Seed forwarded to ``DataFrame.sample`` so sub-sampling
            can be made reproducible. ``None`` keeps it unseeded.

    Returns:
        The (optionally sub-sampled) DataFrame with the extra ``path`` column.
    """
    f = pd.read_csv(file_path, index_col=0)

    if sampling_rate < 1.0:
        f = f.sample(frac=sampling_rate, random_state=random_state)

    prefix = '{}/{}/'.format(params['gcs_path'], key)
    # Cast to str so numeric identifiers can be concatenated too.
    f['path'] = prefix + f.index.astype(str) + '.' + ext
    return f

In [None]:
# Run configuration shared by the cells below. Entries are added in stages
# because later values are derived from earlier ones.
info = {'competition_name': 'ranzcr-clip-catheter-line-classification'}

# Data locations and the accelerator strategy.
info.update(
    gcs_path=KaggleDatasets().get_gcs_path(info['competition_name']),
    input_path='/kaggle/input/{}'.format(info['competition_name']),
    strategy=auto_select_accelerator(),
)

# Model and data settings: 16 samples per replica; `weight_level` indexes
# `image_sizes` and also selects the EfficientNetB<level> variant.
info.update(
    batch_size=info['strategy'].num_replicas_in_sync * 16,
    image_sizes=(224, 240, 260, 300, 380, 456, 528, 616),
    weight_level=7,
    labels=[
        'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
        'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
        'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
        'Swan Ganz Catheter Present',
    ],
)

# Training schedule.
info.update(
    label_size=len(info['labels']),
    epochs=20,
    patience=3,
    min_lr=1e-6,
)
info

In [None]:
%%time

def learn(info):
    """Train one EfficientNet multi-label classifier as configured by ``info``.

    Steps: load ``train.csv``, make a seeded 80/20 train/validation split,
    build the ``tf.data`` pipelines, create and compile the model inside the
    distribution strategy scope, then fit while checkpointing the best
    ``val_auc`` and reducing the learning rate when ``val_auc`` plateaus.

    Args:
        info: Configuration dict. Keys read here: ``'input_path'``,
            ``'labels'``, ``'random_state'``, ``'image_sizes'``,
            ``'weight_level'``, ``'batch_size'``, ``'strategy'``,
            ``'label_size'``, ``'epochs'``, ``'patience'`` and ``'min_lr'``
            (``load_csv`` additionally reads ``'gcs_path'``).

    Returns:
        The object returned by ``model.fit`` (the training history).
    """
    level = info['weight_level']
    side = info['image_sizes'][level]
    seed = info['random_state']
    batch_size = info['batch_size']

    # Row order feeds the seeded shuffle inside train_test_split, so the sort
    # influences which rows land in each split.
    train_df = load_csv(
        info, '{}/train.csv'.format(info['input_path']), 'train'
    ).sort_values('PatientID')

    # NOTE(review): the split is row-wise, so images of one PatientID can end
    # up in both train and validation - consider a grouped split if that
    # leakage matters for the reported val_auc.
    tra_paths, val_paths, tra_labels, val_labels = train_test_split(
        train_df['path'], train_df[info['labels']],
        test_size=0.2, shuffle=True, random_state=seed)

    decoder = build_decoder(with_labels=True, target_size=(side, side))
    tra_dataset = build_dataset(
        tra_paths, tra_labels, bsize=batch_size, decode_fn=decoder)
    val_dataset = build_dataset(
        val_paths, val_labels, bsize=batch_size, decode_fn=decoder,
        repeat=False, shuffle=False, augment=False)

    with info['strategy'].scope():
        backbone_cls = getattr(efficientnet, 'EfficientNetB{}'.format(level))
        model = tf.keras.Sequential([
            backbone_cls(input_shape=(side, side, 3), weights='imagenet', include_top=False),
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(info['label_size'], activation='sigmoid'),
        ])
        model.compile(
            optimizer='adam', loss='binary_crossentropy',
            metrics=[tf.keras.metrics.AUC(name='auc', multi_label=True)])

    # The training dataset repeats forever, so an epoch length must be given;
    # keep it at least 1 for training sets smaller than one batch.
    steps_per_epoch = max(1, tra_paths.shape[0] // batch_size)

    # The checkpoint name carries the actual image size and the seed.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'model_{}_{}.h5'.format(side, seed),
        save_best_only=True, monitor='val_auc', mode='max')
    lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_auc', patience=info['patience'], min_lr=info['min_lr'], mode='max')

    history = model.fit(
        tra_dataset, validation_data=val_dataset, epochs=info['epochs'], verbose=2,
        callbacks=[checkpoint, lr_reducer], steps_per_epoch=steps_per_epoch)
    return history


# Train once per seed; `learn` reads the seed from info['random_state'].
for state in (51,):
    info.update(random_state=state)
    learn(info)

In [None]:
ls -l