This notebook is the fifth and last part of [RANZCR 1st Place Solution by TF](https://www.kaggle.com/tt195361/ranzcr-1st-place-solution-by-tf-1-make-masks).
This notebook is based on [RANZCR 1st Place Soluiton Inference (small ver.)](https://www.kaggle.com/haqishen/ranzcr-1st-place-soluiton-inference-small-ver).

The last step is inference. The segmentation and classification models trained in the previous steps are used.

The original notebook uses 5 folds for both segmentation and classification models. This implementation uses 1 fold for the segmentation model and 5 folds for the classification model.

The structure of the inference model is as follows:

* 1024x1024 size images are put into the segmentation model. It outputs 1024x1024 masks.
* Masks from each fold are averaged. This implementation has only 1 fold, so the averaging has no effect. The generated masks are resized to 512x512.
* 512x512 size images and the resized masks are concatenated to make 5-channel inputs. They are put into the classification model.
* The classification model has 3 outputs, ETT, other, and pred. For the inference model, only the pred output is used.

In [None]:
# Select tf.keras as the framework for segmentation_models. The variable is
# set here, before the library is imported further down.
%env SM_FRAMEWORK=tf.keras
# Install segmentation_models and its dependencies from local wheel files
# under ../input/segmentation-models-keras.
!pip install ../input/segmentation-models-keras/Keras_Applications-1.0.8-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/image_classifiers-1.0.0-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/efficientnet-1.0.0-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/segmentation_models-1.0.1-py3-none-any.whl --quiet

print("Segmentation Models installed.")

In [None]:
# When True, only the first test TFRecord file is processed (quick trial run).
DEBUG = False

In [None]:
# libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.layers as L
import segmentation_models as sm

print(tf.__version__)

In [None]:
# Competition data: sample_submission.csv and the test TFRecords are read
# from this directory.
data_dir = '../input/ranzcr-clip-catheter-line-classification'
# Directory holding the saved segmentation / classification model files.
model_dir = '../input/ranzcr-1st-place-solution-by-tf-models'
# Side length (pixels) of the square input of the segmentation model.
seg_image_size = 1024
# Side length (pixels) of the square input of the classification model.
cls_image_size = 512
batch_size = 16 # original is 8

In [None]:
# Load the sample submission; it is used as the template for the result.
df_sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))
# Disabled shortcut: keep only the first 358 rows when the file has 3582 rows.
# df_sub = df_sub.iloc[:358] if df_sub.shape[0] == 3582 else df_sub

In [None]:
# File names (relative to model_dir) of the saved segmentation and
# classification models.
# Segmentation: a single file is used; it is combined with every
# classification fold below.
seg_model_names = [
    'seg_model_V10_0.hdf5'
]
# Classification: one file per fold.
cls_model_names = [
    'cls_model_V14_0.hdf5',
    'cls_model_V15_1.hdf5',
    'cls_model_V15_2.hdf5',
    'cls_model_V16_3.hdf5',
    'cls_model_V16_4.hdf5'
]

## Dataset

In [None]:
# Collect the test TFRecord files in sorted order.
tfrec_path = data_dir + '/test_tfrecords/*.tfrec'
tfrec_file_names = sorted(tf.io.gfile.glob(tfrec_path))
if DEBUG:
    # A debugging run only looks at the first file.
    tfrec_file_names = [tfrec_file_names[0]]

tfrec_file_names

In [None]:
# Shorthand for tf.data's AUTOTUNE constant; used below for the parallelism
# and prefetch settings of the input pipeline.
AUTOTUNE = tf.data.experimental.AUTOTUNE

def decode_image(image_data):
    """Decode JPEG-encoded bytes into a 3-channel image tensor."""
    return tf.image.decode_jpeg(image_data, channels=3)

def read_tfrecord(example):
    """Parse one serialized example into ``(image, study_inst_id)``.

    Both the 'image' and 'StudyInstanceUID' features are read as scalar
    strings; the image string is then decoded with ``decode_image``.
    """
    feature_spec = {
        feature_name: tf.io.FixedLenFeature([], tf.string)
        for feature_name in ('image', 'StudyInstanceUID')
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), parsed['StudyInstanceUID']

def load_dataset(filenames):
    """Build a dataset of ``(image, study_inst_id)`` pairs from TFRecords.

    Args:
        filenames: TFRecord file names to read.

    Returns:
        A ``tf.data`` dataset whose elements come from ``read_tfrecord``.
    """
    records = tf.data.TFRecordDataset(
        filenames, num_parallel_reads=AUTOTUNE)
    return records.map(read_tfrecord, num_parallel_calls=AUTOTUNE)

In [None]:
# Un-preprocessed test dataset; iterated below to collect the study IDs.
raw_test_ds = load_dataset(tfrec_file_names)

raw_test_ds

In [None]:
# Walk the raw dataset once and keep each record's study instance ID as a
# Python string; the images are ignored here.
study_inst_id_list = [
    uid.numpy().decode('utf-8') for _, uid in raw_test_ds
]

# Show the first and the last ten IDs as a sanity check.
print(study_inst_id_list[:10])
print(study_inst_id_list[-10:])

In [None]:
# Keep only as many submission rows as there are test records (the counts
# can differ, e.g. when DEBUG limits the run to the first TFRecord file).
orig_df_sub_shape = df_sub.shape[0]
n_study_inst_id = len(study_inst_id_list)
# .copy() makes the trimmed frame independent of the original one, so the
# column assignments performed on df_sub further down write to a real
# DataFrame instead of a slice (avoids pandas' SettingWithCopyWarning).
df_sub = df_sub.iloc[ :n_study_inst_id ].copy()

print("original df_sub.shape[0]:", orig_df_sub_shape)
print("updated df_sub.shape[0]: ", df_sub.shape[0])

In [None]:
def drop_study_inst_id(image, study_inst_id):
    """Return only the image of an ``(image, study_inst_id)`` pair.

    The study instance ID is accepted so the function can be mapped over
    the two-component dataset, but it is not used.
    """
    return image

def preprocess_image(image):
    """Create the two resized inputs of the inference model from one image.

    Returns:
        A one-element tuple ``((image_seg, image_cls), )`` where
        ``image_seg`` is resized to the segmentation size and divided by
        255, and ``image_cls`` is resized to the classification size and
        left unscaled.
    """
    seg_size = (seg_image_size, seg_image_size)
    cls_size = (cls_image_size, cls_image_size)
    # tf.image.resize() returns a float tensor, so the division is safe.
    # Segmentation model: range 0..1.
    image_seg = tf.image.resize(image, seg_size) / 255.0
    # Classification model: range 0..255.
    image_cls = tf.image.resize(image, cls_size)
    model_inputs = (image_seg, image_cls)
    return (model_inputs, )

def make_test_dataset():
    """Build the batched, prefetched input pipeline used for prediction.

    Records are loaded from ``tfrec_file_names``, the study IDs are
    dropped, and each image is turned into the model inputs by
    ``preprocess_image``.
    """
    return (
        load_dataset(tfrec_file_names)
        .map(drop_study_inst_id, num_parallel_calls=AUTOTUNE)
        .map(preprocess_image, num_parallel_calls=AUTOTUNE)
        .batch(batch_size)
        .prefetch(AUTOTUNE)
    )

In [None]:
# Preprocessed, batched test dataset that is fed to model.predict() below.
test_ds = make_test_dataset()

test_ds

In [None]:
from pylab import rcParams
rcParams['figure.figsize'] = 20, 10

# Preview the first five samples of the test dataset.
test_ds_iter = iter(test_ds.unbatch())

# First figure: segmentation-size images (already scaled to 0..1).
f, axarr = plt.subplots(1, 5)
img512_list = []
for ax in axarr:
    img1024, img512 = next(test_ds_iter)[0]
    ax.imshow(img1024)
    img512_list.append(img512)

# Second figure: classification-size images, scaled to 0..1 for display.
f, axarr = plt.subplots(1, 5)
for ax, img512 in zip(axarr, img512_list):
    ax.imshow(img512 / 255.0)

## Model

In [None]:
def load_model(weight_file_name):
    """Load the saved Keras model ``weight_file_name`` from ``model_dir``."""
    return tf.keras.models.load_model(
        os.path.join(model_dir, weight_file_name))

def make_seg_masks(x):
    """Average the segmentation outputs of all folds.

    Args:
        x: list with one segmentation output tensor per fold.

    Returns:
        The element-wise mean over the fold axis.
    """
    # Stacked layout: [ fold, batch, height, width, channel ]
    stacked = tf.stack(x, axis=0)
    return tf.math.reduce_mean(stacked, axis=0)

def make_cls_inputs(x):
    """Combine images and segmentation masks into the classifier input.

    Args:
        x: two-element sequence ``[cls_images, seg_masks]``.
            cls_images: (512, 512, 3), [0..255]
            seg_masks: (1024, 1024, 2), [0..1]

    Returns:
        The images concatenated on the channel axis with the masks, after
        the masks were resized to the classification size and multiplied
        by 255.
    """
    cls_images, seg_masks = x[0], x[1]
    target_size = (cls_image_size, cls_image_size)
    scaled_masks = tf.image.resize(seg_masks, target_size) * 255.0
    return tf.concat([cls_images, scaled_masks], axis=-1)

def make_model(cls_model_name):
    """Assemble the end-to-end inference model for one classification fold.

    The segmentation models listed in ``seg_model_names`` produce masks
    that are averaged, merged with the classification-size images, and
    passed to the classification model loaded from ``cls_model_name``.

    Args:
        cls_model_name: file name of the classification model to load.

    Returns:
        A Keras model with inputs ``[seg_images, cls_images]`` whose output
        is the third output (``pred``) of the classification model.
    """
    # Segmentation branch: every listed model sees the same input.
    seg_images = tf.keras.Input(
        shape=(seg_image_size, seg_image_size, 3), name="seg_images")
    seg_outputs = [
        load_model(seg_model_name)(seg_images)
        for seg_model_name in seg_model_names
    ]
    seg_masks = L.Lambda(make_seg_masks, name="seg_masks")(seg_outputs)

    # Classification branch: images plus the averaged masks.
    cls_images = tf.keras.Input(
        shape=(cls_image_size, cls_image_size, 3), name="cls_images")
    cls_inputs = L.Lambda(make_cls_inputs, name="cls_inputs")(
        [cls_images, seg_masks])
    # The classifier has three outputs; only the last one is used here.
    _, _, pred = load_model(cls_model_name)(cls_inputs)

    return tf.keras.Model(
        inputs=[seg_images, cls_images],
        outputs=pred,
        name="infer_model")

In [None]:
# Default distribution strategy in TensorFlow. Works on CPU and single GPU.
strategy = tf.distribute.get_strategy()

# Report how many replicas the strategy keeps in sync.
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Run inference once per classification fold and collect the predictions.
PROBS = []
show_summary = True
for cls_model_name in cls_model_names:
    print("####################")
    print("# {0}".format(cls_model_name))
    # Each iteration builds a complete new model (the segmentation model is
    # loaded again as well). Reset Keras' global state first so that state
    # left behind by the previous fold does not accumulate.
    tf.keras.backend.clear_session()
    with strategy.scope():
        model = make_model(cls_model_name)
    if show_summary:
        # Print the model summary for the first fold only.
        model.summary()
        show_summary = False

    pred = model.predict(test_ds, verbose=1)
    PROBS.append(pred)
    print()

# Stack the per-fold predictions; axis 0 is the fold axis.
PROBS = np.array(PROBS)
PROBS.shape

## Check Distribution

In [None]:
# Replace the submission's IDs with the IDs read from the TFRecords.
# NOTE(review): this presumably aligns the rows with the prediction order,
# since both come from load_dataset(tfrec_file_names) -- confirm.
df_sub['StudyInstanceUID'] = study_inst_id_list

In [None]:
# Submission columns that receive the prediction values.
target_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present'
]

In [None]:
# Put the mean prediction over the fold axis into the submission.
df_sub[target_cols] = PROBS.mean(0)

# Plot the distribution of the three CVC predictions as a visual check.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 and scheduled
# for removal; consider histplot/kdeplot if the environment is upgraded.
sns.distplot(df_sub[[
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
]])
plt.show()

## Rank Prediction & Submit

In [None]:
# Calculate percentile ranks for each fold: every fold gets its own copy of
# the submission frame, filled with that fold's predictions and then
# replaced column-wise by their ranks.
df_subs = []
for fold_probs in PROBS:
    fold_sub = df_sub.copy()
    fold_sub[target_cols] = fold_probs
    fold_sub[target_cols] = fold_sub[target_cols].rank(pct=True)  # rank
    df_subs.append(fold_sub)

In [None]:
# Average the per-fold rank values and store them in the submission.
rank_values = [fold_sub[target_cols].values for fold_sub in df_subs]
stacked_ranks = np.stack(rank_values, 0)
df_sub[target_cols] = stacked_ranks.mean(0)  # mean

In [None]:
# Write the submission file (without the index column) and preview its
# first lines.
df_sub.to_csv('submission.csv', index=False)

!head submission.csv