# Reference

* [AW-Madison: EDA & In Depth Mask Exploration](https://www.kaggle.com/code/andradaolteanu/aw-madison-eda-in-depth-mask-exploration)
* [UWMGI Image Segmentation EDA](https://www.kaggle.com/code/tt195361/uwmgi-image-segmentation-eda)
* [UWMGI Image Segmentation Make TFRecords](https://www.kaggle.com/code/tt195361/uwmgi-image-segmentation-make-tfrecords)
* [UWMGI Image Segmentation Training](https://www.kaggle.com/code/tt195361/uwmgi-image-segmentation-training)

# Preparation

In [None]:
# Set SM_FRAMEWORK to tf.keras for segmentation_models, which is imported in
# a later cell (presumably it reads this variable at import time -- confirm).
%env SM_FRAMEWORK=tf.keras
# Install segmentation_models and the wheels shipped alongside it from the
# local ../input/segmentation-models-keras directory.
!pip install ../input/segmentation-models-keras/Keras_Applications-1.0.8-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/image_classifiers-1.0.0-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/efficientnet-1.0.0-py3-none-any.whl --quiet
!pip install ../input/segmentation-models-keras/segmentation_models-1.0.1-py3-none-any.whl --quiet

print("Segmentation Models installed.")

In [None]:
import numpy as np
import pandas as pd
import segmentation_models as sm
import tensorflow as tf
import os
import glob
import matplotlib.pyplot as plt
import cv2

# Record the TensorFlow version this run uses.
print(tf.__version__)

In [None]:
# Side length of the square model input, and the inference batch size.
IMAGE_SIZE, BATCH_SIZE = 224, 32

# Version tag and fold indices that identify the trained weight files.
VID = 'V43'
FOLD_I_LIST = [0]

# One weight-file name per fold index.
MODEL_FILE_NAME_LIST = [
    "seg_model_{0}_{1}.hdf5".format(VID, fold_i)
    for fold_i in FOLD_I_LIST
]

MODEL_FILE_NAME_LIST

In [None]:
# Dataset names; both directories are resolved relative to ../input.
DATA_SRC = 'uw-madison-gi-tract-image-segmentation'
MODEL_SRC = 'uwmgi-image-segmentation-my-data'
DATA_DIR, MODEL_DIR = (
    os.path.join('..', 'input', src_name)
    for src_name in (DATA_SRC, MODEL_SRC)
)

# Passed to Dataset.prefetch() when the test dataset is built.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# DataFrame

In [None]:
# Load the sample submission shipped with the competition data.
sample_sub_file_path = os.path.join(DATA_DIR, "sample_submission.csv")
sample_sub_df = pd.read_csv(filepath_or_buffer=sample_sub_file_path)

sample_sub_df

In [None]:
# Decide which rows to predict: the real test set when the sample submission
# has rows, otherwise a slice of the training data used as a stand-in.
if len(sample_sub_df) == 0:
    # Empty sample_submission.csv: build a dummy submission from train.csv.
    test_dir = os.path.join(DATA_DIR, "train")
    train_csv_path = os.path.join(DATA_DIR, 'train.csv')
    train_df = pd.read_csv(train_csv_path)
    # Per the original note, row 198 is a large_bowel row and 399 a stomach
    # row; 399 - 198 = 201 rows, which is divisible by 3 (three rows per id).
    train_test_df = train_df.iloc[198:399].copy().fillna('')
    submission_df = train_df.iloc[198:399].copy()
    submission_df = submission_df.rename(
        columns={"segmentation": "predicted"})
else:
    # Non-empty sample_submission.csv: use it as is.
    test_dir = os.path.join(DATA_DIR, "test")
    submission_df = sample_sub_df.copy()

# Start with empty predictions; the prediction loop fills them in.
submission_df['predicted'] = ''

print("test_dir:", test_dir)
submission_df

In [None]:
# Keep every third 'id' (assumes the submission rows come in groups of three
# per id, one row per class), so that test_df has one row per image.
test_df = submission_df.loc[::3, ['id']].copy()

test_df

In [None]:
# Split each id into its case, day and slice numbers (kept as strings).
test_df[['case_no', 'day_no', 'slice_no']] = test_df['id'].str.extract(
    r'case(\d+)_day(\d+)_slice_(\d+)', expand=True)

test_df

In [None]:
# slice_count is the slice_no of the last row (in row order) of each
# (case_no, day_no) group, broadcast to every row of that group.
test_df['slice_count'] = (
    test_df
    .groupby(['case_no', 'day_no'])['slice_no']
    .transform(lambda slice_nos: slice_nos.iloc[-1])
)

test_df

In [None]:
# Collect every .png found anywhere below test_dir.
file_path_pattern = os.path.join(test_dir, '**', '*.png')
file_paths = list(glob.iglob(file_path_pattern, recursive=True))
file_info_df = pd.DataFrame(data={"file_path": file_paths})

file_info_df

In [None]:
# Rebuild the submission id ("case<N>_day<N>_slice_<N>") from each file path.
file_info_df['id'] = file_info_df['file_path'].str.replace(
    r'^.*/(case\d+)_(day\d+)/scans/(slice_\d+)_.*$',
    r'\1_\2_\3',
    regex=True,
)

# The two integers following the slice number in the file name are stored as
# 'height' (first) and 'width' (second).
# NOTE(review): read_image reshapes with [width, height, 1], so these names
# look swapped relative to row/column order -- confirm the file-name
# convention before renaming anything.
file_info_df[['height', 'width']] = (
    file_info_df['file_path'].str.extract(r'/slice_\d+_(\d+)_(\d+)')
)

file_info_df

In [None]:
# Attach the file information to every test id (left join keeps all ids in
# test_df order), then index the result by id.
test_data_df = (
    test_df
    .merge(file_info_df, how='left', on='id')
    .set_index('id')
)

test_data_df

# Dataset

In [None]:
# https://www.kaggle.com/code/andradaolteanu/aw-madison-eda-in-depth-mask-exploration
def read_cv2_image(path):
    '''Read a scan from disk and rescale it to 8 bits.

    path: the full path to the .png file.

    Returns a uint8 array whose values were min-max normalised to [0, 255].

    Raises FileNotFoundError when OpenCV cannot read the file.'''

    # Read the file with its stored bit depth and channel layout.
    raw_image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if raw_image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than on .astype below.
        raise FileNotFoundError(
            "Could not read image file: {0}".format(path))

    # Work in float32 so that the normalisation is not done in integers.
    image = raw_image.astype('float32')
    # Scale to [0, 255]
    image = cv2.normalize(image, None, alpha = 0, beta = 255,
                        norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
    image = image.astype(np.uint8)

    return image

In [None]:
def read_image(file_path_bytes, height, width):
    """Load one scan as an (IMAGE_SIZE, IMAGE_SIZE, 1) float32 tensor.

    file_path_bytes: UTF-8 encoded path to the .png file.
    height, width: the two size values parsed from the file name; the
        loaded pixels are reshaped to [width, height, 1] before resizing.

    Pixel values are divided by 255 after the 8-bit conversion done by
    read_cv2_image, then resized bilinearly.
    """
    pixels = read_cv2_image(file_path_bytes.decode('utf-8'))
    unit_range = tf.cast(
        tf.constant(pixels, dtype=tf.uint8), dtype=tf.float32) / 255.0
    single_channel = tf.reshape(unit_range, [width, height, 1])
    resized = tf.image.resize(
        single_channel, [IMAGE_SIZE, IMAGE_SIZE],
        method=tf.image.ResizeMethod.BILINEAR)
    # Pin the static shape of the result.
    return tf.reshape(resized, [IMAGE_SIZE, IMAGE_SIZE, 1])

def make_test_data(
        file_path, height_str, width_str, slice_no_str, slice_count_str):
    """Build the 3-channel model input for one test row.

    All arguments are string tensors taken from test_data_df. The first
    two channels both hold the resized scan; the third is filled with
    slice_no / slice_count.
    """
    # Numeric values arrive as strings and are parsed inside the graph.
    size_h = tf.strings.to_number(height_str, out_type=tf.int32)
    size_w = tf.strings.to_number(width_str, out_type=tf.int32)

    # The image is decoded in Python (via cv2), hence numpy_function.
    scan = tf.numpy_function(
        func=read_image, inp=[file_path, size_h, size_w],
        Tout=tf.float32)

    slice_index = tf.strings.to_number(slice_no_str, out_type=tf.float32)
    slice_total = tf.strings.to_number(slice_count_str, out_type=tf.float32)
    position_ch = (slice_index / slice_total) * tf.ones_like(scan)

    return tf.concat([scan, scan, position_ch], axis=-1)

In [None]:
# One dataset element per row of test_data_df, in row order.
test_ds = tf.data.Dataset.from_tensor_slices(tuple(
    test_data_df[column]
    for column in (
        "file_path", "height", "width", "slice_no", "slice_count")))

# Map without parallel calls, then batch and prefetch.
test_ds = test_ds.map(make_test_data, num_parallel_calls=None)
test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(AUTOTUNE)

test_ds

# Model

In [None]:
# Loss components from segmentation_models, created once and reused.
dice_loss_fun = sm.losses.DiceLoss()
bce_loss_fun = sm.losses.BinaryCELoss()


def bce_dice_loss(y_true, y_pred):
    """Return the equally weighted sum of the Dice and binary CE losses."""
    return (0.5 * dice_loss_fun(y_true, y_pred)
            + 0.5 * bce_loss_fun(y_true, y_pred))

In [None]:
# https://www.kaggle.com/code/ammarnassanalhajali/uwmgi-unet-keras-train-with-eda
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient of two tensors.

    y_true, y_pred: tensors of identical shape; both are flattened, so the
        coefficient is computed over all elements at once.
    smooth: constant added to numerator and denominator so the result is
        defined when both tensors sum to zero.
    """
    # Written with tf ops directly: the Keras backend alias `K` used by the
    # referenced notebook is never imported here, so calling it would raise
    # NameError.
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    coef = (2. * intersection + smooth) \
        / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
    return coef

In [None]:
def load_model(model_file_path):
    """Load a saved Keras model from model_file_path.

    The custom loss and metric are supplied by name through custom_objects
    so that the saved model can be deserialised.
    """
    custom_objects = {
        "bce_dice_loss": bce_dice_loss,
        "dice_coef": dice_coef,
    }
    return tf.keras.models.load_model(
        model_file_path, custom_objects=custom_objects)

In [None]:
# Use whatever distribution strategy is currently active (the TensorFlow
# default works on CPU and on a single GPU).
strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
def make_model(model_file_name):
    """Load the model stored under MODEL_DIR within the strategy scope."""
    with strategy.scope():
        return load_model(os.path.join(MODEL_DIR, model_file_name))

In [None]:
# One loaded model per configured weight file.
model_list = list(map(make_model, MODEL_FILE_NAME_LIST))

# Predict

In [None]:
def resize_image_to(image, height, width):
    """Bilinearly resize image to the size [width, height].

    The size is passed to tf.image.resize in that order, mirroring the
    [width, height, 1] reshape used when the image was loaded.
    """
    target_size = [width, height]
    return tf.image.resize(
        image, target_size, method=tf.image.ResizeMethod.BILINEAR)

In [None]:
def encode_pred(one_pred):
    """Run-length encode a binary mask into a submission string.

    one_pred: array-like mask; elements equal to 1 are foreground. It is
        flattened in row-major order before encoding.

    Returns a space-separated string of "start length" pairs, one pair per
    run of consecutive foreground pixels, or '' when there is none (which
    includes an empty mask). Start positions are 1-based, as in the Kaggle
    run-length format where "1 3" denotes pixels 1, 2 and 3.
    """
    foreground = (np.asarray(one_pred).ravel() == 1).astype(np.int8)

    # Pad with background on both sides so that runs touching either end of
    # the mask still produce a rising and a falling edge.
    padded = np.concatenate(([0], foreground, [0]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Edges alternate between run start (inclusive, 0-based) and run end
    # (exclusive, 0-based).
    run_starts = edges[0::2]
    run_lengths = edges[1::2] - run_starts

    encode_list = []
    for run_start, run_length in zip(run_starts, run_lengths):
        encode_list.append(str(run_start + 1))  # convert to 1-based
        encode_list.append(str(run_length))

    return ' '.join(encode_list)

In [None]:
def make_predictions(raw_pred, height, width):
    """Turn one raw prediction into three encoded mask strings.

    raw_pred: model output for one image, with three channels on the last
        axis.
    height, width: size values forwarded to resize_image_to.

    Returns (large_bowel, small_bowel, stomach) encodings, taken from
    channels 0, 1 and 2 after thresholding at 0.5.
    """
    restored = resize_image_to(raw_pred, height, width)
    bin_pred = np.where(restored >= 0.5, 1, 0)
    return tuple(
        encode_pred(bin_pred[:, :, channel]) for channel in range(3))

In [None]:
# Index by (id, class) so that each prediction can be written to its own row.
submission_df = submission_df.set_index(keys=['id', 'class'])

submission_df

In [None]:
def make_test_data_df_iter():
    """Yield (sample_id, height, width) for each row of test_data_df.

    sample_id is the row's index label; height and width are converted to
    int. Rows are produced in DataFrame order.
    """
    rows = zip(
        test_data_df.index, test_data_df['height'], test_data_df['width'])
    for sample_id, height, width in rows:
        yield sample_id, int(height), int(width)

In [None]:
def aug_none(image):
    """Identity augmentation: hand back the input object unchanged."""
    return image

def aug_horizontal_flip(image):
    """Return image flipped left-to-right.

    The prediction loop applies this both to the input batch and to the
    resulting prediction.
    """
    return tf.image.flip_left_right(image)

In [None]:
# Row iterator consumed in step with the dataset: one row per predicted image.
test_data_df_iter = make_test_data_df_iter()
aug_fun_list = [aug_none, aug_horizontal_flip]

for test_batch in test_ds:
    print('.', end='', flush=True)

    # Accumulate the predictions of every model / augmentation pair.
    bs = test_batch.shape[0]
    raw_pred_batch = np.zeros(
        (bs, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
    for model in model_list:
        for aug_fun in aug_fun_list:
            # Augment the input, predict, then apply the same function to
            # the prediction before adding it to the running sum.
            raw_pred_batch += aug_fun(
                model.predict_on_batch(aug_fun(test_batch)))
    # Turn the sum into a mean.
    raw_pred_batch /= len(model_list) * len(aug_fun_list)

    for raw_pred in raw_pred_batch:
        sample_id, height, width = next(test_data_df_iter)
        encoded_preds = make_predictions(raw_pred, height, width)
        class_names = ('large_bowel', 'small_bowel', 'stomach')
        for class_name, encoded_pred in zip(class_names, encoded_preds):
            submission_df.at[(sample_id, class_name), 'predicted'] = \
                encoded_pred
print()

submission_df

# Submit

In [None]:
# Move the (id, class) index levels back into ordinary columns.
submission_df = submission_df.reset_index(drop=False)

submission_df

In [None]:
# Write the submission to the working directory, without the row index.
submission_df.to_csv('submission.csv', index=False)

# Show the first lines of the written file as a quick check.
! head submission.csv