# Motivation

I noticed a discrepancy between my local validation and the public leaderboard, so I set out on a quest to find out where the problem was. 

It turned out that I had made a mistake while upsizing the predicted masks: I had swapped the heights and widths. 

So when I corrected that, I stumbled upon an insight.

### Slices whose height and width differ had different Dice scores before and after upsizing. Why?

Well, it had something to do with the resize method I used, which was padding. Let's take a look.

## Libraries and data

In [None]:
import os
import gc

from itertools import chain
from fastai.vision.all import *

# Detect the runtime by probing for the directory layout each platform exposes.
KAGGLE = os.path.exists('../input')
GRADIENT = os.path.exists('train')

# Experiment-wide constants.
SEED = 42
model_name = 'baseline-model'

# Kaggle wins when both layouts are present; `data_path` is left undefined
# when neither directory exists, so the lines below fail in that case.
if KAGGLE:
    data_path = '../input/uw-madison-gi-tract-image-segmentation/'
elif GRADIENT:
    data_path = ''

# Image folders, the label table, and the list of training image files.
path = Path(data_path + 'train')
test_path = Path(data_path + 'test')
train = pd.read_csv(data_path + 'train.csv', low_memory=False)
fnames = get_image_files(path)

## Some helper functions

In [None]:
def get_case_id(fname):
    """Build a slice id string from the components of an image path.

    The id is ``<parts[i]>_<first 10 characters of parts[i + 2]>`` where
    ``i`` is 5 on Kaggle and 2 on Gradient.
    NOTE(review): presumably ``parts[i]`` is the ``case###_day##`` folder and
    ``parts[i + 2]`` the ``slice_####_...`` file name — confirm against the
    dataset layout.
    """
    if KAGGLE:
        day_idx = 5
    elif GRADIENT:
        day_idx = 2
    day_dir = fname.parts[day_idx]
    slice_prefix = fname.parts[day_idx + 2][:10]
    return day_dir + '_' + slice_prefix

def check_file(file_id, fname):
    """Return True when the path ``fname`` belongs to the slice ``file_id``.

    ``file_id`` must split on ``_`` into exactly four fields (case, day, an
    ignored token, slice number). The case is compared with ``fname.parts[1]``,
    the day with the second ``_``-separated field of ``fname.parts[2]``, and
    the slice number must occur somewhere in the final path component.
    """
    case_id, day, _, slice_no = file_id.split('_')
    same_case = case_id == fname.parts[1]
    return (
        same_case
        and day == fname.parts[2].split('_')[1]
        and slice_no in fname.parts[-1]
    )

def get_file(file_id):
    """Return the first path in the module-level ``fnames`` that matches ``file_id``.

    Matching is delegated to ``check_file``. Indexing ``[0]`` fails when no
    path matches.

    The lookup now uses the ``file_id`` argument; previously the argument was
    ignored in favour of ``not_null_train.id[0]``, a name that is not defined
    in this file.
    """
    return fnames.filter(lambda f: check_file(file_id, f))[0]

# https://www.kaggle.com/code/dschettler8845/uwm-gi-tract-image-segmentation-eda
def get_custom_df(df, fnames, root):
    """Collapse the long-format label table into one row per slice.

    Args:
        df: frame with ``id`` (``case###_day##_slice_####``), ``class`` and
            ``segmentation`` columns, one row per (slice, class).
        fnames: image paths; each is matched to a slice through everything
            before its last four ``_``-separated fields.
        root: directory prefix used to rebuild that partial path from the id.

    Returns:
        A frame with one row per id holding the file path, the three per-class
        RLE strings with presence flags, the four numeric fields parsed from
        the file name and the parsed id components.
    """
    df = df.copy()

    def _id_field(position):
        # position 0 -> case token, 1 -> day token, 2 -> remaining slice token
        return df["id"].apply(lambda s: s.split("_", 2)[position])

    # Case, day and slice identifiers, as strings and (where numeric) as ints.
    df["case_id_str"] = _id_field(0)
    df["case_id"] = df["case_id_str"].apply(lambda s: int(s.replace("case", "")))
    df["day_num_str"] = _id_field(1)
    df["day_num"] = df["day_num_str"].apply(lambda s: int(s.replace("day", "")))
    df["slice_id"] = _id_field(2)

    # Rebuild ".../case###/case###_day##/scans/slice_####" and join it against
    # the same prefix extracted from every real file path.
    case_dir = root + '/' + df["case_id_str"] + "/"
    day_dir = df["case_id_str"] + "_" + df["day_num_str"]
    df["_partial_fname"] = case_dir + day_dir + "/scans/" + df["slice_id"]

    lookup = pd.DataFrame({
        "_partial_fname": [str(p).rsplit("_", 4)[0] for p in fnames],
        "fname": fnames,
    })
    df = df.merge(lookup, on="_partial_fname").drop(columns=["_partial_fname"])

    # Release the temporary lookup table.
    del lookup
    gc.collect()
    gc.collect()

    def _fname_field(position, cast):
        # Drop the 4-character extension, then take a trailing "_" field.
        return df["fname"].apply(lambda p: cast(str(p)[:-4].rsplit("_", 4)[position]))

    # The last four "_" fields of the file name: two sizes, two spacings.
    df["slice_h"] = _fname_field(1, int)
    df["slice_w"] = _fname_field(2, int)
    df["px_spacing_h"] = _fname_field(3, float)
    df["px_spacing_w"] = _fname_field(4, float)

    # Pivot the per-class segmentation rows into three columns on one row.
    class_columns = (
        ("large_bowel", "lb_seg_rle", "lb_seg_flag"),
        ("small_bowel", "sb_seg_rle", "sb_seg_flag"),
        ("stomach", "st_seg_rle", "st_seg_flag"),
    )
    per_class_frames = [
        df.loc[df["class"] == class_name, ["id", "segmentation"]]
          .rename(columns={"segmentation": rle_col})
        for class_name, rle_col, _ in class_columns
    ]
    for frame in per_class_frames:
        df = df.merge(frame, on="id", how="left")
    df = df.drop_duplicates(subset=["id"]).reset_index(drop=True)

    # Presence flags and the number of annotated classes per slice.
    for _, rle_col, flag_col in class_columns:
        df[flag_col] = df[rle_col].notna()
    df["n_segs"] = sum(df[flag_col].astype(int) for _, _, flag_col in class_columns)

    # Final column order; "class" and "segmentation" are dropped here.
    ordered_columns = [
        "id", "fname", "n_segs",
        "lb_seg_rle", "lb_seg_flag",
        "sb_seg_rle", "sb_seg_flag",
        "st_seg_rle", "st_seg_flag",
        "slice_h", "slice_w", "px_spacing_h",
        "px_spacing_w", "case_id_str", "case_id",
        "day_num_str", "day_num", "slice_id",
    ]
    return df[ordered_columns]

# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
# modified from: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    """Decode a run-length encoded mask into a float32 array.

    Args:
        mask_rle (str): whitespace-separated "start length" pairs, with
            1-based starts into the flattened image.
        shape (tuple of ints): (height, width) or (height, width, depth) of
            the array to return.
        color: value written into every pixel covered by a run.

    Returns:
        np.ndarray of dtype float32 and the requested shape, holding
        ``color`` inside the runs and 0 elsewhere.
    """
    tokens = np.array(mask_rle.split(), dtype=int)

    # Even positions are 1-based starts, odd positions are run lengths.
    run_starts = tokens[0::2] - 1
    run_stops = run_starts + tokens[1::2]

    # Runs index the flattened (height * width) pixel axis.
    if len(shape) == 3:
        rows, cols, depth = shape
        flat_shape = (rows * cols, depth)
    else:
        rows, cols = shape
        flat_shape = (rows * cols,)
    canvas = np.zeros(flat_shape, dtype=np.float32)

    for begin, end in zip(run_starts, run_stops):
        canvas[begin:end] = color

    # Restore the requested shape.
    return canvas.reshape(shape)

def get_image(row):
    """Load the image at ``row['fname']`` and rescale its values to [0, 255].

    The array's minimum is mapped to 0 and its maximum to 255 by linear
    interpolation.
    """
    pixels = np.array(Image.open(row['fname']))
    darkest, brightest = np.min(pixels), np.max(pixels)
    return np.interp(pixels, [darkest, brightest], [0, 255])
                   

def get_mask(row):
    """Build a 3-channel uint8 mask for one slice.

    Channels 0/1/2 come from the ``lb``/``sb``/``st`` RLE columns; a channel
    is decoded only when its ``*_seg_flag`` is truthy and is filled with 255
    inside the annotated runs. The array shape is
    ``(row['slice_w'], row['slice_h'], 3)``.
    """
    plane_shape = (row['slice_w'], row['slice_h'])
    mask = np.zeros((row['slice_w'], row['slice_h'], 3))

    channel_columns = (
        ('lb_seg_flag', 'lb_seg_rle'),
        ('sb_seg_flag', 'sb_seg_rle'),
        ('st_seg_flag', 'st_seg_rle'),
    )
    for channel, (flag_col, rle_col) in enumerate(channel_columns):
        if row[flag_col]:
            mask[..., channel] += rle_decode(row[rle_col], shape=plane_shape, color=255)

    return mask.astype(np.uint8)

In [None]:
test_fnames = get_image_files(test_path)
root = data_path+'test'

# No test images found: reuse the training images as a stand-in test set.
if not test_fnames:
    test_fnames, root = fnames, data_path+'train'

# Three placeholder rows (one per class) for every test image, mirroring the
# layout of train.csv with empty segmentations.
test = pd.DataFrame({
    'id': chain.from_iterable([get_case_id(fname)]*3 for fname in test_fnames),
    'class': chain.from_iterable(['large_bowel', 'small_bowel', 'stomach'] for _ in test_fnames),
    'segmentation': chain.from_iterable([np.nan]*3 for _ in test_fnames),
})

# Convert both tables to the one-row-per-slice layout.
test = get_custom_df(test, test_fnames, root)
train = get_custom_df(train, fnames, data_path+'train')

## Validation Set

In [None]:
# Fraction of cases, and then of remaining day numbers, to hold out.
valid_pct = 0.2

np.random.seed(SEED)

# 1. Hold out a random subset of whole cases.
cases = train.case_id.unique()
n_cases = len(cases)
random_cases = np.random.choice(cases, int(n_cases*valid_pct), replace=False)

train['is_valid'] = False
train.loc[train.case_id.isin(random_cases), 'is_valid'] = True

# 2. Among the day numbers still present in the training part, hold out a
#    random subset as well.
days = train.loc[~train['is_valid'], 'day_num'].unique()
n_days = len(days)
random_days = np.random.choice(days, int(n_days*valid_pct), replace=False)

# Select by day_num: random_days holds day numbers, so matching them against
# case_id (as was done before) marked unrelated cases instead of the sampled days.
train.loc[train.day_num.isin(random_days), 'is_valid'] = True

## Dataloaders

In [None]:
# Convert a PILMask with image2tensor and wrap the result in the mask's own tensor class.
# NOTE(review): presumably the decorator form registers this as an additional
# type-dispatched `encodes` on ToTensor — confirm against the fastcore docs.
@ToTensor
def encodes(self, o:PILMask): return o._tensor_cls(image2tensor(o))

# For TensorMask inputs, Normalize divides by 255 (masks are stored as 0/255).
@Normalize
def encodes(self, o:TensorMask): return o / 255

# Reverse of the encode above: multiply by 255 and cast to long.
@Normalize
def decodes(self, o:TensorMask): 
    # to_cpu is applied only when the tensor is already on the CPU; otherwise noop.
    f = to_cpu if o.device.type=='cpu' else noop
    return f((o * 255).long())

def get_aug_dls(aug=[], method='squish', sample=False, show=True):
    """Create segmentation dataloaders over ``train`` and return them with the frame used.

    Args:
        aug: extra batch transforms, placed before the ImageNet normalisation.
        method: resize method handed to ``Resize(160, method=...)``.
        sample: when True, use a 20% random sample of ``train`` (seeded with SEED).
        show: when True, display one batch.

    Returns:
        ``(dls, dev)`` — the dataloaders and the dataframe they were built from.
    """
    bs = 16

    normalise = [Normalize.from_stats(*imagenet_stats)]
    batch_tfms = [*aug] + normalise if aug else normalise

    dev = train.sample(frac=0.2, random_state=SEED) if sample else train

    block = DataBlock(
        (ImageBlock(cls=PILImageBW), MaskBlock),
        get_x=get_image,
        get_y=get_mask,
        splitter=ColSplitter(),
        item_tfms=[Resize(160, method=method)],
        batch_tfms=batch_tfms,
    )
    dls = block.dataloaders(dev, bs=bs, shuffle=True)
    dls.rng.seed(SEED)

    if show:
        dls.show_batch(nrows=bs//4, ncols=4, max_n=bs, figsize=(12, 12))

    return dls, dev

In [None]:
import matplotlib.patches as mpatches

@typedispatch
def show_batch(x:TensorImage, y:TensorMask, samples, ctxs=None, max_n=6, nrows=None, ncols=2, figsize=None, **kwargs):
    """Draw each image in grayscale with its mask overlaid and a class legend.

    Every image is divided by its own maximum before display. A grid of
    axes is created when ``ctxs`` is not supplied.
    """
    figsize = (ncols*3, max_n//ncols * 3) if figsize is None else figsize
    ctxs = get_grid(max_n, nrows=nrows, ncols=ncols, figsize=figsize) if ctxs is None else ctxs

    # (colour, label) for the three mask channels shown in the legend.
    legend_entries = (('red', 'lb'), ('green', 'sb'), ('blue', 'st'))

    for i, ctx in enumerate(ctxs):
        scaled = x[i] / x[i].max()
        show_image(scaled, ctx=ctx, cmap='gray', **kwargs)
        show_image(y[i], ctx=ctx, cmap='Spectral_r', alpha=0.35, **kwargs)
        handles = [mpatches.Patch(color=colour, label=label) for colour, label in legend_entries]
        ctx.legend(handles=handles, fontsize=figsize[0]/2)

## Metrics

In [None]:
from scipy.spatial.distance import directed_hausdorff

def mod_acc(inp, targ):
    """Accuracy of thresholded predictions, restricted to non-zero target pixels.

    Predictions are ``sigmoid(inp) > 0.5``. When the target has no non-zero
    pixel, the zero pixels are used as the evaluation region instead.
    """
    targ = targ.squeeze(1)
    region = targ != 0
    if region.sum() == 0:
        region = targ == 0
    hard_pred = torch.where(sigmoid(inp) > 0.5, 1, 0)
    hits = hard_pred[region] == targ[region]
    return hits.float().mean().item()

def dice_coeff(inp, targ):
    """Mean smoothed Dice score, computed over axes 2 and 3 and averaged over axes 0 and 1.

    Tensor predictions are passed through a sigmoid and thresholded at 0.5;
    tensor targets are cast to uint8. Array inputs are used as given and must
    be integer typed, since the overlap is a bitwise AND.
    """
    if torch.is_tensor(inp):
        binarised = torch.where(sigmoid(inp) > 0.5, 1, 0)
        inp = binarised.cpu().detach().numpy().astype(np.uint8)
    if torch.is_tensor(targ):
        targ = targ.cpu().detach().numpy().astype(np.uint8)

    spatial_axes = (2, 3)
    smooth = 1e-5  # keeps the ratio defined (and equal to 1) when both masks are empty
    overlap = (inp & targ).sum(spatial_axes)
    total = inp.sum(spatial_axes) + targ.sum(spatial_axes)
    per_item = (2*overlap + smooth) / (total + smooth)
    return per_item.mean((1, 0))

# def dice_coeff2(inp, targ, thr=0.5, dim=(2,3), epsilon=0.001):
#     targ = targ.to(torch.float32)
#     inp = (inp>thr).to(torch.float32)
#     inter = (targ*inp).sum(dim=dim)
#     den = targ.sum(dim=dim) + inp.sum(dim=dim)
#     dice = ((2*inter+epsilon)/(den+epsilon)).mean(dim=(1,0))
#     return dice

def hd_dist_per_slice(inp, targ):
    """Score one prediction/target pair as ``1 - directed Hausdorff distance``, floored at 0.

    The prediction is ``sigmoid(inp) > 0.5``. Non-zero pixel coordinates of
    both masks are divided by the mask shape before the distance is computed.
    """
    pred_bin = torch.where(sigmoid(inp) > 0.5, 1, 0).cpu().detach().numpy()
    targ_np = targ.cpu().detach().numpy()

    # Coordinates of the non-zero pixels, scaled by the array shape.
    pred_pts = np.argwhere(pred_bin) / np.array(pred_bin.shape)
    targ_pts = np.argwhere(targ_np) / np.array(targ_np.shape)

    score = 1 - directed_hausdorff(pred_pts, targ_pts, SEED)[0]
    if score > 0:
        return score
    return 0

def hd_dist(inp, targ):
    """Average ``hd_dist_per_slice`` over the first three channels, then over the batch."""
    per_sample = []
    for i in range(len(inp)):
        channel_scores = [hd_dist_per_slice(inp[i, j], targ[i, j]) for j in range(3)]
        per_sample.append(np.mean(channel_scores))
    return np.mean(per_sample)

def custom_metric(inp, targ):
    """Combine the batch scores as ``0.4 * dice_coeff + 0.6 * hd_dist``."""
    hausdorff_part = hd_dist(inp, targ)
    dice_part = dice_coeff(inp, targ)
    return 0.4*dice_part + 0.6*hausdorff_part

def custom_loss(inp, targ):
    """Binary cross-entropy on raw logits; the target is cast to float first."""
    criterion = nn.BCEWithLogitsLoss()
    return criterion(inp, targ.float())

# Resizing with pad

## Create a dataloader

In [None]:
# Dataloaders on the 20% sample of `train`, resized with method='pad'; no preview batch is drawn.
dls, dev = get_aug_dls(sample=True, method='pad', show=False)

## Train a model 

In [None]:
# Seed before building the learner so the run can be repeated.
set_seed(SEED, True)

# U-Net on a resnet18 backbone with three output channels (one per organ), in fp16.
learn = unet_learner(
    dls,
    resnet18,
    n_out=3,
    loss_func=custom_loss,
    metrics=[mod_acc, dice_coeff, hd_dist, custom_metric],
    self_attention=True,
    act_cls=Mish,
    opt_func=ranger,
).to_fp16()

# One epoch on the frozen learner.
learn.freeze()
learn.fit_flat_cos(1, slice(1e-3))

## Create a new validation dl without any transformation

In [None]:
# Copy of the validation loader with ToTensor as the only item transform,
# no batch transforms and batch size 1, so items are not resized.
raw_dl = dls.valid.new(after_item=[ToTensor], after_batch=[], bs=1)

## Get all predictions

In [None]:
# Predict on the (resized) validation loader; with_input=True also returns the input images.
imgs, preds, targs = learn.get_preds(dl=dls.valid, with_input=True)

## Convert predictions to numpy masks

In [None]:
# Threshold sigmoid(preds) at 0.5 and move axis 1 to the end (0, 2, 3, 1) before
# converting to a uint8 NumPy array, the layout passed to cv2.resize below.
preds_masks = (sigmoid(preds) > 0.5).permute(0, 2, 3, 1).cpu().detach().numpy().astype(np.uint8)

## Upsize predicted masks 

In [None]:
from tqdm import tqdm
import cv2

In [None]:
# Validation rows of the frame the dataloaders were built from; their
# slice_w / slice_h columns give the target size for each prediction.
val = dev.query('is_valid')

before_dices, after_dices = [], []
org_imgs, inp_masks, targ_masks = [], [], []

for pred_mask, pred, targ, raw_b, width, height in tqdm(zip(preds_masks, preds, targs, raw_dl, val['slice_w'], val['slice_h'])):
    raw_img, targ_mask = raw_b[0], raw_b[1]

    # Nearest-neighbour resize keeps the mask binary; the channel axis is
    # then moved back to the front.
    resized = cv2.resize(pred_mask, dsize=(height, width), interpolation=cv2.INTER_NEAREST)
    upsized_mask = np.moveaxis(resized, -1, 0)

    # Dice at network resolution vs. Dice against the unresized target.
    before_dices.append(dice_coeff(pred[None, ...], targ[None, ...]))
    after_dices.append(dice_coeff(upsized_mask[None, ...], targ_mask/255))

    org_imgs.append(raw_img)
    inp_masks.append(upsized_mask)
    targ_masks.append(targ_mask)

## Compare score before and after resizing

In [None]:
# Mean Dice before upsizing and after upsizing, shown as the cell output.
np.mean(before_dices), np.mean(after_dices)

In [None]:
# One point per validation slice: Dice before upsizing (x) against Dice after upsizing (y).
plt.scatter(before_dices, after_dices);

### There you can see a little discrepancy where scores after upsizing tend to decrease. Let's take a look at some examples.

### Find masks with highest discrepancies between after and before scores

In [None]:
# Per-slice drop in Dice caused by upsizing (positive = worse after upsizing).
diff = np.array(before_dices) - np.array(after_dices)
# Slice indices ordered from the largest drop to the smallest.
diff_idx_sorted = np.argsort(diff)[::-1]

In [None]:
def plt_before_after(idx):
    """Plot target and prediction for validation item ``idx`` at both resolutions.

    Top row: resized image with the resized target (left) and the prediction
    (right). Bottom row: original image with the original target (left) and
    the upsized prediction (right). The two Dice scores and the row's
    ``slice_w`` / ``slice_h`` are printed first.
    """
    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    ax_targ_small, ax_pred_small, ax_targ_full, ax_pred_full = axes.flatten()

    meta = val.iloc[idx]
    print(before_dices[idx], after_dices[idx], meta['slice_w'], meta['slice_h'])

    overlay = dict(cmap='Spectral_r', alpha=0.35)

    # Network-resolution image, decoded back through the dataloader transforms.
    decoded_small_img, decoded_small_mask = dls.decode((imgs[idx], targs[idx]))
    small_img = decoded_small_img[0]

    show_image(small_img, cmap='gray', ctx=ax_targ_small)
    show_image(decoded_small_mask, ctx=ax_targ_small, title="Target (resized)", **overlay)

    show_image(small_img, cmap='gray', ctx=ax_pred_small)
    show_image(preds_masks[idx]*255, ctx=ax_pred_small, title="Prediction (resized)", **overlay)

    # Original-resolution image taken from the untransformed loader.
    full_img = org_imgs[idx][0]

    show_image(full_img, cmap='gray', ctx=ax_targ_full)
    show_image(targ_masks[idx][0], ctx=ax_targ_full, title="Target (original)", **overlay)

    show_image(full_img, cmap='gray', ctx=ax_pred_full)
    show_image(np.moveaxis(inp_masks[idx], 0, -1)*255, ctx=ax_pred_full, title="Prediction (upsized)", **overlay)

In [None]:
# Slice ranked first in diff_idx_sorted (largest Dice drop after upsizing).
plt_before_after(diff_idx_sorted[0])

In [None]:
# Slice ranked second in diff_idx_sorted.
plt_before_after(diff_idx_sorted[1])

In [None]:
# Slice ranked third in diff_idx_sorted.
plt_before_after(diff_idx_sorted[2])

## Now let's fast forward to what would happen if we use squish

In [None]:
# Repeat the whole experiment with method='squish' instead of 'pad'.
dls, dev = get_aug_dls(sample=True, method='squish', show=False)

set_seed(SEED, True)
learn = unet_learner(dls, resnet18, metrics=[mod_acc, dice_coeff, hd_dist, custom_metric], n_out=3, loss_func=custom_loss,
                     self_attention=True, act_cls=Mish, opt_func=ranger).to_fp16()
learn.freeze()
learn.fit_flat_cos(1, slice(1e-3))

# Validation loader without resizing or batch transforms, one item per batch.
raw_dl = dls.valid.new(after_item=[ToTensor], after_batch=[], bs=1)

imgs, preds, targs = learn.get_preds(dl=dls.valid, with_input=True)

# Thresholded predictions with axis 1 moved to the end, as cv2.resize is given below.
preds_masks = (sigmoid(preds) > 0.5).permute(0, 2, 3, 1).cpu().detach().numpy().astype(np.uint8)

before_dices = []
after_dices = []
org_imgs = []
inp_masks = []
targ_masks = []
val = dev.query('is_valid')
for pred_mask, pred, targ, raw_b, width, height in tqdm(zip(preds_masks, preds, targs, raw_dl, val['slice_w'], val['slice_h'])):
    # Nearest-neighbour upsizing keeps the mask binary; channels go back to the front.
    upsized_mask = np.moveaxis(cv2.resize(pred_mask, dsize=(height, width), interpolation=cv2.INTER_NEAREST), -1, 0)
    targ_mask = raw_b[1]
    
    before_dices.append(dice_coeff(pred[None, ...], targ[None, ...]))
    after_dices.append(dice_coeff(upsized_mask[None, ...], targ_mask/255))
    org_imgs.append(raw_b[0])
    inp_masks.append(upsized_mask)
    targ_masks.append(targ_mask)

# Re-rank the slices with the scores of THIS run. Without this, the plots
# that index diff_idx_sorted would still use the ranking computed for the
# 'pad' experiment and would not show the squish run's largest differences.
diff = np.array(before_dices) - np.array(after_dices)
diff_idx_sorted = np.argsort(diff)[::-1]

In [None]:
# Mean Dice before upsizing and after upsizing for the squish run, shown as the cell output.
np.mean(before_dices), np.mean(after_dices)

In [None]:
# One point per validation slice: Dice before upsizing (x) against Dice after upsizing (y).
plt.scatter(before_dices, after_dices);

#### And when we look at the predictions with the highest difference, we can see that they aren't as bad as before.

In [None]:
# Slice ranked first in diff_idx_sorted.
# NOTE(review): the ranking is only meaningful here if diff_idx_sorted was
# computed from the squish run's scores — confirm before interpreting.
plt_before_after(diff_idx_sorted[0])

In [None]:
# Slice ranked second in diff_idx_sorted.
plt_before_after(diff_idx_sorted[1])

In [None]:
# Slice ranked third in diff_idx_sorted.
plt_before_after(diff_idx_sorted[2])

### Thanks for reading.