In [None]:
# Install pycocotools, pytorch_zoo and the HPA cell segmentator from local paths under ../input.
!pip install ../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl
!pip install ../input/hpapytorchzoozip/pytorch_zoo-master
!pip install ../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master

In [None]:
import base64
import gc
import os
import pickle
import random
import sys
import typing as t
import zlib
from itertools import groupby
from multiprocessing import Pool
from operator import itemgetter
from pathlib import Path

from IPython.display import display
import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2
from pycocotools import _mask as coco_mask
from pycocotools import mask as mutils
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Seeds only Python's built-in `random` module; no numpy / torch seed is set in this cell.
random.seed(0)

# =============================================================================
# setting
# =============================================================================
"""
Precomputed Public + Dummy Private: COMPUTE_PUBLIC=False, COMPUTE_PRIVATE=False
Precomputed Public + Private      : COMPUTE_PUBLIC=False, COMPUTE_PRIVATE=True
Public + Private                  : COMPUTE_PUBLIC=True,  COMPUTE_PRIVATE=True
"""

# LOCAL selects which directory layout the path-setup code below uses.
LOCAL = False
# COMPUTE_PUBLIC / COMPUTE_PRIVATE are forwarded to the mask-generation script as
# --compute_public / --compute_private and pick the branch of the submission cell.
COMPUTE_PUBLIC = False
COMPUTE_PRIVATE = True

# ==========================
# 1 st stage
# ==========================
# Experiment directories holding the 1st-stage weights and prediction CSVs.
EXP_NAME_1ST = ["exp049", "exp050"]
# Checkpoint file names loaded from every 1st-stage experiment directory:
# only the "model_best_<idx>.pth" files (idx 0-3); "model_tmp_<idx>.pth" is not used here.
MODEL_NAMES_1ST = [f"model_best_{idx}.pth" for idx in range(4)]
# Precomputed prediction CSVs read from every 1st-stage experiment directory.
TEST_LOCAL_COMPUTED_1ST = [f"pred_{idx}.csv" for idx in range(4)]


# ==========================
# 2nd stage
# ==========================
# timm architecture name used by get_model() for every checkpoint.
BACKBONE_NAME = "seresnet152d"

# 2nd-stage experiment directories; MODEL_NAMES[i] and TEST_LOCAL_COMPUTED[i]
# list the files taken from EXP_NAME[i].
EXP_NAME = ["exp068", "exp071", "exp072", "exp073"]
MODEL_NAMES = [
    # index 0, exp068: best + tmp checkpoint for idx 0-3
    [f"model_{tag}_{idx}.pth" for idx in range(4) for tag in ("best", "tmp")],
    # index 1, exp071
    ["model_0_25.pth", "model_0_21.pth", "model_1_25.pth", "model_1_21.pth"],
    # index 2, exp072 (model_1_* not used)
    ["model_0_25.pth", "model_0_21.pth"],
    # index 3, exp073 (model_1_* not used)
    ["model_0_25.pth", "model_0_21.pth"],
]
# Number of precomputed pred_<idx>.csv files used per experiment: 4, 2, 1, 1.
TEST_LOCAL_COMPUTED = [
    [f"pred_{idx}.csv" for idx in range(n_files)] for n_files in (4, 2, 1, 1)
]


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# image level
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Image-level checkpoints: "best" and "tmp" file for idx 0-7, interleaved per idx.
MODEL_NAMES_IMAGE = [
    f"model_{tag}_{idx}.pth" for idx in range(8) for tag in ("best", "tmp")
]

# Plain string paths (not Path objects) into the image-level weight dataset.
MODEL_PATHS_IMAGE = [
    "../input/hpa-image-level-weight/exp102/" + name for name in MODEL_NAMES_IMAGE
]

# Precomputed image-level prediction CSVs, one per idx 0-7.
TEST_LOCAL_COMPUTED_IMAGE = [f"pred_{idx}.csv" for idx in range(8)]

TEST_LOCAL_COMPUTED_PATHS_IMAGE = [
    "../input/hpa-image-level-weight/exp102/" + name
    for name in TEST_LOCAL_COMPUTED_IMAGE
]



# Names of the 19 per-class prediction columns: label_0 ... label_18.
COLS_TARGET = [f"label_{i}" for i in range(19)]

# Batch size of both DataLoaders built further down in the notebook.
BATCH_SIZE = 32
# Side length given to A.Resize in get_transforms() (cell-level crops).
IMAGE_SIZE = 512

# Pixels added on every side of a cell bounding box in MyDataset.crop().
MARGIN = 100
# Passed as `w_mask` to MyDataset: when true, each crop is multiplied by the cell's binary mask.
W_MASK = True
# `in_chans` of the timm model in get_model(); also the number of mask copies stacked in MyDataset.
IN_CHANS = 4

# Thresholds read by post_process1(): rows must exceed the first two to be kept;
# rows within 1px of an image border are additionally dropped below the third.
KEEP_CELL_AREA_MIN = 0.005
KEEP_NUC_AREA_MIN = 0.001
KEEP_EDGE_CELL_AREA_MIN = 0.01
# post_process2() halves label_0..label_5 when nuc_area_ratio / cell_area_ratio is below this.
NUC_AREA_MIN_0to5 = 0.12

# Blend weights used by post_process3(), in order:
# cell-level prediction, per-image mean of cell predictions, image-level model prediction.
WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN_VS_IMAGE_PRED = [0.6, 0., 0.4]

# Share of the ensemble given to the 1st-stage models (index 0) and 2nd-stage models (index 1).
RATE_OF_WEIGHT_1ST_2ND = [0.2, 0.8]

# Number of CUDA devices visible to torch; GPU is the device index used by get_model()/predict().
GPUS = torch.cuda.device_count()
GPU = 0

# -----------------------------------------------------------------------------
# Directory layout. Only the environment-specific locations differ between the
# two branches; the model / prediction path lists are built once afterwards so
# that LEN_1ST, WEIGHT_1ST_2ND and the TEST_LOCAL_COMPUTED_PATHS* lists exist in
# both modes (the LOCAL branch used to divide a Path by a list, which raises
# TypeError, and never defined those names).
# -----------------------------------------------------------------------------
ROOT = Path.cwd().parent
if LOCAL:
    INPUT = ROOT / "input"
    # NOTE(review): assumes local experiments live in ROOT/output/<exp_name>/<file>,
    # which is what the previous (broken) expression pointed at -- confirm.
    EXP_ROOT = ROOT / "output"
    TEST_IMG_DIR = ROOT / "data" / "test_rgby_images"
    MASK_DIR = ROOT / "data" / "mask"

    NUC_MODEL = MASK_DIR / "dpn_unet_nuclei_v1.pth"
    CELL_MODEL = MASK_DIR / "dpn_unet_cell_3ch_v1.pth"

    MAX_THRE = 40

    import timm
else:
    INPUT = ROOT / "input" / "hpa-single-cell-image-classification"
    LIB_DIR = ROOT / "input" / "hpa2021-libs"
    EXP_ROOT = LIB_DIR

    # Scratch directory for generated masks; removed again by a later cell.
    OUTPUT = ROOT / "temp"
    MASK_DIR = OUTPUT / "mask"
    MASK_DIR.mkdir(exist_ok=True, parents=True)
    NUCEIL_DIR = MASK_DIR / "test" / "nuclei"
    NUCEIL_DIR.mkdir(exist_ok=True, parents=True)
    CELL_DIR = MASK_DIR / "test" / "cell"
    CELL_DIR.mkdir(exist_ok=True, parents=True)

    NUC_MODEL = (
        ROOT / "input" / "hpacellsegmentatormodelweights" / "dpn_unet_nuclei_v1.pth"
    )
    CELL_MODEL = (
        ROOT / "input" / "hpacellsegmentatormodelweights" / "dpn_unet_cell_3ch_v1.pth"
    )

    MAX_THRE = 2

    # timm is imported from the libs dataset, so the path must be extended first.
    sys.path.append(str(LIB_DIR))
    import timm
    from tqdm.notebook import tqdm

# ============================
# 1 st stage
# ============================
MODEL_PATHS = []
TEST_LOCAL_COMPUTED_PATHS_1ST = []
for e_name in EXP_NAME_1ST:
    MODEL_PATHS += [EXP_ROOT / e_name / p for p in MODEL_NAMES_1ST]
    TEST_LOCAL_COMPUTED_PATHS_1ST += [EXP_ROOT / e_name / p for p in TEST_LOCAL_COMPUTED_1ST]

# The first LEN_1ST entries of MODEL_PATHS are the 1st-stage models;
# predict_weighted() relies on this ordering.
LEN_1ST = len(MODEL_PATHS)

# ============================
# 2nd stage
# ============================
TEST_LOCAL_COMPUTED_PATHS = []
for i, e_name in enumerate(EXP_NAME):
    MODEL_PATHS += [EXP_ROOT / e_name / p for p in MODEL_NAMES[i]]
    TEST_LOCAL_COMPUTED_PATHS += [EXP_ROOT / e_name / p for p in TEST_LOCAL_COMPUTED[i]]

# ===========================
# Weight 1st vs 2nd
# ===========================
# Per-model weights of shape (n_models, 1, 1). Each stage's weights sum to
# n_models * rate, so the plain mean over models taken in predict_weighted()
# equals rate[0] * mean(1st stage) + rate[1] * mean(2nd stage).
WEIGHT_1ST_2ND = np.ones(len(MODEL_PATHS)).reshape(-1 , 1, 1)
WEIGHT_1ST_2ND[:LEN_1ST] = len(MODEL_PATHS) * (1 / LEN_1ST) * RATE_OF_WEIGHT_1ST_2ND[0]
WEIGHT_1ST_2ND[LEN_1ST:] = len(MODEL_PATHS) * (1 / (len(MODEL_PATHS) - LEN_1ST)) * RATE_OF_WEIGHT_1ST_2ND[1]


sample_submission = pd.read_csv(INPUT / "sample_submission.csv")


# Quick existence check of every file the notebook is going to read.
print("NUC_MODEL:", NUC_MODEL.exists())
print("CELL_MODEL:", CELL_MODEL.exists())
print("MODEL_PATHS:", [p.exists() for p in MODEL_PATHS])
print("TEST_LOCAL_COMPUTED_PATHS:", [p.exists() for p in TEST_LOCAL_COMPUTED_PATHS])


In [None]:
# =============================================================================
# def
# =============================================================================
def decode_binary_mask(decoded_base64_str, width, height):
    """Turn a base64-encoded, zlib-compressed RLE string into a boolean mask.

    Args:
        decoded_base64_str: base64 text wrapping zlib-compressed RLE counts.
        width: image width, stored as the second entry of the RLE ``size``.
        height: image height, stored as the first entry of the RLE ``size``.

    Returns:
        The array produced by ``pycocotools.mask.decode`` cast to ``bool``.
    """
    compressed = base64.b64decode(decoded_base64_str)
    rle = {
        "size": [height, width],  # pycocotools expects [im_height, im_width]
        "counts": zlib.decompress(compressed),
    }
    return mutils.decode(rle).astype(bool)


def coco_rle_encode(mask):
    """Run-length encode ``mask`` in column-major order.

    Returns:
        ``{"counts": [...], "size": list(mask.shape)}`` where ``counts`` holds
        the lengths of consecutive runs of equal values. A leading ``0`` is
        inserted when the first value equals 1, so the list always starts with
        the length of a (possibly empty) run of the non-1 value.
    """
    flat = mask.ravel(order="F")
    runs = [(value, sum(1 for _ in group)) for value, group in groupby(flat)]

    counts = [length for _, length in runs]
    if runs and runs[0][0] == 1:
        counts.insert(0, 0)

    return {"counts": counts, "size": list(mask.shape)}


def read_img(image_id, color, train_or_test="train"):
    """Read one channel image ``{INPUT}/{train_or_test}/{image_id}_{color}.png``.

    Args:
        image_id: image identifier used as the file-name prefix.
        color: channel suffix of the file name (e.g. "red").
        train_or_test: sub-directory of ``INPUT`` to read from.

    Returns:
        The image as loaded by ``cv2.imread(..., cv2.IMREAD_UNCHANGED)``;
        16-bit images are scaled down to ``uint8`` by dividing by 256.

    Raises:
        AssertionError: if the file does not exist.
        IOError: if the file exists but OpenCV cannot decode it.
    """
    filename = f"{INPUT}/{train_or_test}/{image_id}_{color}.png"
    assert os.path.exists(filename), f"not found {filename}"
    img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread does not raise on an unreadable file, it returns None;
        # fail here with the file name instead of an AttributeError below.
        raise IOError(f"failed to read {filename}")
    if img.dtype == "uint16":
        img = (img / 256).astype("uint8")
    return img


def load_RGB_image(image_id, train_or_test="test"):
    """Load the red, green and blue channel files of an image as one array.

    The three channel images returned by ``read_img`` are stacked and the
    channel axis is moved last, i.e. axes (channel, row, col) -> (row, col, channel).
    The yellow channel is not read.
    """
    channels = [
        read_img(image_id, color, train_or_test)
        for color in ("red", "green", "blue")
    ]
    return np.moveaxis(np.array(channels), 0, -1)


def load_RGBY_image(image_id, train_or_test="test"):
    """Load the red, green, blue and yellow channel files of an image as one array.

    The four channel images returned by ``read_img`` are stacked and the
    channel axis is moved last, i.e. axes (channel, row, col) -> (row, col, channel).
    """
    channels = [
        read_img(image_id, color, train_or_test)
        for color in ("red", "green", "blue", "yellow")
    ]
    return np.moveaxis(np.array(channels), 0, -1)


def print_masked_img(image_id, mask):
    """Show the test image, the mask, and the mask overlaid on the image side by side."""
    img = load_RGB_image(image_id, "test")

    # One entry per panel: title and the layers drawn with plt.imshow (array, kwargs).
    panels = [
        ("Image", [(img, {})]),
        ("Mask", [(mask, {})]),
        ("Image + Mask", [(img, {}), (mask, {"alpha": 0.6})]),
    ]

    plt.figure(figsize=(15, 15))
    for position, (title, layers) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for layer, imshow_kwargs in layers:
            plt.imshow(layer, **imshow_kwargs)
        plt.title(title)
        plt.axis("off")
    plt.show()


def split_list(l, n):
    """Yield consecutive slices of ``l`` that hold at most ``n`` items each."""
    starts = range(0, len(l), n)
    yield from (l[begin : begin + n] for begin in starts)


# =============================================================================
# Transforms
# =============================================================================
def get_transforms():
    """Build the cell-level preprocessing pipeline.

    Steps: resize to IMAGE_SIZE x IMAGE_SIZE, normalise the four channels with
    fixed mean/std values, convert to a torch tensor.
    """
    steps = [
        A.Resize(IMAGE_SIZE, IMAGE_SIZE),
        A.Normalize(
            mean=[0.485, 0.456, 0.406, 0.456],
            std=[0.229, 0.224, 0.225, 0.225],
        ),
        ToTensorV2(),
    ]
    return A.Compose(steps)


# =============================================================================
# Dataset
# =============================================================================
def load_bmask(cell_mask_dir, image_id, cell_id):
    """Return the binary mask of one cell as an integer 0/1 array.

    Args:
        cell_mask_dir: directory containing ``<image_id>.npz`` files.
        image_id: name of the archive (without extension).
        cell_id: value identifying the cell inside the stored ``arr_0`` array.

    Returns:
        Array with the shape of ``arr_0`` holding 1 where ``arr_0 == cell_id``
        and 0 elsewhere.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open; the
    # context manager closes it as soon as the array has been read.
    with np.load(f"{cell_mask_dir}/{image_id}.npz") as archive:
        mask = archive["arr_0"]
    return (mask == cell_id) * 1


class MyDataset(Dataset):
    """Cell-level dataset: one item per row of ``df`` (one cell of one image).

    Each item is the RGBY image cropped to the cell's bounding box (plus
    MARGIN), optionally multiplied by the cell's binary mask, then passed
    through ``get_transforms()``.

    Args:
        df: table with at least ``image_id``, ``y0``, ``x0``, ``y1``, ``x1``;
            ``cell_id`` when ``w_mask`` is true; COLS_TARGET in train/valid mode.
        mode: "train" / "valid" return ``(image, targets)``; any other value
            returns the image only.
        w_mask: apply the cell mask to the crop.
    """

    def __init__(self, df, mode, w_mask=False):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.w_mask = w_mask
        self.transform = get_transforms()

        # Images and masks must come from the same split. The image split used
        # to be hard-coded to "test" while the mask directory followed `mode`.
        if self.mode in ["train", "valid"]:
            self.targets = self.df[COLS_TARGET].values
            self.split = "train"
        else:
            self.split = "test"
        self.cell_mask_dir = MASK_DIR / self.split / "cell"

    def crop(self, image, idx):
        """Cut row ``idx``'s bounding box, enlarged by MARGIN and clipped to the array bounds."""
        y0, x0, y1, x1 = self.df.loc[idx, ["y0", "x0", "y1", "x1"]].values.astype(int)

        y0 = max(0, y0 - MARGIN)
        x0 = max(0, x0 - MARGIN)
        y1 = min(image.shape[0], y1 + MARGIN)
        x1 = min(image.shape[1], x1 + MARGIN)

        image = image[y0:y1, x0:x1]
        return image

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_id = self.df.loc[idx, "image_id"]

        image = load_RGBY_image(image_id, self.split)
        image = self.crop(image, idx)

        if self.w_mask:
            cell_id = self.df.loc[idx, "cell_id"]
            bmask = load_bmask(self.cell_mask_dir, image_id, cell_id)
            bmask = self.crop(bmask, idx)

            # Repeat the 2-D mask along a new last axis so it zeroes every channel.
            image = image * np.stack([bmask] * IN_CHANS, 2)

        augmented = self.transform(image=image.astype("uint8"))
        image = augmented["image"]

        if self.mode in ["train", "valid"]:
            targets = self.df.loc[idx, COLS_TARGET].values
            return image, torch.FloatTensor(targets.astype("float32"))
        else:
            return image


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# image level
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

def get_transforms_image():
    """Build the image-level preprocessing pipeline.

    Steps: resize to 768 x 768, normalise the four channels with fixed
    mean/std values, convert to a torch tensor.
    """
    steps = [
        A.Resize(768, 768),
        A.Normalize(
            mean=[0.485, 0.456, 0.406, 0.456],
            std=[0.229, 0.224, 0.225, 0.225],
        ),
        ToTensorV2(),
    ]
    return A.Compose(steps)


class ImageDataset(Dataset):
    """Image-level dataset: one full RGBY test image per row of ``df``."""

    def __init__(self, df):
        # Positional .loc lookups in __getitem__ need a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)
        self.transform = get_transforms_image()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        rgby = load_RGBY_image(self.df.loc[idx, "image_id"], "test")
        return self.transform(image=rgby.astype("uint8"))["image"]

# =============================================================================
# Network
# =============================================================================
def get_model(model_path):
    """Create the BACKBONE_NAME timm model, load ``model_path`` into it and move it to GPU."""
    net = timm.create_model(
        BACKBONE_NAME,
        pretrained=False,
        in_chans=IN_CHANS,
        num_classes=19,
    )
    load_model(model_path, net, GPU)
    net = net.cuda(GPU)
    return net


def load_model(model_path, model, rank):
    """Load the state dict stored at ``model_path`` into ``model`` in place.

    Args:
        model_path: file readable by ``torch.load``.
        model: module whose ``load_state_dict`` is called.
        rank: device spec passed to ``torch.device`` and used as ``map_location``.
    """
    device = torch.device(rank)
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)


def predict(models, test_loader, rank):
    """Run every model on every batch and average their sigmoid outputs.

    Args:
        models: modules to evaluate; each is switched to eval mode.
        test_loader: iterable yielding image batches.
        rank: CUDA device the batches are moved to.

    Returns:
        numpy array: per-batch means over models, concatenated along axis 0.
    """
    for model in models:
        model.eval()

    batch_means = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = images.cuda(rank)
            per_model = [model(images).cpu().sigmoid().numpy() for model in models]
            batch_means.append(np.stack(per_model, 0).mean(0))

        preds = np.concatenate(batch_means)
    return preds


def predict_weighted(models, test_loader, rank, weights: np.ndarray = None):
    """Like ``predict`` but with per-model weights and class 11 muted for 1st-stage models.

    Args:
        models: modules to evaluate; the first LEN_1ST are treated as 1st-stage.
        test_loader: iterable yielding image batches.
        rank: CUDA device the batches are moved to.
        weights: array broadcastable against the (n_models, batch, classes)
            stack of predictions; defaults to all ones.

    Returns:
        numpy array: mean over models of the weighted sigmoid outputs,
        concatenated over batches.
    """
    for model in models:
        model.eval()
    if weights is None:
        weights = np.ones((len(models), 1, 1))

    batch_outputs = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = images.cuda(rank)
            per_model = []
            for position, model in enumerate(models):
                probs = model(images).cpu().sigmoid().numpy()
                # Column 11 of 1st-stage outputs is zeroed before ensembling.
                if position < LEN_1ST:
                    probs[:, 11] = 0
                per_model.append(probs)
            weighted = np.multiply(np.stack(per_model, 0), weights)
            batch_outputs.append(weighted.mean(0))
        preds = np.concatenate(batch_outputs)
    return preds


def get_test_shortage(test):
    """Return the ``sample_submission`` rows whose ID does not occur in ``test["image_id"]``."""
    already_predicted = sample_submission["ID"].isin(test["image_id"])
    return sample_submission[~already_predicted]


def write_submission(test, fill_shortage=False):
    """Write ``submission.csv`` from the cell-level prediction table.

    Args:
        test: one row per cell with ``image_id``, ``w``, ``h``,
            ``encoded_mask`` and the COLS_TARGET confidences.
        fill_shortage: when true, images of ``sample_submission`` that are
            missing from ``test`` are appended with their sample rows.

    Fallbacks (``sample_submission`` is written unchanged instead):
        * ``test`` contains any null value;
        * ``fill_shortage`` is false, ``sample_submission`` does not have 559
          rows, and some of its IDs are missing from ``test``.
    """
    if test.isnull().sum().sum() > 0:
        sample_submission.to_csv("submission.csv", index=False)
        return

    gr = test[["image_id", "w", "h", "encoded_mask"] + COLS_TARGET].groupby("image_id")

    if fill_shortage:
        test_shortage = get_test_shortage(test)
        len_test = test["image_id"].nunique()
        len_test_shortage = test_shortage["ID"].nunique()
        print(f"fill shortage: {len_test} => {len_test + len_test_shortage}")
    else:
        test_shortage = []
        incomplete = sample_submission["ID"].isin(test["image_id"]).mean() != 1.0
        # The 559-row sample submission is exempt from the completeness check.
        if len(sample_submission) != 559 and incomplete:
            sample_submission.to_csv("submission.csv", index=False)
            return

    classes = list(map(str, range(19)))

    with open("submission.csv", "w") as outf:
        print("ID,ImageWidth,ImageHeight,PredictionString", file=outf)
        for image_id, df in gr:
            # groupby never yields an empty group, so the first row always exists.
            w = df["w"].iloc[0]
            h = df["h"].iloc[0]

            # One (class, confidence, mask) triplet per class and cell.
            pred_strs = []
            for _, row in df.iterrows():
                cnfs = [row[c] for c in COLS_TARGET]
                emasks = [row["encoded_mask"]] * len(cnfs)
                pred_strs += list(zip(classes, cnfs, emasks))

            # Highest confidence first.
            pred_strs = sorted(pred_strs, key=itemgetter(1), reverse=True)
            pred_strs = " ".join(map(lambda x: f"{x[0]} {x[1]} {x[2]}", pred_strs))

            print(f"{image_id},{w},{h},{pred_strs}", file=outf)

        if len(test_shortage) > 0:
            for _, row in test_shortage.iterrows():
                print(
                    f"{row['ID']},{row['ImageWidth']},{row['ImageHeight']},{row['PredictionString']}",
                    file=outf,
                )

    return


def post_process1(test):
    """Drop cells that are too small, and small cells touching the image border.

    A row is kept when ``cell_area_ratio`` and ``nuc_area_ratio`` exceed their
    KEEP_* minimums. Of the remaining rows, those whose box lies within 0-1 px
    of any image edge are dropped if ``cell_area_ratio`` is below
    KEEP_EDGE_CELL_AREA_MIN. Returns a new frame with a reset index.
    """
    n_before = len(test)

    cell_ok = test["cell_area_ratio"] > KEEP_CELL_AREA_MIN
    nuc_ok = test["nuc_area_ratio"] > KEEP_NUC_AREA_MIN
    test = test[cell_ok & nuc_ok].reset_index(drop=True)

    # edge
    at_top = test["y0"].between(0, 1)
    at_bottom = (test["h"] - test["y1"]).between(0, 1)
    at_left = test["x0"].between(0, 1)
    at_right = (test["w"] - test["x1"]).between(0, 1)
    on_edge = at_top | at_bottom | at_left | at_right

    small_on_edge = on_edge & (test["cell_area_ratio"] < KEEP_EDGE_CELL_AREA_MIN)
    test = test[~small_on_edge].reset_index(drop=True)

    n_after = len(test)

    print("remove cell with ")
    print(f"cell_area_ratio({KEEP_CELL_AREA_MIN:.6f}) and ")
    print(f"nuc_area_ratio ({KEEP_NUC_AREA_MIN:.6f}), ")
    print(f"small cell in edge({KEEP_EDGE_CELL_AREA_MIN:.6f}), ")
    print(f"{n_before} => {n_after}")

    return test


def post_process2(test):
    """Halve label_0..label_5 for cells whose nucleus/cell area ratio is small.

    Rows where ``nuc_area_ratio / cell_area_ratio`` is below NUC_AREA_MIN_0to5
    get the first six COLS_TARGET columns multiplied by 0.5. ``test`` is
    modified in place and also returned.
    """
    nuc_to_cell = test["nuc_area_ratio"] / test["cell_area_ratio"]
    condition = nuc_to_cell < NUC_AREA_MIN_0to5

    damped_cols = COLS_TARGET[:6]
    test.loc[condition, damped_cols] = test.loc[condition, damped_cols] * 0.5

    n_updated = condition.sum()
    print(
        f"decrease conf with NUC_AREA_MIN_0to5({NUC_AREA_MIN_0to5:.6f}) / "
        f"subject to update: {n_updated}"
    )

    return test


# def post_process3(test):
#     # groupby image id -> mean
#     image_mean = test.groupby("image_id")[COLS_TARGET].transform('mean')
#     image_mean["label_11"] = test["label_11"]
#     w_cell = WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN[0]
#     w_image = WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN[1]

#     test[COLS_TARGET] = w_cell * test[COLS_TARGET] + w_image * image_mean

#     print(
#         "Add image level prediction (aggregating-average of image_id) to cell level prediction"
#     )
#     print(f"cell-level-pred : image-level-pred = {w_cell} : {w_image}")

#     return test


def post_process3(test, test_image):
    """Blend cell-level confidences with two image-level signals.

    For every COLS_TARGET column the new value is
    ``w_cell * cell + w_image_mean * mean over the image's cells + w_image_pred * image model``,
    with the weights taken from WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN_VS_IMAGE_PRED.
    For ``label_11`` both image-level terms are replaced by the cell's own
    value, so that column is only rescaled by the sum of the weights.

    Args:
        test: cell-level table with ``image_id`` and COLS_TARGET; modified in
            place and returned.
        test_image: image-level table with ``image_id`` and COLS_TARGET.
    """
    # groupby image id -> mean
    image_mean = test.groupby("image_id")[COLS_TARGET].transform("mean")
    image_mean["label_11"] = test["label_11"]

    # match shape of image_level_prediction to cell_level_prediction
    image_pred = test.loc[:, ["image_id"]].merge(test_image, on="image_id", how="left")[COLS_TARGET]
    # merge() hands back a fresh 0..n-1 index while the arithmetic below aligns
    # on index labels; restore test's index so rows stay paired by position
    # even when `test` was filtered without reset_index.
    image_pred.index = test.index
    image_pred["label_11"] = test["label_11"]

    w_cell = WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN_VS_IMAGE_PRED[0]
    w_image_mean = WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN_VS_IMAGE_PRED[1]
    w_image_pred = WEIGHT_CELL_LEVEL_VS_IMAGE_MEAN_VS_IMAGE_PRED[2]

    test[COLS_TARGET] = w_cell * test[COLS_TARGET] + w_image_mean * image_mean + w_image_pred * image_pred

    print(
        "Add image level prediction (aggregating-average of image_id) to cell level prediction"
    )
    print(f"cell-level-pred : image-level-mean : image-level-pred = {w_cell} : {w_image_mean} : {w_image_pred}")

    return test

## 1st Stage

In [None]:
# Command line of the mask-generation script; each enabled COMPUTE_* flag
# appends its switch (note the trailing space after every part).
cmd = (
    "python ../input/hpa2021-libs/generate_test_mask_kernel_faster7.py "
    + ("--compute_public " if COMPUTE_PUBLIC else "")
    + ("--compute_private " if COMPUTE_PRIVATE else "")
)

cmd

In [None]:
# Run the mask-generation command assembled in `cmd` as a shell command.
! {cmd}

In [None]:
# Load the per-cell table from MASK_DIR (presumably written by the mask-generation script above -- verify).
test = pd.read_csv(MASK_DIR / "test_bbox.csv")
test

In [None]:
# Drop cells by the area thresholds in post_process1; rebinds `test` to the filtered frame.
test = post_process1(test)
test

## Image-level prediction

In [None]:
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# image level prediction
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# One row per image: the first cell row of every image_id.
test_image = test.groupby("image_id").first().reset_index()

# Nothing is computed in LOCAL mode.
if not LOCAL:

    test_loader = DataLoader(
        ImageDataset(test_image),
        batch_size=BATCH_SIZE,
        num_workers=MAX_THRE,
        pin_memory=True,
    )

    # Load every image-level checkpoint onto the GPU.
    models = []
    for p in MODEL_PATHS_IMAGE:
        model = get_model(p).eval()
        models.append(model)

    preds = predict(models, test_loader, GPU)

    # Most confident class per image, as a quick look at the output.
    top_class = preds.argmax(1)
    display(pd.crosstab(test_image["image_id"], top_class))

    test_image[COLS_TARGET] = preds

    # Release GPU memory before the cell-level models are loaded.
    del models, model, test_loader
    torch.cuda.empty_cache()
    gc.collect()

    display(test_image)

## 2nd Stage

In [None]:
# Cell-level inference with the 1st + 2nd stage ensemble; nothing is computed in LOCAL mode.
if not LOCAL:

    test_loader = DataLoader(
        MyDataset(test, mode="test", w_mask=W_MASK),
        batch_size=BATCH_SIZE,
        num_workers=MAX_THRE,
        pin_memory=True,
    )

    # MODEL_PATHS lists the 1st-stage checkpoints first, as predict_weighted expects.
    models = []
    for p in MODEL_PATHS:
        model = get_model(p).eval()
        models.append(model)

    preds = predict_weighted(models, test_loader, GPU, WEIGHT_1ST_2ND)

    # Most confident class per cell, tabulated by image.
    top_class = preds.argmax(1)
    display(pd.crosstab(test["image_id"], top_class))

    test[COLS_TARGET] = preds
    test = post_process2(test)
    test = post_process3(test, test_image)

    del models, model
    torch.cuda.empty_cache()
    gc.collect()

    display(test)


In [None]:
# Delete ../temp (OUTPUT, which holds MASK_DIR); cells above that read from it cannot be re-run afterwards.
rm -rf ../temp

## Check Submission

In [None]:
# =============================================================================
# submission
# =============================================================================
cols = sample_submission.columns


def _average_predictions(csv_paths):
    """Read the CSVs in ``csv_paths`` and average their COLS_TARGET columns.

    All other columns are taken from the first file.
    """
    averaged = pd.read_csv(csv_paths[0])
    for path in csv_paths[1:]:
        averaged.loc[:, COLS_TARGET] += pd.read_csv(path).loc[:, COLS_TARGET]
    averaged.loc[:, COLS_TARGET] /= len(csv_paths)
    return averaged


def _load_precomputed_predictions():
    """Rebuild the post-processed ensemble from the precomputed CSV files.

    Returns:
        (cell_level, image_level) prediction tables.
    """
    # cell level 2nd / 1st stage
    cell_2nd = _average_predictions(TEST_LOCAL_COMPUTED_PATHS)
    cell_1st = _average_predictions(TEST_LOCAL_COMPUTED_PATHS_1ST)

    # 1st-stage models do not contribute to class 11, as in predict_weighted().
    # Select the column by name: positional column 11 of the CSV table is only
    # label_11 if the label columns happen to come first.
    cell_1st.loc[:, COLS_TARGET[11]] = 0

    # ensemble 1st + 2nd
    cell_level = cell_2nd.copy()
    cell_level.loc[:, COLS_TARGET] = (
        cell_1st.loc[:, COLS_TARGET] * RATE_OF_WEIGHT_1ST_2ND[0]
        + cell_2nd.loc[:, COLS_TARGET] * RATE_OF_WEIGHT_1ST_2ND[1]
    )

    # image level
    image_level = _average_predictions(TEST_LOCAL_COMPUTED_PATHS_IMAGE)

    cell_level = post_process1(cell_level)
    cell_level = post_process2(cell_level)
    cell_level = post_process3(cell_level, image_level)
    return cell_level, image_level


if not COMPUTE_PUBLIC and not COMPUTE_PRIVATE:

    print("dryrun, replace with local computed file")
    test, test_image = _load_precomputed_predictions()
    write_submission(test, fill_shortage=True)


elif not COMPUTE_PUBLIC and COMPUTE_PRIVATE:

    print("only private")
    test_local, test_image_local = _load_precomputed_predictions()

    if len(sample_submission) == 559:
        write_submission(test_local, fill_shortage=False)

    else:
        # Precomputed rows plus the rows predicted in this session.
        test2 = pd.concat([test_local, test[test_local.columns]], ignore_index=True)
        write_submission(test2, fill_shortage=False)


elif COMPUTE_PUBLIC and COMPUTE_PRIVATE:
    print("full compute")
    write_submission(test, fill_shortage=False)


elif COMPUTE_PUBLIC and not COMPUTE_PRIVATE:
    print("only public")
    write_submission(test, fill_shortage=True)


In [None]:
# Preview the written file; only runs when sample_submission has 559 rows.
if len(sample_submission) == 559:
    sub = pd.read_csv("submission.csv")
    display(sub)

In [None]:
# Visual check: for the first 3 submission rows, decode and plot the first
# 10 (class, confidence, mask) triplets of the prediction string.
if len(sample_submission) == 559:
    sub = pd.read_csv("submission.csv")
    for index, row in sub.head(3).iterrows():
        image_id, w, h = row["ID"], row["ImageWidth"], row["ImageHeight"]
        pred_strs = list(split_list(row["PredictionString"].split(), 3))
        for class_id, cnf, encoded_mask in pred_strs[:10]:
            class_id = int(class_id)
            cnf = float(cnf)

            print(f"class_id:{class_id}, image_id:{image_id}, confidence:{cnf}")
            mask = decode_binary_mask(encoded_mask, w, h)
            print_masked_img(image_id, mask)


In [None]:
# List the working directory, e.g. to confirm submission.csv was written.
!ls -lh