In [None]:
# Record the notebook start time so the install duration can be reported below.
import time
nb_start = time.time()

print("\n... installing libraries ...\n")

# Install the dependencies from local paths under the Kaggle input directories
# (a prebuilt wheel and several source trees); -q keeps pip's output quiet.
!pip install -q "/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
!pip install -q "/kaggle/input/hpapytorchzoozip/pytorch_zoo-master"
!pip install -q "../input/slidingwindow/slidingwindow"
!pip install -q "../input/timm-pytorch-image-models/pytorch-image-models-master"
!pip install -q "../input/hpacellsegmentatorraman/HPA-Cell-Segmentation"

# Elapsed wall-clock time in seconds; reported in minutes.
lib_install = time.time() - nb_start
print(f"Library install time:  {lib_install/60} minutes")

In [None]:
# global params
num_cpu = 2


import pandas as pd

# Sample submission enriched with image width/height metadata; only the four
# columns shared with the competition sample submission are kept.
public_test_df = pd.read_csv("../input/hpa-sample-submission-with-extra-metadata/updated_sample_submission.csv")
public_test_df = public_test_df[["ID", "ImageWidth", "ImageHeight", "PredictionString"]] #[:550]

# Competition sample submission as provided to this run.
full_test_df = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')

# When True, the working submission is restricted to the rows of public_test_df.
sub_public_only = False

# A 559-row sample submission is treated as a quick run: only the first five
# rows are processed.
# NOTE(review): presumably 559 is the size of the public test set - confirm.
if len(full_test_df) == 559:
    commit_df = full_test_df[:5]
    commit_df.to_csv('./sample_submission.csv', index=False)

else:
    if sub_public_only:
        df_public = public_test_df.copy()
        df_public.to_csv('./sample_submission.csv', index=False)

    else:
        # Left-merge with an indicator column so that rows present only in
        # full_test_df (i.e. not matched by any public_test_df row) can be selected.
        df_all = full_test_df.merge(public_test_df.drop_duplicates(), on=["ID", "ImageWidth", "ImageHeight", "PredictionString"], 
                       how='left', indicator=True)

        df_private = df_all[df_all['_merge'] == 'left_only']
        df_private = df_private[["ID", "ImageWidth", "ImageHeight", "PredictionString"]]

        # Every later stage reads its work list from ./sample_submission.csv.
        df_private.to_csv('./sample_submission.csv', index=False)



# Create Mask (deoxy)

In [None]:
%%writefile faster_hpa_cell_segment.py

#mask_start = time.time()

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import imageio

from hpacellseg.cellsegmentator import *


class CellSegmentator(object):
    """Uses pretrained DPN-Unet models to segment cells from images.

    Compared with a one-image-at-a-time loop, both ``pred_nuclei`` and
    ``pred_cells`` push the preprocessed images through the network in
    mini-batches (see ``_predict_batches``).
    """

    def __init__(
        self,
        nuclei_model="../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth",
        cell_model="../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth",
        scale_factor=1.0,
        device="cuda",
        padding=False,
        multi_channel_model=True,
    ):
        """Class for segmenting nuclei and whole cells from confocal microscopy images.

        It takes lists of images and returns the raw output from the
        specified segmentation model. Model weights are downloaded when the
        given path does not exist.
        Note that for cell segmentation, there are two possible models
        available. One that works with 2 channeled images and one that
        takes 3 channels.

        Keyword arguments:
        nuclei_model -- A loaded torch nuclei segmentation model or the
                        path to a file which contains such a model.
                        If the argument is a path that points to a non-existent file,
                        a pretrained nuclei model is downloaded to that path
                        (default: the dpn_unet_nuclei_v1.pth Kaggle input path).
        cell_model -- A loaded torch cell segmentation model or the
                      path to a file which contains such a model
                      (default: the dpn_unet_cell_3ch_v1.pth Kaggle input path).
        scale_factor -- How much to scale images before they are fed to
                        segmentation models. When it is not 1, predictions are
                        resized back to the original image size
                        (default: 1.0).
        device -- The device on which to run the models.
                  This should either be 'cpu' or 'cuda' or pointed cuda
                  device like 'cuda:0' (default: 'cuda'). Falls back to
                  'cpu' with a warning when CUDA is unavailable.
        padding -- Whether to add padding to the images before feeding the
                   images to the network. (default: False).
        multi_channel_model -- Control whether to use the 3-channel cell model or not.
                               If True, use the 3-channel model, otherwise use the
                               2-channel version (default: True).

        Raises:
        ValueError -- if ``device`` is neither 'cpu' nor a cuda device string.
        """
        if device != "cpu" and "cuda" not in device:
            raise ValueError(f"{device} is not a valid device (cuda/cpu)")
        # Explicit check instead of ``assert`` so the CPU fallback still works
        # when Python runs with assertions disabled (-O).
        if device != "cpu" and not torch.cuda.is_available():
            print("No GPU found, using CPU.", file=sys.stderr)
            device = "cpu"
        self.device = device

        if isinstance(nuclei_model, str):
            if not os.path.exists(nuclei_model):
                print(
                    f"Could not find {nuclei_model}. Downloading it now",
                    file=sys.stderr,
                )
                download_with_url(NUCLEI_MODEL_URL, nuclei_model)
            nuclei_model = torch.load(
                nuclei_model, map_location=torch.device(self.device)
            )
        # Unwrap a DataParallel checkpoint when running on the CPU.
        if isinstance(nuclei_model, torch.nn.DataParallel) and self.device == "cpu":
            nuclei_model = nuclei_model.module

        self.nuclei_model = nuclei_model.to(self.device).eval()

        self.multi_channel_model = multi_channel_model
        if isinstance(cell_model, str):
            if not os.path.exists(cell_model):
                print(
                    f"Could not find {cell_model}. Downloading it now", file=sys.stderr
                )
                if self.multi_channel_model:
                    download_with_url(MULTI_CHANNEL_CELL_MODEL_URL, cell_model)
                else:
                    download_with_url(TWO_CHANNEL_CELL_MODEL_URL, cell_model)
            cell_model = torch.load(cell_model, map_location=torch.device(self.device))
        self.cell_model = cell_model.to(self.device).eval()
        self.scale_factor = scale_factor
        self.padding = padding

    def _image_conversion(self, images):
        """Convert/Format images to RGB image arrays list for cell predictions.

        Intended for internal use only.

        Keyword arguments:
        images -- list of lists of image paths/arrays. It should follow the
                 pattern, with ER channel input,
                 [
                     [microtubule_path0/image_array0, microtubule_path1/image_array1, ...],
                     [er_path0/image_array0, er_path1/image_array1, ...],
                     [nuclei_path0/image_array0, nuclei_path1/image_array1, ...]
                 ]
                 or, without ER input,
                 [
                     [microtubule_path0/image_array0, microtubule_path1/image_array1, ...],
                     None,
                     [nuclei_path0/image_array0, nuclei_path1/image_array1, ...]
                 ]

        Returns:
        A list with one (H, W, 3) array per image, stacked in the order
        microtubule, ER (zeros when absent), nuclei.

        Raises:
        ValueError -- when the channel lists are malformed or differ in length.
        """
        microtubule_imgs, er_imgs, nuclei_imgs = images
        if self.multi_channel_model:
            if not isinstance(er_imgs, list):
                raise ValueError("Please speicify the image path(s) for er channels!")
        else:
            if er_imgs is not None:
                raise ValueError(
                    "second channel should be None for two channel model predition!"
                )

        if not isinstance(microtubule_imgs, list):
            raise ValueError("The microtubule images should be a list")
        if not isinstance(nuclei_imgs, list):
            # Previously this reported "microtubule", pointing at the wrong argument.
            raise ValueError("The nuclei images should be a list")

        if er_imgs:
            if not len(microtubule_imgs) == len(er_imgs) == len(nuclei_imgs):
                raise ValueError("The lists of images needs to be the same length")
        else:
            if not len(microtubule_imgs) == len(nuclei_imgs):
                raise ValueError("The lists of images needs to be the same length")

        # Anything that is not already an array is treated as a file path.
        if not all(isinstance(item, np.ndarray) for item in microtubule_imgs):
            microtubule_imgs = [
                imageio.imread(os.path.expanduser(item)) for item in microtubule_imgs
            ]
            nuclei_imgs = [
                imageio.imread(os.path.expanduser(item)) for item in nuclei_imgs
            ]
            if er_imgs:
                er_imgs = [
                    imageio.imread(os.path.expanduser(item)) for item in er_imgs
                ]

        if not er_imgs:
            # Two-channel model: the middle channel is filled with zeros.
            er_imgs = [
                np.zeros(item.shape, dtype=item.dtype) for item in microtubule_imgs
            ]

        cell_imgs = [
            np.dstack((microtubule, er, nuclei))
            for microtubule, er, nuclei in zip(microtubule_imgs, er_imgs, nuclei_imgs)
        ]

        return cell_imgs

    def _predict_batches(self, model, preprocessed_imgs, batch_size=24):
        """Run ``model`` over channel-first images in mini-batches.

        Intended for internal use only.

        Keyword arguments:
        model -- the torch module to evaluate.
        preprocessed_imgs -- list of equally-shaped (C, H, W) arrays.
        batch_size -- number of images per forward pass (default: 24).

        Returns:
        A list with one softmax output array per input image; an empty list
        when no images are given.
        """
        if not preprocessed_imgs:
            return []

        mean = torch.as_tensor(NORMALIZE["mean"], device=self.device)
        std = torch.as_tensor(NORMALIZE["std"], device=self.device)

        outputs = []
        with torch.no_grad():
            for start in range(0, len(preprocessed_imgs), batch_size):
                # Stack in numpy first: building a tensor straight from a list
                # of arrays is considerably slower.
                batch = np.stack(preprocessed_imgs[start : start + batch_size])
                batch = torch.from_numpy(batch).float().to(self.device)
                batch = batch.sub_(mean[:, None, None]).div_(std[:, None, None])
                probabilities = F.softmax(model(batch), dim=1)
                outputs.append(probabilities.cpu().numpy())
        return list(np.concatenate(outputs, axis=0))

    def pred_nuclei(self, images):
        """Predict the nuclei segmentation.

        Keyword arguments:
        images -- A list of image arrays or a list of paths to images.
                  If as a list of image arrays, the images could be 2d images
                  of nuclei data array only, or must have the nuclei data in
                  the blue channel; If as a list of file paths, the images
                  could be RGB image files or gray scale nuclei image file
                  paths.

        Returns:
        predictions -- A list of predictions of nuclei segmentation for each nuclei image.
        """

        def _preprocess(image):
            if isinstance(image, str):
                image = imageio.imread(image)
            # Remembered so _restore_scaling_padding can undo the rescale;
            # the value of the last preprocessed image wins.
            self.target_shape = image.shape
            if len(image.shape) == 2:
                image = np.dstack((image, image, image))
            image = transform.rescale(image, self.scale_factor, multichannel=True)
            # The nuclei model sees the blue channel replicated three times.
            nuc_image = np.dstack((image[..., 2], image[..., 2], image[..., 2]))
            if self.padding:
                rows, cols = nuc_image.shape[:2]
                self.scaled_shape = rows, cols
                nuc_image = cv2.copyMakeBorder(
                    nuc_image,
                    32,
                    (32 - rows % 32),
                    32,
                    (32 - cols % 32),
                    cv2.BORDER_REFLECT,
                )
            nuc_image = nuc_image.transpose([2, 0, 1])
            return nuc_image

        preprocessed_imgs = [_preprocess(image) for image in images]
        predictions = self._predict_batches(self.nuclei_model, preprocessed_imgs)
        predictions = [util.img_as_ubyte(pred) for pred in predictions]
        predictions = [self._restore_scaling_padding(pred) for pred in predictions]
        return predictions

    def _restore_scaling_padding(self, n_prediction):
        """Restore an image from scaling and padding.

        This method is intended for internal use.
        It takes a channel-first model output and returns it channel-last,
        with the padding border cropped and the rescale undone.
        """
        n_prediction = n_prediction.transpose([1, 2, 0])
        if self.padding:
            n_prediction = n_prediction[
                32 : 32 + self.scaled_shape[0], 32 : 32 + self.scaled_shape[1], ...
            ]
        if not self.scale_factor == 1:
            n_prediction[..., 0] = 0
            # cv2.resize expects dsize as (width, height), i.e. (cols, rows);
            # target_shape is a numpy shape, i.e. (rows, cols, ...).
            n_prediction = cv2.resize(
                n_prediction,
                (self.target_shape[1], self.target_shape[0]),
                interpolation=cv2.INTER_AREA,
            )
        return n_prediction

    def pred_cells(self, images, precombined=False):
        """Predict the cell segmentation for a list of images.

        Keyword arguments:
        images -- list of lists of image paths/arrays. It should follow the
                  pattern, with ER channel input,
                  [
                      [microtubule_path0/image_array0, microtubule_path1/image_array1, ...],
                      [er_path0/image_array0, er_path1/image_array1, ...],
                      [nuclei_path0/image_array0, nuclei_path1/image_array1, ...]
                  ]
                  or, without ER input,
                  [
                      [microtubule_path0/image_array0, microtubule_path1/image_array1, ...],
                      None,
                      [nuclei_path0/image_array0, nuclei_path1/image_array1, ...]
                  ]
                  The ER channel is required when multichannel is True
                  and required to be None when multichannel is False.
                  The images need to be of the same size.
        precombined -- If precombined is True, the list of images is instead supposed to be
                       a list of RGB numpy arrays (default: False).

        Returns:
        predictions -- a list of predictions of cell segmentations.

        Raises:
        ValueError -- if an image is not a 3-dimensional array.
        """

        def _preprocess(image):
            self.target_shape = image.shape
            if not len(image.shape) == 3:
                raise ValueError("image should has 3 channels")
            cell_image = transform.rescale(image, self.scale_factor, multichannel=True)
            if self.padding:
                rows, cols = cell_image.shape[:2]
                self.scaled_shape = rows, cols
                cell_image = cv2.copyMakeBorder(
                    cell_image,
                    32,
                    (32 - rows % 32),
                    32,
                    (32 - cols % 32),
                    cv2.BORDER_REFLECT,
                )
            cell_image = cell_image.transpose([2, 0, 1])
            return cell_image

        if not precombined:
            images = self._image_conversion(images)
        preprocessed_imgs = [_preprocess(image) for image in images]
        predictions = self._predict_batches(self.cell_model, preprocessed_imgs)
        # Unlike pred_nuclei, scaling/padding is undone before the uint8 conversion.
        predictions = [self._restore_scaling_padding(pred) for pred in predictions]
        predictions = [util.img_as_ubyte(pred) for pred in predictions]

        return predictions
    
    
import os.path
import urllib
import zipfile

import numpy as np
import scipy.ndimage as ndi
from skimage import transform
from skimage import filters, measure, segmentation
from skimage.morphology import (binary_erosion, closing, disk,
                                remove_small_holes, remove_small_objects)

# Module-level threshold constants; LOW_THRESHOLD is defined relative to
# HIGH_THRESHOLD. Neither is referenced in the visible part of this script.
HIGH_THRESHOLD = 0.4
LOW_THRESHOLD = HIGH_THRESHOLD - 0.25


def download_with_url(url_string, file_path, unzip=False):
    """Download the resource at ``url_string`` and store it at ``file_path``.

    Keyword arguments:
    url_string -- URL to fetch; opened with ``urllib.request.urlopen``.
    file_path -- destination path; an existing file is overwritten.
    unzip -- when True, the downloaded file is treated as a zip archive and
             extracted into the directory that contains ``file_path``
             (default: False).
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import shutil
    import urllib.request

    with urllib.request.urlopen(url_string) as response, open(
        file_path, "wb"
    ) as out_file:
        # Stream in chunks rather than holding the whole payload in memory.
        shutil.copyfileobj(response, out_file)

    if unzip:
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            # dirname is "" for a bare file name; extract next to it in that case.
            zip_ref.extractall(os.path.dirname(file_path) or ".")


def __fill_holes(image):
    """Fill holes in a labelled image and renumber its regions.

    Pixels on label boundaries are zeroed first, the remaining foreground is
    hole-filled as a binary mask, and the connected components of that mask
    are labelled afresh.
    """
    edge_mask = segmentation.find_boundaries(image)
    interior = np.multiply(image, np.logical_not(edge_mask))
    filled = ndi.binary_fill_holes(interior > 0)
    relabelled, _ = ndi.label(filled)
    return relabelled





def label_cell(nuclei_pred, cell_pred):
    """Label the cells and the nuclei.

    Keyword arguments:
    nuclei_pred -- a 3D numpy array of a prediction from a nuclei image.
    cell_pred -- a 3D numpy array of a prediction from a cell image.

    Both arrays are read channel-last with values on a 0-255 scale (they are
    divided by 255 before thresholding); channel 1 is used as the border
    signal and channel 2 as the object signal.

    Returns:
    A tuple containing:
    nuclei-label -- A nuclei mask data array.
    cell-label  -- A cell mask data array (uint16).
    0's in the data arrays indicate background while a continuous
    stretch of a specific number indicates the area for a specific
    cell.
    The same value in cell mask and nuclei mask refers to the identical cell.
    """
    def __wsh(
        mask_img,
        threshold,
        border_img,
        seeds,
        threshold_adjustment=0.35,
        small_object_size_cutoff=10,
    ):
        """Watershed ``mask_img`` from markers derived from ``seeds * border_img``.

        Note that ``mask_img`` is thresholded in place.
        """
        img_copy = np.copy(mask_img)
        m = seeds * border_img  # * dt
        img_copy[m <= threshold + threshold_adjustment] = 0
        img_copy[m > threshold + threshold_adjustment] = 1
        # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
        img_copy = img_copy.astype(bool)
        img_copy = remove_small_objects(img_copy, small_object_size_cutoff).astype(
            np.uint8
        )

        mask_img[mask_img <= threshold] = 0
        mask_img[mask_img > threshold] = 1
        mask_img = mask_img.astype(bool)
        mask_img = remove_small_holes(mask_img, 63)
        mask_img = remove_small_objects(mask_img, 1).astype(np.uint8)
        markers = ndi.label(img_copy, output=np.uint32)[0]
        labeled_array = segmentation.watershed(
            mask_img, markers, mask=mask_img, watershed_line=True
        )
        return labeled_array

    # NOTE(review): if both predictions are uint8, the channel sum below wraps
    # modulo 256 before the division. Left unchanged because the thresholds
    # were presumably tuned with this behaviour - confirm.
    nuclei_label = __wsh(
        nuclei_pred[..., 2] / 255.0,
        0.4,
        1 - (nuclei_pred[..., 1] + cell_pred[..., 1]) / 255.0 > 0.05,
        nuclei_pred[..., 2] / 255,
        threshold_adjustment=-0.25,
        small_object_size_cutoff=32,
    )

    # for hpa_image, to remove the small pseudo nuclei
    nuclei_label = remove_small_objects(nuclei_label, 157)
    nuclei_label = measure.label(nuclei_label)
    # this is to remove the cell borders' signal from cell mask.
    # could use np.logical_and with some revision, to replace this func.
    # Tuned for segmentation hpa images
    threshold_value = max(0.22, filters.threshold_otsu(cell_pred[..., 2] / 255) * 0.5)
    # exclude the green area first
    cell_region = np.multiply(
        cell_pred[..., 2] / 255 > threshold_value,
        np.invert(np.asarray(cell_pred[..., 1] / 255 > 0.05, dtype=np.int8)),
    )
    sk = np.asarray(cell_region, dtype=np.int8)
    distance = np.clip(cell_pred[..., 2], 255 * threshold_value, cell_pred[..., 2])
    # Grow the nuclei seeds into the cell region.
    cell_label = segmentation.watershed(-distance, nuclei_label, mask=sk)
    # NOTE(review): the uint8 cast wraps label ids above 255 (256 becomes 0,
    # i.e. background). Only relevant for images with more than 255 seeds -
    # confirm whether that can occur.
    cell_label = remove_small_objects(cell_label, 344).astype(np.uint8)
    selem = disk(2)
    cell_label = closing(cell_label, selem)
    cell_label = __fill_holes(cell_label)
    # this part is to use green channel, and extend cell label to green channel
    # benefit is to exclude cells clear on border but without nucleus
    sk = np.asarray(
        np.add(
            np.asarray(cell_label > 0, dtype=np.int8),
            np.asarray(cell_pred[..., 1] / 255 > 0.05, dtype=np.int8),
        )
        > 0,
        dtype=np.int8,
    )
    cell_label = segmentation.watershed(-distance, cell_label, mask=sk)
    cell_label = __fill_holes(cell_label)
    cell_label = np.asarray(cell_label > 0, dtype=np.uint8)
    cell_label = measure.label(cell_label)
    cell_label = remove_small_objects(cell_label, 344)
    cell_label = measure.label(cell_label)
    cell_label = np.asarray(cell_label, dtype=np.uint16)
    # Keep only nuclei that lie inside a surviving cell, then give each
    # nucleus the label id of the cell that contains it.
    nuclei_label = np.multiply(cell_label > 0, nuclei_label) > 0
    nuclei_label = measure.label(nuclei_label)
    nuclei_label = remove_small_objects(nuclei_label, 157)
    nuclei_label = np.multiply(cell_label, nuclei_label > 0)

    return nuclei_label, cell_label


# Build the segmentator with its default weights, device and scale settings.
cellsegmentor = CellSegmentator()

# Work list written by the notebook before this script is run.
data_df = pd.read_csv('./sample_submission.csv')
data_size = len(data_df)
# Number of images loaded and segmented per outer-loop iteration.
bs = 240

def load_images(df : pd.DataFrame, root='../input/hpa-single-cell-image-classification/test/'):
    """Load and downscale the segmentation inputs for every row of ``df``.

    Keyword arguments:
    df -- frame with an ``ID`` column; the files ``<ID>_red.png``,
          ``<ID>_yellow.png`` and ``<ID>_blue.png`` are read from ``root``.
    root -- directory that holds the channel images.

    Returns:
    A tuple ``(gray, rgb)`` of lists, one entry per row:
    gray -- the blue channel resized to 512x512.
    rgb -- red, yellow and blue stacked on the last axis, resized to 512x512.

    Raises:
    FileNotFoundError -- if a channel image cannot be read.
    """
    gray = []
    rgb = []
    for image_id in tqdm(df["ID"], total=len(df)):
        channels = []
        for color in ("red", "yellow", "blue"):
            path = os.path.join(root, f'{image_id}_{color}.png')
            channel = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising.
            if channel is None:
                raise FileNotFoundError(f"Could not read image: {path}")
            channels.append(channel)
        gray.append(cv2.resize(channels[2], (512, 512)))
        rgb.append(cv2.resize(np.stack(channels, axis=2), (512, 512)))
    return gray, rgb
        
    
# Output directory for the predicted cell masks. Created once, before the
# loop, so it also exists when the work list is empty.
root = '/kaggle/working/hpa_test_mask'
os.makedirs(root, exist_ok=True)

# Process the work list in chunks of ``bs`` images: load, predict nuclei and
# cells, then label each image and write its cell mask as a PNG.
for i in range(0, data_size, bs):
    print('!!!!', i, '!!!!')
    start = i
    end = min(len(data_df), start + bs)
    test_df = data_df[start:end]
    print(len(test_df))
    print('---- start load images ----')
    gray, rgb = load_images(test_df)
    print(len(gray))
    print('---- finish load images ----')
    print('---- start pred nuclei ----')
    nuc_segmentations = cellsegmentor.pred_nuclei(gray)
    print('---- finish pred nucrei ----')
    print('---- start pred cells ----')
    # The RGB stacks are already combined, so no per-channel conversion is needed.
    cell_segmentations = cellsegmentor.pred_cells(rgb, precombined=True)
    print('---- finish pred cells ----')

    print('---- start mask write ----')
    for data_id, nuc_seg, cell_seg in zip(test_df.ID.to_list(), nuc_segmentations, cell_segmentations):
        # Only the cell label image is saved; the nuclei labels are discarded.
        nuc, cell = label_cell(nuc_seg, cell_seg)
        imageio.imwrite(os.path.join(root, f'{data_id}_predictedmask.png'), cell)

    print('---- finish mask write ----')
    
#mask_gen = time.time() - mask_start
#print(f"Mask generation time:  {mask_gen/60} minutes")

In [None]:
%%time
# Run the segmentation script written by the previous cell as a separate
# Python process and record how long it takes (seconds).
mask_start = time.time()
!python faster_hpa_cell_segment.py
mask_gen = time.time() - mask_start

# Inference - PuzzleCam

In [None]:
import sys
sys.path.append('/kaggle/input/hpasrc/src')
import copy
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from core.networks import *
from tools.general.io_utils import *
from tools.general.time_utils import *
from tools.ai.demo_utils import *
from tools.ai.torch_utils import *
from tools.ai.augment_utils import *
from tools.ai.randaugment import *

import timm
import pandas as pd
from albumentations import Compose
import slidingwindow as sw
from tqdm import tqdm
from multiprocessing import Pool

# Use the GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): `torch` is not imported explicitly in this cell; presumably it
# is brought in by one of the star imports above - confirm.
device = ('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
###############################################################################
# hpa dataset
###############################################################################

class HPADataset(torch.utils.data.Dataset):
    """Dataset yielding three-channel HPA images together with their ids.

    Images are read from ``<root_dir>/<args.domain>/<id>_<color>.png`` for the
    red, green and blue channels. The notebook-level ``args`` object must
    exist by the time items are fetched.
    """

    def __init__(self, root_dir, ids, labels, resize=None, transforms=None):
        """Store the dataset configuration.

        Keyword arguments:
        root_dir -- directory containing the ``train``/``test`` image folders.
        ids -- sequence of image ids.
        labels -- sequence of label strings aligned with ``ids``; only read
                  when ``args.domain`` is "train" (format ``"3|7"``).
        resize -- if truthy, images are resized to ``(resize, resize)``.
        transforms -- stored on the instance; not applied by ``__getitem__``.
        """
        self.root_dir = root_dir
        self.ids = ids
        self.resize = resize
        self.transforms = transforms
        self.labels = labels

    def __len__(self):
        """Return the number of image ids."""
        return len(self.ids)

    def __getitem__(self, item):
        """Return ``(image, labels, image_id)`` for position ``item``.

        ``labels`` is a list of class indices in the "train" domain and
        ``[-1]`` in the "test" domain.

        Raises:
        IndexError -- if ``item`` is out of range (this ends plain iteration).
        ValueError -- if ``args.domain`` is neither "train" nor "test".
        FileNotFoundError -- if a channel image cannot be read.
        """
        # Must stay first: the IndexError raised here terminates iteration.
        _ids = self.ids[item]

        if args.domain not in ("train", "test"):
            # Previously this case fell through and returned None.
            raise ValueError(f"Unsupported domain: {args.domain!r}")

        channels = []
        for color in ('red', 'green', 'blue'):
            path = os.path.join(self.root_dir, args.domain, f'{_ids}_{color}.png')
            channel = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising.
            if channel is None:
                raise FileNotFoundError(f"Could not read image: {path}")
            channels.append(channel)
        image = np.dstack(channels)

        if self.resize:
            image = cv2.resize(image, (self.resize, self.resize))

        if args.domain == "train":
            # "3|7" -> [3, 7]: a list of class indices, not a one-hot vector.
            y = list(map(int, self.labels[item].split('|')))
            return image, y, _ids

        return image, [-1], _ids
        
###################################################################################
# Network
###################################################################################

class Classifier(nn.Module, ABC_Model):
    """timm backbone followed by a 1x1 convolution used as classifier head.

    The head can be applied to the full feature map (yielding per-class
    activation maps) or to the globally pooled features (yielding logits).
    """

    # Channel count of the backbone's final feature map, per supported model.
    _NUM_FEATURES = {
        'densenet121': 1024,
        'resnest101e': 2048,
        'resnest26d': 2048,
        'nf_regnet_b1': 960,
        'xception': 2048,
    }

    def __init__(self, model_name, num_classes=20, mode='fix'):
        """Build the backbone and the classifier head.

        Keyword arguments:
        model_name -- one of the keys of ``_NUM_FEATURES``.
        num_classes -- number of output classes (default: 20).
        mode -- accepted for call compatibility; not used here.

        Raises:
        ValueError -- if ``model_name`` is not supported (previously this
                      surfaced as an UnboundLocalError).
        """
        super().__init__()

        if model_name not in self._NUM_FEATURES:
            raise ValueError(
                f"Unsupported model_name {model_name!r}; "
                f"expected one of {sorted(self._NUM_FEATURES)}"
            )
        num_features = self._NUM_FEATURES[model_name]

        self.num_classes = num_classes
        # num_classes=0 and global_pool='' make timm return the unpooled feature map.
        self.model = timm.create_model(model_name, pretrained=False, num_classes=0, global_pool='')
        # The attribute name (including its spelling) is part of the
        # state_dict keys of saved checkpoints; do not rename it.
        self.classifierr = nn.Conv2d(num_features, num_classes, 1, bias=False)
        self.initialize([self.classifierr])

    def _pooled_logits(self, x):
        """Globally pool ``x``, apply the head and flatten to (N, num_classes)."""
        pooled = self.global_average_pooling_2d(x, keepdims=True)
        return self.classifierr(pooled).view(-1, self.num_classes)

    def forward(self, x, with_cam=False, with_cam_prob=False):
        """Run the network.

        Keyword arguments:
        x -- input image batch.
        with_cam -- return ``(logits, features)`` where the logits are the
                    globally pooled per-class feature maps.
        with_cam_prob -- return ``(logits, features)`` where the logits come
                         from the head applied to the pooled backbone output.
                         Ignored when ``with_cam`` is True.

        Returns:
        ``logits`` alone when both flags are False, otherwise
        ``(logits, features)``.
        """
        x = self.model(x)
        if with_cam:
            features = self.classifierr(x)
            logits = self.global_average_pooling_2d(features)
            return logits, features

        if with_cam_prob:
            features = self.classifierr(x)
            return self._pooled_logits(x), features

        return self._pooled_logits(x)

In [None]:
#################################################################################################
# prediction
#################################################################################################

def get_cam(models, ori_image, scale, ori_w, ori_h):
    """Compute ensemble class activation maps and probabilities for one crop.

    The crop is resized by ``scale`` and evaluated together with two flipped
    copies (last axis and second-to-last axis of the channel-first tensor).
    Logits are averaged over the three views before the sigmoid; the CAMs are
    flipped back, averaged over the views, and finally averaged over models.

    Arguments:
    models -- iterable of models accepting ``with_cam_prob=True``.
    ori_image -- channel-last image array.
    scale -- resize factor.
    ori_w, ori_h -- sizes multiplied by ``scale`` and passed to ``cv2.resize``
                    as ``(ori_w, ori_h)``.
                    NOTE(review): the caller passes shape[0] as ``ori_w``;
                    this only matters for non-square crops - confirm.

    Returns:
    ``(cams, output)``: the averaged activation maps and class probabilities.
    """
    # preprocessing
    target_size = (round(ori_w * scale), round(ori_h * scale))
    resized = cv2.resize(copy.deepcopy(ori_image), target_size, interpolation=cv2.INTER_AREA)
    chw = torch.from_numpy(resized.transpose((2, 0, 1)))

    views = torch.stack([chw, chw.flip(-1), chw.flip(-2)])
    views = views.to(device, dtype=torch.float)

    # inference
    per_model_probs = []
    per_model_cams = []
    for net in models:
        logits, features = net(views, with_cam_prob=True)
        mean_logits = (logits[0] + logits[1] + logits[2]) / 3
        per_model_probs.append(torch.sigmoid(mean_logits))

        # postprocessing: undo each flip before averaging the maps
        activations = F.relu(features)
        merged = (activations[0] + activations[1].flip(-1) + activations[2].flip(-2)) / 3
        per_model_cams.append(merged)

    output = torch.mean(torch.stack(per_model_probs), dim=0)
    cams = torch.mean(torch.stack(per_model_cams), dim=0)

    return cams, output



def run_prediction(models, dataset, pred_dir):
    """Compute sliding-window, multi-scale CAMs for every image and save them.

    For each image, windows of ``args.crop_size`` with overlap
    ``args.sw_overlap`` are evaluated at every scale in ``args.scales``.
    Per-window CAMs are summed over scales and averaged where windows overlap;
    class probabilities are the maximum over scales and windows. Each class
    map is normalised, weighted by its class probability, resized to 256x256,
    cast to float16 and saved as ``<pred_dir><image_id>.npy``.

    Arguments:
    models -- list of models passed on to ``get_cam``.
    dataset -- iterable yielding ``(image, label, image_id)``.
    pred_dir -- output path prefix (expected to end with a path separator,
                since file names are appended directly).
    """
    scales = [float(scale) for scale in args.scales.split(',')]

    # Fixed upper bound used when normalising the CAMs.
    # NOTE(review): presumably an empirical maximum of the summed CAMs - confirm.
    cam_norm_max = 200
    save_img = False
    save_npy = True

    with torch.no_grad():
        tk1 = tqdm(dataset, total=int(len(dataset)))
        for (ori_image, label, image_id) in tk1:
            image = ori_image.copy()
            windows = sw.generate(image, sw.DimOrder.HeightWidthChannel, args.crop_size, args.sw_overlap)

            sourceWidth = image.shape[sw.DimOrder.HeightWidthChannel.index('w')]
            sourceHeight = image.shape[sw.DimOrder.HeightWidthChannel.index('h')]
            # np.float64: the ``np.float`` alias was removed in NumPy 1.24.
            sums = np.zeros((args.CLASS, sourceHeight, sourceWidth), dtype=np.float64)
            counts = np.zeros((sourceHeight, sourceWidth), dtype=np.uint32)

            image_probs = []
            for window in windows:
                subset = image[window.indices()]

                # NOTE(review): shape[0] is the row count, so these names are
                # swapped; harmless for square windows - confirm.
                ori_w, ori_h = subset.shape[0], subset.shape[1]

                # NOTE(review): unused below; can be dropped if
                # get_strided_size has no side effects.
                strided_size = get_strided_size((ori_h, ori_w), 4)
                strided_up_size = get_strided_up_size((ori_h, ori_w), 16)

                cams_list = []
                probs_list = []
                for scale in scales:
                    cams, probs = get_cam(models, subset, scale, ori_w, ori_h)
                    cams_list.append(cams)
                    probs_list.append(probs)

                # Upsample every scale to a common size, sum, and crop to the window.
                hr_cams_list = [resize_for_tensors(cams.unsqueeze(0), strided_up_size)[0] for cams in cams_list]
                hr_cams = torch.sum(torch.stack(hr_cams_list), dim=0)[:, :ori_h, :ori_w]
                hc = hr_cams.cpu().numpy()

                out_prob, _ = torch.max(torch.stack(probs_list), dim=0)
                image_probs.append(out_prob)

                x = window.x
                y = window.y
                w = window.w
                h = window.h

                # Accumulate through views so that overlapping windows add up.
                windowIndices = window.indices(False)
                sumsView = sums[:, y:y+h, x:x+w]
                countsView = counts[windowIndices]

                sumsView[:] += hc
                countsView[:] += 1

            # Mean over overlapping windows (counts broadcasts over the class axis).
            sums /= counts

            img_prob, _ = torch.max(torch.stack(image_probs), dim=0)
            img_prob = img_prob.cpu().numpy()

            ori_image_vis = ori_image.astype(np.uint8)
            label_cams = []

            for i, label_cam in enumerate(sums):
                # normalize cam, then weight it by the class probability
                cam_min = np.min(label_cam)
                norm_label_cam = (label_cam - cam_min) / (cam_norm_max - cam_min)
                norm_label_cam *= img_prob[i]

                if save_img:
                    img = np.stack((norm_label_cam,) * 3, -1) * 255.
                    img = img.astype(np.uint8)
                    img = cv2.applyColorMap(img, cv2.COLORMAP_JET)

                    overlay = cv2.addWeighted(ori_image_vis, 0.5, img, 0.5, 0)

                    if i in label:
                        cv2.imwrite(f'{pred_dir}{image_id}-{i}-true-{label}-prob{img_prob[i]:0.4f}.png', overlay)
                    else:
                        cv2.imwrite(f'{pred_dir}{image_id}-{i}-noLabel-prob{img_prob[i]:0.4f}.png', overlay)

                if save_npy:
                    norm_label_cam = cv2.resize(norm_label_cam, (256, 256))
                    norm_label_cam = norm_label_cam.astype(np.float16)
                    label_cams.append(norm_label_cam)

            if save_npy:
                np.save(f'{pred_dir}{image_id}.npy', label_cams)

    

In [None]:
# Competition data root and the directory where the segmentation script
# wrote the predicted masks.
data_dir = '/kaggle/input/hpa-single-cell-image-classification'
mask_dir = '/kaggle/working/hpa_test_mask'

# Two independent copies of the work list: df_sub_org stays as read, df_sub
# gets a placeholder label column.
df_sub_org = pd.read_csv('./sample_submission.csv')
df_sub = pd.read_csv('./sample_submission.csv')
df_sub["Label"] = -1

labels = df_sub["Label"].tolist()
ids = df_sub["ID"].tolist()

# Empty albumentations pipeline.
AUGMENTATIONS_TEST = Compose([], p=1)


In [None]:
def load_model(architecture, tag):
    """Build a `Classifier`, move it to the GPU and restore its weights.

    Args:
        architecture: identifier passed as the first argument to
            `Classifier` and echoed in the log line.
        tag: checkpoint file name without the `.pth` extension; the file
            is looked up under `/kaggle/input/puzzleweights/`.

    Returns:
        The model, switched to eval mode, with the checkpoint's state dict
        loaded.
    """
    weights_dir = '/kaggle/input/puzzleweights/'
    checkpoint = weights_dir + f'{tag}.pth'

    # Class count and mode are read from the notebook-level `args` config.
    net = Classifier(architecture, args.CLASS, mode=args.mode).cuda()
    net.eval()
    net.load_state_dict(torch.load(checkpoint))
    print('[i] Architecture is {}'.format(architecture))

    return net

In [None]:
pred1_start = time.time()


class args:
    """Plain namespace of settings for the 1024-crop prediction pass."""
    num_workers = num_cpu
    domain = 'test'
    mode = 'normal'
    CLASS = 19
    scales = '0.75,1.0,1.5'
    img_size = 1280
    crop_size = 1024
    sw_overlap = 0.25


# Output directory for this pass and the dataset built over the
# submission ids / placeholder labels.
pred_dir = create_directory(f'/kaggle/working/predictions/models1024/')
dataset = HPADataset(data_dir, ids, labels, args.img_size, AUGMENTATIONS_TEST)

# A single checkpoint is loaded; `run_prediction` receives it as a
# one-element list.
architecture = 'xception'
tag = 'xceptionPuzzle1024crops1280resizeadamwmllcombinedbs6_best_ep10'
model = load_model(architecture, tag)
models1024 = [model]

run_prediction(models1024, dataset, pred_dir)

# Wall-clock seconds spent in this cell.
pred1_gen = time.time() - pred1_start


In [None]:
#################################################################################################
# generate submission
#################################################################################################
import base64
import os
import typing as t
import zlib
from itertools import groupby
import cv2
import numpy as np
import pandas as pd
from pycocotools import _mask as coco_mask
from pycocotools import mask as mutils
from tqdm import tqdm

#################################################################################################

def coco_rle_encode(mask):
    """Run-length encode a mask in column-major order.

    Args:
        mask: array whose flattened (Fortran-order) values are grouped into
            runs of equal value.

    Returns:
        A dict with `counts` (list of run lengths) and `size`
        (`list(mask.shape)`). When the first value equals 1 a leading 0 is
        emitted, so the first count always describes a run that is not 1.
    """
    run_lengths = []
    column_major = mask.ravel(order='F')
    for position, (pixel, run) in enumerate(groupby(column_major)):
        starts_with_one = position == 0 and pixel == 1
        if starts_with_one:
            run_lengths.append(0)
        run_lengths.append(sum(1 for _ in run))
    return {'counts': run_lengths, 'size': list(mask.shape)}


def encode_binary_mask(mask: np.ndarray) -> t.Text:
    """Converts a binary mask into OID challenge encoding ascii text.

    The mask is RLE-encoded through `coco_mask.encode`, the resulting
    counts are zlib-compressed and the compressed bytes are base64-encoded.

    Args:
        mask: boolean array that is 2-D once singleton axes are squeezed.

    Returns:
        The base64 text (decoded as UTF-8) of the compressed RLE counts.

    Raises:
        ValueError: if `mask` does not have boolean dtype, or is not 2-D
            after `np.squeeze`.
    """

    # check input mask --
    # `np.bool_` is the NumPy boolean scalar type; the bare `np.bool` alias
    # is not available in every NumPy release.
    if mask.dtype != np.bool_:
        raise ValueError(
            "encode_binary_mask expects a binary mask, received dtype == %s" %
            mask.dtype)

    mask = np.squeeze(mask)
    if len(mask.shape) != 2:
        # The shape is wrapped in a 1-tuple: a bare tuple would be unpacked
        # as several format arguments and raise TypeError instead.
        raise ValueError(
            "encode_binary_mask expects a 2d mask, received shape == %s" %
            (mask.shape,))

    # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return str(base64_str, 'utf-8')


def get_rles_from_mask(mask):
    """Encode every non-zero label value of a label mask.

    Each distinct non-zero value in `mask` (visited in the sorted order
    returned by `np.unique`) is turned into a boolean mask and encoded
    twice: with `coco_rle_encode` and with `encode_binary_mask`.

    Args:
        mask: array of label values; 0 is skipped.

    Returns:
        A tuple `(sub_rle_list, rle_list, mask.shape[0], mask.shape[1])`
        where `sub_rle_list` holds the `encode_binary_mask` strings and
        `rle_list` the `coco_rle_encode` dicts, both in label order.
    """
    submission_rles = []
    uncompressed_rles = []
    for label_value in np.unique(mask):
        if label_value != 0:
            label_pixels = mask == label_value
            uncompressed_rles.append(coco_rle_encode(label_pixels))
            submission_rles.append(encode_binary_mask(label_pixels))

    n_rows, n_cols = mask.shape[0], mask.shape[1]
    return submission_rles, uncompressed_rles, n_rows, n_cols



def mask_labelling(idx):
    """Score every segmented cell of one submission image for every class.

    For row `idx` of `df_sub`, the saved class activation maps (CAMs) and
    the predicted cell mask are loaded. The confidence of a (class, cell)
    pair is the maximum of the class map multiplied by that cell's binary
    mask.

    Args:
        idx: positional row index into `df_sub`.

    Returns:
        A DataFrame with columns `ID`, `cls`, `conf`, `rles`, `cell_num`
        holding one row per (class, cell) pair in class-major order.
        `cell_num` is the cell's label value in the mask. The frame has no
        rows when the mask cannot be loaded or contains no cells.
    """
    row = df_sub.iloc[idx]
    image_id = row.ID
    img_w = int(row.ImageWidth)
    img_h = int(row.ImageHeight)

    ###########################################
    # load class activation maps
    ###########################################
    try:
        CAMs = np.load(f'./predictions/models1024/{image_id}.npy').astype(np.float32)
        CAMs[np.isnan(CAMs)] = 0
    except Exception as e:
        # Best effort: fall back to all-zero maps so the image still gets rows.
        print(e)
        CAMs = np.zeros((args.CLASS, 256, 256), dtype=np.float32)

    ###########################################
    # load segmentation mask
    ###########################################
    try:
        mask_path = mask_dir + "/" + image_id + '_predictedmask.png'
        cell_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if cell_mask is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'could not read segmentation mask: {mask_path}')
        cell_mask = cv2.resize(cell_mask, (img_w, img_h), interpolation=cv2.INTER_NEAREST)
    except Exception as e:
        print(e)
        # Arrays are (rows, cols) == (height, width).
        cell_mask = np.zeros((img_h, img_w))

    # No cells: return an empty frame that still carries the columns.
    if cell_mask.max() == 0:
        df = pd.DataFrame()
        df['ID'] = image_id
        df['cls'] = 0
        df['conf'] = 0
        df['rles'] = ''
        df['cell_num'] = 0

        return df

    ############################################
    # score every class against every cell
    ############################################
    sub_rles, _, mask_h, mask_w = get_rles_from_mask(cell_mask)

    # get_rles_from_mask encodes the non-zero values of np.unique(mask) in
    # order; pairing against the same sequence keeps each cell aligned with
    # its RLE even when label values are not contiguous.
    cell_ids = [int(value) for value in np.unique(cell_mask) if value != 0]

    pred_seg_clss = []
    pred_seg_confs = []
    pred_seg_rles = []
    cell_idxs = []

    for class_id in range(args.CLASS):
        # cv2.resize takes dsize as (width, height); the resize depends only
        # on the class, so it is done once per class rather than per cell.
        cls_cam = cv2.resize(CAMs[class_id, ...].astype(np.float32), (mask_w, mask_h))

        for cell_id, cell_rle in zip(cell_ids, sub_rles):
            result = cls_cam * (cell_mask == cell_id)

            pred_seg_clss.append(class_id)
            pred_seg_confs.append(np.max(result))
            pred_seg_rles.append(cell_rle)
            cell_idxs.append(cell_id)

    df = pd.DataFrame()
    df['ID'] = [image_id] * len(pred_seg_clss)
    df['cls'] = pred_seg_clss
    df['conf'] = pred_seg_confs
    df['rles'] = pred_seg_rles
    df['cell_num'] = cell_idxs

    return df

In [None]:
sub_start = time.time()

# Positional row indices of the submission frame, as plain Python ints.
ix = list(range(len(df_sub)))

# One per-image prediction frame per row, collected in row order.
seg_boxes = [mask_labelling(i) for i in tqdm(ix, total=len(ix))]

appended_data = pd.concat(seg_boxes)
print('prediction complete..')

# Wall-clock seconds spent in this cell.
sub_gen = time.time() - sub_start

# Format predictions

In [None]:
def combine(r):
    """Format one prediction row as `"<cls> <conf> <rles>"`.

    Args:
        r: mapping-like row providing `cls`, `conf` and `rles`; `rles`
            must already be a string.

    Returns:
        The three fields joined by single spaces.
    """
    fields = (str(r['cls']), str(r['conf']), r['rles'])
    return ' '.join(fields)

# Per-row "<cls> <conf> <rles>" strings.
appended_data['pred'] = appended_data[['cls', 'conf', 'rles']].apply(combine, axis=1)

# One space-joined prediction string per image id.
subm = appended_data.groupby(['ID'])['pred'].apply(' '.join).reset_index()

# Left join: every row of the full test frame is kept; ids without a
# prediction end up with a missing `pred`.
sub = full_test_df.merge(subm, how="left", on='ID')


sub.head(20)

In [None]:
def isNaN(num):
    """Return whether `num` compares unequal to itself (true for float NaN)."""
    differs_from_itself = num != num
    return differs_from_itself


# Overwrite the template PredictionString wherever a prediction exists;
# rows whose `pred` is missing keep the template value.
# One `.loc` assignment on the frame itself is used because chained
# assignment (`sub.PredictionString.loc[i] = ...`) may write to a temporary
# copy and leave `sub` unchanged.
has_pred = sub['pred'].notna()
sub.loc[has_pred, 'PredictionString'] = sub.loc[has_pred, 'pred']

# Keep only the columns of the original submission template.
sub = sub[df_sub_org.columns]

sub.head(20)

In [None]:
# Write the final submission frame without the index column.
submission_path = 'submission.csv'
sub.to_csv(submission_path, index=False)
print('Submission formatting complete !')

# Clean working dir

In [None]:
######################################################################
import shutil

pred_dir_path = './predictions'
mask_dir_path = './hpa_test_mask'

# Remove each intermediate directory independently, so that a failure on
# one of them (reported, not raised) does not skip removal of the other.
for dir_path in (pred_dir_path, mask_dir_path):
    try:
        shutil.rmtree(dir_path)
    except OSError as e:
        print(e)


######################################################################
# Timing summary, in minutes.
# NOTE(review): `mask_gen` is not assigned in this part of the notebook -
# presumably set by the mask-generation cell; verify it exists on a fresh run.
print(f"Library install time:  {lib_install/60} minutes")
print(f"Mask generation time:  {mask_gen/60} minutes")
print(f"Prediction time (1):   {pred1_gen/60} minutes")
print(f"Submission time:       {sub_gen/60} minutes")
print(f"Total Notebook time:   {(time.time() - nb_start)/60} minutes")