In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import cv2
import zlib
import base64
import argparse
import numpy as np
import pandas as pd
import torch  # 1.7.0
import torchvision.transforms as transforms
from collections import OrderedDict

# Make the Kaggle input directory the working directory; relative paths used
# later in the notebook resolve against it.
os.chdir('/kaggle/input')
# NOTE: this is not the last expression of the cell, so its value is not displayed.
os.getcwd()
# Install the segmentation dependencies from local paths under /kaggle/input
# (presumably attached datasets, so that no network access is needed -- TODO confirm).
# These installs must run before the hpacellseg / pycocotools imports that follow.
!pip install -q "/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
!pip install -q "/kaggle/input/hpapytorchzoozip/pytorch_zoo-master"
!pip install -q "/kaggle/input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"

from hpacellseg import cellsegmentator
from hpacellseg.utils import label_cell
from pycocotools import _mask as coco_mask

In [None]:
class HPA_test(torch.utils.data.Dataset):
    ''' Dataset class for PyTorch dataloader.

    Serves the individual cells of ONE test image. The four channel images
    (blue, green, red, yellow) are read once in the constructor; every item is
    the masked crop of a single cell, resized to 256x256, together with the
    cell mask encoded as a base64 string.

    # Arguments
        root       (str): Data directory that contains the 'test' folder.
        name       (str): Image ID (file name without the '_<channel>.png' suffix).
        cells     (list): Entries of the form [encoded_mask, min_h, max_h, min_w, max_w],
                          where the bounds are INCLUSIVE pixel coordinates.
        transform       : Stored for interface compatibility; not applied by __getitem__.
    '''
    def __init__(self, root, name, cells, transform):
        super(HPA_test, self).__init__()
        self.root = root
        self.name = name
        self.cell_list = cells
        self.transform = transform
        channel_list = ['blue', 'green', 'red', 'yellow']
        channels = []
        for ch in channel_list:
            path = os.path.join(root, 'test', f'{name}_{ch}.png')
            channel = cv2.imread(path, cv2.IMREAD_UNCHANGED)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than later in __getitem__.
            if channel is None:
                raise FileNotFoundError(f'Could not read image channel: {path}')
            channels.append(channel)
        self.image = np.stack(channels, axis=-1)

    @staticmethod
    def _crop_and_pad(image, sub_mask, min_h, max_h, min_w, max_w):
        ''' Cut one masked cell out of `image`, scale it to [0, 1) and pad it to a square.

        # Arguments
            image    (ndarray): (H, W, C) integer image.
            sub_mask (ndarray): (H, W, 1) binary mask of the cell.
            min_h, max_h, min_w, max_w (int): Inclusive bounding box of the cell.

        # Returns
            float32 ndarray of shape (h', w', C) with the shorter side zero-padded.
        '''
        # The bounds are inclusive, hence the +1; without it the last row/column
        # is lost and a one-pixel-wide cell produces an empty patch.
        rows = slice(min_h, max_h + 1)
        cols = slice(min_w, max_w + 1)

        # Crop first, then mask: same result as masking the full image, but
        # without a full-resolution multiplication for every cell.
        patch = image[rows, cols] * sub_mask[rows, cols].astype(image.dtype)

        # Scale by the range of the stored dtype: 65536 for 16-bit images (as
        # before) and 256 for 8-bit images, which were previously left almost black.
        if np.issubdtype(image.dtype, np.integer):
            scale = float(np.iinfo(image.dtype).max) + 1.0
        else:
            scale = 65536.0
        patch = (patch / scale).astype(np.float32)

        # Zero-pad the shorter spatial side symmetrically.
        h, w = patch.shape[:2]
        delta = int(abs(h - w) / 2)
        _pad_list = ((0, 0), (delta, delta), (0, 0)) if h > w else ((delta, delta), (0, 0), (0, 0))
        return np.pad(patch, _pad_list, constant_values=0)

    def __getitem__(self, index):
        ''' Return (patch, mask_str) for the cell at `index`.

        patch is a float32 array of shape (C, 256, 256); mask_str is the
        zlib-compressed, base64-encoded RLE counts of the cell mask.
        '''
        # Get patch.
        encoded_mask, min_h, max_h, min_w, max_w = self.cell_list[index]
        sub_mask = coco_mask.decode(encoded_mask)
        patch = self._crop_and_pad(self.image, sub_mask, min_h, max_h, min_w, max_w)
        patch = cv2.resize(patch, (256, 256))

        # Get mask string.
        rle_str = encoded_mask[0]['counts']
        binary_str = zlib.compress(rle_str, zlib.Z_BEST_COMPRESSION)
        mask_str = base64.b64encode(binary_str).decode()

        # Channels-last -> channels-first.
        patch = np.array(patch, np.float32).transpose(2, 0, 1)
        return patch, mask_str

    def __len__(self):
        ''' Number of cells available for this image. '''
        return len(self.cell_list)

In [None]:
class HPA_image_test(torch.utils.data.Dataset):
    ''' Image-level dataset for the PyTorch dataloader.

    Each item is one whole test image, assembled from its four channel files
    and scaled by the maximum value of its dtype, plus the image ID.

    # Arguments
        root      (str): Data directory that contains the 'test' folder.
        names    (list): Image IDs to serve.
        transform      : Optional callable applied to the float32 image.
    '''

    def __init__(self, root, names, transform):
        super().__init__()
        self.root, self.name_list, self.transform = root, names, transform

    def __len__(self):
        return len(self.name_list)

    def __getitem__(self, index):
        name = self.name_list[index]

        # Read the four channel files in the fixed blue/green/red/yellow order.
        planes = []
        for colour in ('blue', 'green', 'red', 'yellow'):
            plane_path = os.path.join(self.root, 'test', f'{name}_{colour}.png')
            planes.append(cv2.imread(plane_path, cv2.IMREAD_UNCHANGED))
        image = np.stack(planes, axis=-1)

        # 16-bit images are divided by 65535, everything else by 255.
        max_value = 65535 if image.dtype == np.uint16 else 255
        image = (image / max_value).astype(np.float32)

        if self.transform is None:
            return image, name
        return self.transform(image), name

In [None]:
def cell_segmentation(df, data_dir, model_dir, save_dir):
    ''' Cell segmentation for HPA dataset.

    Runs the HPA cell segmentator over every image listed in `df` and stores,
    per image, one entry [encoded_mask, min_h, max_h, min_w, max_w] for each
    cell in '<save_dir>/test_cell_masks.t7'. The bounding-box values are the
    inclusive min/max pixel coordinates of the cell.

    # Arguments
        df  (DataFrame): Sample submission; only the 'ID' column is read.
        data_dir  (str): Directory to load data (must contain a 'test' folder).
        model_dir (str): Directory to load models.
        save_dir  (str): Directory to save cell-level masks.
    '''
    # Load segmentation model.
    nuclei_model = os.path.join(model_dir, 'nuclei-model.pth')
    cell_model = os.path.join(model_dir, 'cell-model.pth')
    segmentator = cellsegmentator.CellSegmentator(
        nuclei_model,
        cell_model,
        scale_factor=0.25,
        device='cuda',
        padding=True,
        multi_channel_model=True,
    )

    # Load data list.
    image_list = list(df['ID'])
    cell_level_masks = {}

    # Set batch size.
    batch_size = 24
    total = (len(image_list) - 1) // batch_size + 1
    test_dir = os.path.join(data_dir, 'test')

    for index in range(0, len(image_list), batch_size):
        # Initialize data path.
        # Build each channel path explicitly: deriving them with
        # str.replace('red', ...) on the full path would also rewrite any
        # 'red' that occurs in the directory or in the image ID.
        sub_image_list = image_list[index: index + batch_size]
        mt = [os.path.join(test_dir, f'{img}_red.png') for img in sub_image_list]
        er = [os.path.join(test_dir, f'{img}_yellow.png') for img in sub_image_list]
        nu = [os.path.join(test_dir, f'{img}_blue.png') for img in sub_image_list]

        # Model inference.
        nuclei_segmentations = segmentator.pred_nuclei(nu)
        cell_segmentations = segmentator.pred_cells([mt, er, nu])

        for i, name in enumerate(sub_image_list):
            # Label cells.
            _, cell_mask = label_cell(nuclei_segmentations[i], cell_segmentations[i])

            # Save cell masks.
            cell_level_masks[name] = []
            for c in range(cell_mask.max()):
                # Get cell region.
                sub_mask = (cell_mask == (c + 1))
                h_pos, w_pos = np.where(sub_mask)
                if h_pos.size == 0:
                    # Label id not present in the mask: nothing to encode, and
                    # min()/max() on an empty array would raise.
                    continue
                min_h, max_h = h_pos.min(), h_pos.max()
                min_w, max_w = w_pos.min(), w_pos.max()

                # Encode mask. The COCO encoder works on a Fortran-ordered
                # uint8 array, so cast the boolean mask explicitly.
                mask_to_encode = np.asfortranarray(
                    np.expand_dims(sub_mask, axis=-1).astype(np.uint8)
                )
                encoded_mask = coco_mask.encode(mask_to_encode)
                cell_level_masks[name].append([encoded_mask, min_h, max_h, min_w, max_w])

        # Display progress.
        print(' Progress: %5d / %5d\r' % (index // batch_size + 1, total), end='')

    # Save results.
    torch.save(cell_level_masks, os.path.join(save_dir, 'test_cell_masks.t7'))

In [None]:
def image_classification(df, data_dir, model_dir=None, save_dir=None):
    ''' Image-level classification for HPA dataset.

    Predicts 18 class probabilities per image with a test-time-augmented
    EfficientNet-B7, appends a 19th score (1 - max probability) and saves the
    {image ID: scores} dictionary to '<save_dir>/xxx.t7'.

    Both call forms are accepted:
        image_classification(df, data_dir, model_dir, save_dir)
        image_classification(df, data_dir, save_dir)      # legacy form

    # Arguments
        df  (DataFrame): Sample submission; only the 'ID' column is read.
        data_dir  (str): Directory to load data.
        model_dir (str): Directory to load models. When omitted, the weights
                         file is looked up relative to the working directory.
        save_dir  (str): Directory to save the image-level predictions.

    # Raises
        TypeError: If no save directory is given.
    '''
    if save_dir is None:
        # Legacy three-argument call: the third positional argument is save_dir.
        model_dir, save_dir = None, model_dir
    if save_dir is None:
        raise TypeError("image_classification() missing required argument: 'save_dir'")

    # NOTE(review): EfficientNet, My_EModel and tta are not defined in the part
    # of the notebook visible here -- they must be imported/defined before this
    # function is called.
    model = EfficientNet.from_name('efficientnet-b7', num_classes=18)
    model._change_in_channels(4)
    model = My_EModel(model, num_classes=18).cuda()
    model = torch.nn.DataParallel(model)

    # Load model weights.
    weights_path = os.path.join(model_dir, 'xxx.pth') if model_dir else 'xxx.pth'
    model.load_state_dict(torch.load(weights_path))

    model.eval()
    model = tta.ClassificationTTAWrapper(model, tta.aliases.d4_transform(), merge_mode='mean')

    # Define transforms for dataset.
    input_size = EfficientNet.get_image_size('efficientnet-b7')
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((input_size, input_size)),
    ])

    name_list = list(df['ID'])
    test_set = HPA_image_test(data_dir, name_list, test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=16,
        shuffle=False,
        pin_memory=True,
        num_workers=4,
    )

    # Model inference.
    image_level_pred = {}
    with torch.no_grad():
        for batch_idx, (inputs, names) in enumerate(test_loader):
            # Forward propagation.
            inputs = inputs.cuda()
            outputs = model(inputs)
            predicts = outputs.sigmoid()

            # Record predictions.
            for predict, name in zip(predicts, names):
                predict = predict.cpu().numpy()
                # Extra last entry: 1 - highest class probability.
                predict = np.append(predict, 1 - predict.max())
                image_level_pred[name] = predict

            # Display progress.
            print(' Progress: %5d / %5d\r' % (batch_idx + 1, len(test_loader)), end='')
    torch.save(image_level_pred, os.path.join(save_dir, 'xxx.t7'))

In [None]:
def cell_classification(df, data_dir, model_dir, save_dir):
    ''' Cell classification for HPA dataset.

    For every image in `df`, classifies each segmented cell and writes one
    '<class> <score> <mask>' triple per class and cell into the
    'PredictionString' column, then saves `df` as '<save_dir>/submission.csv'.
    Per cell, 18 sigmoid scores are emitted plus a 19th score equal to
    1 - max(score).

    # Arguments
        df  (DataFrame): Sample submission; 'PredictionString' is overwritten in place.
        data_dir  (str): Directory to load data.
        model_dir (str): Directory to load models. NOTE: currently unused, the
                         checkpoint path is hard-coded below.
        save_dir  (str): Directory that holds 'test_cell_masks.t7' and receives
                         the submission file.
    '''
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize classification model.
    # NOTE(review): Net is not defined in the part of the notebook visible
    # here -- it must be defined before this function is called.
    model = Net(num_class=18)
    model = torch.nn.DataParallel(model)
    # Use the selected device consistently (the model used to be moved with
    # .cuda() regardless of the CPU fallback above).
    model = model.to(device)

    # Load model weights. The state dict is loaded into the DataParallel
    # wrapper, so its keys are expected to carry the wrapper's prefix.
    checkpoint = torch.load(
        r'/kaggle/input/cell-image-18-fold0/checkpoint_HPA_014_0.67812.t7',
        map_location=device,
    )['model']
    model.load_state_dict(checkpoint, strict=True)
    model.eval()

    # Define transforms for dataset.
    # NOTE: HPA_test stores the transform but does not apply it.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((256, 256)),
    ])

    # Load cell-level masks.
    cell_level_masks = torch.load(os.path.join(save_dir, 'test_cell_masks.t7'), map_location=device)

    with torch.no_grad():
        for index, row in df.iterrows():
            # Load cells.
            name = row.ID
            test_set = HPA_test(data_dir, name, cell_level_masks[name], test_transform)
            test_loader = torch.utils.data.DataLoader(
                test_set,
                batch_size=128,
                shuffle=False,
                pin_memory=True,
                num_workers=16,
            )

            # Model inference.
            pred_str = []
            for batch_idx, (inputs, masks) in enumerate(test_loader):
                # Forward propagation.
                inputs = inputs.to(device)
                outputs = model(inputs)
                predicts = outputs.sigmoid()

                # Record predictions.
                for predict, mask in zip(predicts, masks):
                    predict = predict.tolist()
                    # Extra last class: 1 - highest class probability.
                    predict.append(1 - max(predict))
                    for c, score in enumerate(predict):
                        # No leading space per entry: the entries are joined
                        # with a single space below, which keeps the string
                        # free of leading and doubled spaces.
                        pred_str.append(f'{c} {score} {mask}')
            df.loc[index, 'PredictionString'] = ' '.join(pred_str)
            print(' Progress: %5d / %5d\r' % (index + 1, len(df)), end='')

    # Save submission file.
    df.to_csv(os.path.join(save_dir, 'submission.csv'), index=False)
    print(df)

In [None]:
# Input / output locations.
data_dir = '/kaggle/input/hpa-single-cell-image-classification/'
seg_dir  = '/kaggle/input/segmodel'
cls_dir  = '/kaggle/input/hpamodelseb4focal/'
save_dir = '/kaggle/working/'

# When the sample submission has exactly this many rows (presumably the public
# test set -- TODO confirm), only the first few images are processed so that
# an interactive run stays short.
PUBLIC_TEST_SIZE = 559
QUICK_RUN_SIZE = 4

df = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))
if len(df) == PUBLIC_TEST_SIZE:
    # .copy() makes the subset an independent frame; cell_classification later
    # assigns into it with .loc, which on a plain slice triggers pandas'
    # SettingWithCopyWarning.
    df = df[:QUICK_RUN_SIZE].copy()

# Pipeline: segment cells, predict image-level labels, then classify each cell
# and write the submission file.
cell_segmentation(df, data_dir, seg_dir, save_dir)
image_classification(df, data_dir, cls_dir, save_dir)
cell_classification(df, data_dir, cls_dir, save_dir)