In [None]:
#coding: utf-8

In [None]:
# Tile geometry in pixels: the tiles cut from each slide are square.
TILE_WIDTH = TILE_HEIGHT = 256

In [None]:
import os
# Directory holding the test images. When it exists we are in the test
# (submission) phase; otherwise we fall back to a few training images.
img_dir = '/kaggle/input/prostate-cancer-grade-assessment/test_images'

is_test_phase = os.path.exists(img_dir)

if is_test_phase:
    img_name_list = sorted(os.listdir(img_dir))
else:
    # The test data is not visible (i.e. before submitting), so take a few
    # training images instead. os.listdir() returns names in arbitrary order,
    # so sort before slicing to get the same debug sample on every run.
    img_dir = os.path.join(os.path.dirname(img_dir), 'train_images')
    img_name_list = sorted(os.listdir(img_dir))[:5]

img_path_list = [os.path.join(img_dir, s) for s in img_name_list]

In [None]:
# Outside the test phase, list the images that will be processed (one per line).
if not is_test_phase:
    print(*img_path_list, sep='\n')

In [None]:
import time

In [None]:
import numpy as np
import cv2

def fill_black_holes(img):
    """Return a copy of ``img`` in which every detected contour is filled with 255.

    Contours are retrieved with ``cv2.RETR_CCOMP`` and each one is drawn,
    filled, onto the copy; the input array itself is left untouched.
    """
    filled = img.copy()
    contours, _hierarchy = cv2.findContours(filled, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
    for outline in contours:
        # Draw one contour at a time so that each is filled on its own.
        cv2.drawContours(filled, [outline], 0, 255, thickness=cv2.FILLED)
    return filled
    
def fill_white_holes(img):
    """Apply ``fill_black_holes`` to the inverted image and invert the result back."""
    inverted = 255 - img
    inverted_filled = fill_black_holes(inverted)
    return 255 - inverted_filled

def crop_tiles(original_img, tile_width, tile_height, is_test_phase=True):
    """Cut the foreground region of an image into non-overlapping tiles.

    A foreground mask is built by converting the image to grayscale,
    binarising it with Otsu's method and filling holes. The image is cropped
    to the bounding box of the foreground, padded on the bottom/right to a
    multiple of the tile size, and split into tiles. Tiles in which more than
    half of the pixels are background are discarded.

    Args:
        original_img: 3-channel BGR image array (height, width, channels).
        tile_width: Tile width in pixels.
        tile_height: Tile height in pixels.
        is_test_phase: When False, extra bookkeeping for debugging is returned.

    Returns:
        If ``is_test_phase`` is True: an array of the kept tiles (empty when
        no tile is kept).
        Otherwise a tuple ``(tiles, prediction_list, (num_tiles_y, num_tiles_x))``
        where ``prediction_list`` is the row-major flattening of the tile
        grid, holding 0 for discarded tiles and -1 for kept tiles. When the
        image has no foreground at all this is ``(empty array, [], (0, 0))``.
    """
    # Mask construction: grayscale -> Otsu binarisation -> hole filling.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    _, thresh_img = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
    mask = fill_white_holes(thresh_img)

    # NOTE: in this mask the background is 255 and the foreground is 0
    # (the foreground is the darker part of the image).
    fg_rows, fg_cols = np.where(mask == 0)
    if fg_rows.size == 0:
        # No foreground pixel at all (e.g. a blank image): there is no
        # bounding box to crop, so report "no tiles" instead of failing on
        # min()/max() of an empty array.
        no_tiles = np.array([])
        if not is_test_phase:
            return no_tiles, [], (0, 0)
        return no_tiles

    top, bottom = fg_rows.min(), fg_rows.max()
    left, right = fg_cols.min(), fg_cols.max()

    img = original_img[top : bottom + 1, left : right + 1]
    mask = mask[top : bottom + 1, left : right + 1]

    h, w = img.shape[:2]
    pad_x = (tile_width - w % tile_width) % tile_width
    pad_y = (tile_height - h % tile_height) % tile_height

    # Pad the image with white and the mask with 255 (= background) so that
    # the padded area is counted as background by the 50% rule below.
    img = np.pad(img, [[0, pad_y], [0, pad_x], [0, 0]], 'constant', constant_values=255)
    mask = np.pad(mask, [[0, pad_y], [0, pad_x]], 'constant', constant_values=255)

    num_tiles_y = img.shape[0] // tile_height
    num_tiles_x = img.shape[1] // tile_width
    tile_list = []

    # Row-major flattening of the 2-D prediction grid
    # (num_tiles_y rows x num_tiles_x columns).
    if not is_test_phase:
        prediction_list = [-1] * (num_tiles_y * num_tiles_x)

    for tile_id_y in range(num_tiles_y):
        tile_top = tile_id_y * tile_height

        for tile_id_x in range(num_tiles_x):
            tile_left = tile_id_x * tile_width

            tile_img = img[tile_top : tile_top + tile_height, tile_left : tile_left + tile_width]
            tile_mask = mask[tile_top : tile_top + tile_height, tile_left : tile_left + tile_width]

            if np.count_nonzero(tile_mask) > int(tile_mask.size * 0.5):
                # Skip tiles that are more than half background.
                if not is_test_phase:
                    prediction_list[tile_id_y * num_tiles_x + tile_id_x] = 0
            else:
                tile_list.append(tile_img)

    tiles = np.array(tile_list)
    if not is_test_phase:
        return tiles, prediction_list, (num_tiles_y, num_tiles_x)
    return tiles

In [None]:
import openslide

def load_img(tiff_path, slide_level=1):
    """Read one full pyramid level of a slide and return it as a BGR array.

    Args:
        tiff_path: Path of the slide file, passed to ``openslide.OpenSlide``.
        slide_level: Index of the pyramid level to read (default 1).

    Returns:
        NumPy array of the whole level, converted with ``cv2.COLOR_RGB2BGR``.
    """
    slide = openslide.OpenSlide(tiff_path)
    try:
        region = slide.read_region((0, 0), slide_level, slide.level_dimensions[slide_level])
        # np.array() copies the pixel data, so it stays valid after close().
        img = np.array(region)
    finally:
        # Always release the slide so that file handles are not leaked when
        # many slides are loaded one after another.
        slide.close()
    # NOTE(review): read_region presumably yields RGBA; this conversion is
    # relied on to produce a 3-channel BGR image - confirm.
    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

In [None]:
import torch

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
from torch import nn

import sys
sys.path.append('/kaggle/input/efficientnetpytorch/')
from efficientnet_pytorch.model import EfficientNet

class Enet(nn.Module):
    """EfficientNet whose final fully connected layer outputs ``num_classes`` values."""

    def __init__(self, model_name, num_classes, use_pretrained_models):
        """Build the network.

        Args:
            model_name: EfficientNet variant name, e.g. ``'efficientnet-b0'``.
            num_classes: Number of outputs of the replaced final layer.
            use_pretrained_models: If true, build via ``EfficientNet.from_pretrained``;
                otherwise via ``EfficientNet.from_name``.
        """
        super().__init__()
        if use_pretrained_models:
            backbone = EfficientNet.from_pretrained(model_name)
        else:
            backbone = EfficientNet.from_name(model_name)
        # Swap the classification head for one with the requested output size.
        backbone._fc = nn.Linear(backbone._fc.in_features, num_classes)
        self.enet = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped EfficientNet."""
        return self.enet(x)

    def load_parameters(self, parameters_path):
        """Load a saved state dict from ``parameters_path``, mapped onto the global ``device``."""
        state_dict = torch.load(parameters_path, map_location=device)
        self.load_state_dict(state_dict)

In [None]:
# Build a 5-output EfficientNet-B0 without pretrained weights, load the saved
# parameters, move it to the selected device and switch to evaluation mode.
model = Enet(model_name='efficientnet-b0', num_classes=5, use_pretrained_models=False)
model.load_parameters(parameters_path='../input/saved-parameters/net_epoch_0009.pth')
model = model.to(device)
model.eval()

In [None]:
def gleason_map_to_isup(gmap):
    '''
        引数 gmap: 2次元の numpy 配列 (ndarray)．各要素は領域ごとの Gleason スコア (3-5) または 0 (背景)，1 (違う組織), 2 (良性組織)．
        出力 isup_grade: ISUP グレード (以下のサンプルでは適当に 0 にしている)
        
        処理内容: 
        (1) gmap の要素のうち，もっとも多く含まれている要素 (a) と2番めに多く含まれている要素 (b) を調べる．
        (2) a と b の値に応じて，ISUP グレードを計算して出力する．
            - a + b == 6 ならば 1
            - a == 3, b == 4 ならば 2
            - a == 4, b == 3 ならば 3
            - a + b == 8 ならば 4
            - a + b == 9 または 10 ならば 5
            - それ以外は 0
        
        ※(1) で，gmap の要素が一種類しかなければ，a と b は同じ値とする．
        ※(1) で，もっとも多く含まれている要素が 2 種類以上ある場合は，値が大きいものから 2 つ選んで a, b とする．
        ※(1) では 0 (背景)，1 (違う組織)，2 (良性組織) は無視する．gmap に 0, 1, 2 以外の要素が含まれなければ，(2) に関係なく 0 を返す．

    '''
    

    X = np.count_nonzero(gmap == 3)
    Y = np.count_nonzero(gmap == 4)
    Z = np.count_nonzero(gmap == 5)

    if X == 0 and Y == 0 and Z == 0:
        isup_grade = 0

    elif Y == 0 and Z == 0:
        isup_grade = 1

    elif X > Y > Z:
        isup_grade = 2

    elif Y >= X > Z:
        isup_grade = 3

    elif (Z >= Y and X > Y) or (X == 0 and Z == 0):
        isup_grade = 4

    elif (Z >= X and Y >= X) or (X == 0 and Y == 0):
        isup_grade = 5
    
    else:
        isup_grade = 0
    

    
    return isup_grade

In [None]:
# Run the model over every image and collect (image_id, isup_grade) pairs.
# Plain lists are filled here because building the DataFrame once from lists
# is faster than growing it row by row.
img_ID_list = []  # image IDs, in processing order
isup_G_list = []  # ISUP grade predicted for each image ID
ID_append = img_ID_list.append
isup_append = isup_G_list.append

# Upper bound on the number of tiles sent through the model in one forward
# pass; larger inputs are split so that they fit in memory.
MAX_TILES_PER_BATCH = 256

with torch.no_grad():
    for img_no, img_path in enumerate(img_path_list, start=1):
        img_ID = os.path.basename(img_path).split('.')[0]  # ID written to submission.csv
        if not is_test_phase:
            print('Testing {} ... ({} / {})'.format(os.path.basename(img_path), img_no, len(img_path_list)))
            loop_start_time = time.time()

        img = load_img(img_path)
        _cropped_tiles = crop_tiles(img, tile_width=TILE_WIDTH, tile_height=TILE_HEIGHT, is_test_phase=is_test_phase)
        if is_test_phase:
            tiles = _cropped_tiles
        else:
            tiles, pred_list, img_shape = _cropped_tiles

        if len(tiles) == 0:
            isup_grade = 0
        else:
            # np.float was only an alias of the builtin float (float64) and has
            # been removed from NumPy, so name the dtype explicitly.
            tiles = tiles.astype(np.float64) / 255.0
            # NOTE(review): load_img returns BGR, while these constants look like
            # the usual ImageNet mean/std in RGB order - confirm this matches how
            # the model was trained.
            tiles = ((tiles - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]).transpose([0, 3, 1, 2])
            tiles_tensor = torch.from_numpy(tiles).float()

            # Feed the tiles in chunks. Only the current chunk is moved to the
            # device, and stepping with range() never produces an empty chunk
            # when the tile count is an exact multiple of the chunk size.
            output_chunks = [
                model(tiles_tensor[start : start + MAX_TILES_PER_BATCH].to(device)).cpu()
                for start in range(0, tiles_tensor.shape[0], MAX_TILES_PER_BATCH)
            ]
            output_tensor = torch.cat(output_chunks)

            # The arg-max of the raw outputs equals the arg-max of their softmax,
            # so the class index is taken directly from the batched result.
            _, classes = torch.max(output_tensor, dim=1)
            # Shift the 0-based class index to the 1-5 labels expected by
            # gleason_map_to_isup. Tensor.add() is out-of-place, so the result
            # has to be assigned.
            classes = classes + 1

            if is_test_phase:
                # A 2-D map is not actually required and background entries do
                # not need to be included (they are ignored), so in the test
                # phase the model output is passed as-is to save time.
                isup_grade = gleason_map_to_isup(classes.numpy())
            else:
                # Arrange the predictions on the tile grid for debugging:
                # entries that are not 0 (background) receive the model output.
                _i = 0
                for i, pred in enumerate(pred_list):
                    if pred != 0:
                        pred_list[i] = classes[_i].item()
                        _i += 1

                gleason_map = np.reshape(pred_list, img_shape)
                isup_grade = gleason_map_to_isup(gleason_map)

        ID_append(img_ID)  # add the ID to the list
        isup_append(isup_grade)  # add the ISUP grade to the list
        print('image_id: {}'.format(img_ID))
        print('ISUP grade: {}'.format(isup_grade))

        if not is_test_phase:
            print('Done. Elapsed time: {:.2f}[s]\n'.format(time.time() - loop_start_time))

Convert the two lists `img_ID_list` and `isup_G_list` into a pandas DataFrame, then write the submission file.

In [None]:
import pandas as pd
# Assemble the per-image results into a two-column table and write it out.
submission_columns = ['image_id', 'isup_grade']
submission_data = pd.DataFrame(
    dict(zip(submission_columns, (img_ID_list, isup_G_list))),
    columns=submission_columns,
)

submission_data.to_csv('submission.csv', index=False)
# if is_test_phase:
#     submission_data.to_csv('submission.csv', index=False)
# else:
#     print('submission_data\n {}'.format(submission_data.head()))