In [None]:
import sys
import os

import glob
from tqdm.notebook import tqdm
import shutil
from pathlib import Path
from openslide import OpenSlide
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import block_reduce
import cv2.cv2 as cv2
from sys import getsizeof
import collections
import pickle

In [None]:
import tensorflow as tf

# Ask TensorFlow to grow its GPU allocation on demand instead of reserving the
# whole device up front; a PyTorch model is also moved to the GPU later in this
# notebook. Looping over the device list (instead of indexing element 0) keeps
# the cell runnable when no GPU is visible and treats every GPU the same way.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Competition data: metadata tables and the folder holding the slide images.
data_dir = '../input/prostate-cancer-grade-assessment'
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
df_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
df_sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

model_dir = '../input/panda-public-models'
image_folder = os.path.join(data_dir, 'test_images')
# If test_images does not exist, fall back to a handful of training images.
is_test = os.path.exists(image_folder)
image_folder = image_folder if is_test else os.path.join(data_dir, 'train_images')

# `.loc[:10]` is label-inclusive (rows 0..10). Take an explicit copy because a
# later cell adds an `isup_grade` column to `df`; writing into a slice of
# `df_train` would trigger pandas' SettingWithCopyWarning.
df = (df_test if is_test else df_train.loc[:10]).copy()
df.head()

In [None]:
def roll(a, window, dx=1, dy=1):
    """Return a strided view of all `window`-shaped patches of `a`.

    Args:
        a: array whose first two axes are scanned (rows, columns).
        window: (rows, columns) size of the rolling window.
        dx: horizontal step, in columns.
        dy: vertical step, in rows.

    Returns:
        A view built with `as_strided` of shape
        (n_row_positions, n_col_positions, *window); no data is copied.
    """
    row_stride, col_stride = a.strides[0], a.strides[1]
    n_row_positions = (a.shape[0] - window[0]) // dy + 1
    n_col_positions = (a.shape[1] - window[1]) // dx + 1
    return np.lib.stride_tricks.as_strided(
        a,
        shape=(n_row_positions, n_col_positions, *window),
        strides=(row_stride * dy, col_stride * dx, row_stride, col_stride),
    )


def reduce(image, block, stride):
    """Average `image` over square `block` x `block` windows taken every `stride` pixels."""
    windows = roll(image, (block, block), stride, stride)
    # The last two axes of the view are the window itself.
    return windows.mean((-1, -2))

def tile_filter(tile, provider):
    """Decide whether a tile is worth feeding to the tile model.

    Args:
        tile: image array with channels on axis 2; values are divided by 255,
            so 8-bit intensities are presumably expected.
        provider: data-provider name; names starting with 'rad' use a
            different set of thresholds from all others.

    Returns:
        True to keep the tile, False to skip it.
    """
    # NOTE: the removed NumPy alias `np.float` is replaced by the builtin
    # `float`, which is the same dtype (float64).
    if provider[:3] == 'rad':
        # Reject when more than 40% of pixels are nearly white ...
        if ((1.0 - tile / 255).mean(2) < 0.1).astype(float).mean() > 0.4:
            return False
        # ... or when more than 20% of pixels are dark.
        if ((tile / 255).mean(2) < 0.4).astype(float).mean() > 0.2:
            return False
    else:
        # Reject when more than 30% of pixels are bright.
        if ((1.0 - tile / 255).mean(2) < 0.2).astype(float).mean() > 0.3:
            return False
    return True

## Model

In [None]:
from tensorflow.keras.models import load_model
# ResNet50-style preprocessing; applied to each normalised tile in the prediction loop.
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
# Tile-level Keras model, loaded from the `tilenet` input dataset.
model = load_model('../input/tilenet/checkout_panda_v3.hdf5')

In [None]:
# Build a two-output version of the tile model: its normal output plus the
# output of the second-to-last layer, which is stored as the per-tile feature
# vector in the prediction loop.
output = [model.output, model.layers[-2].output]
# Shape of that feature output without the leading (batch) axis.
K = output[1].shape[1:]
from tensorflow.keras.models import Model
mmodel = Model(inputs=model.input, outputs = output)
print(f'# features: {K}')

## WSI Model

In [None]:
from typing import Type
from torch import nn, Tensor


class Conv2dBlock(nn.Module):
    """Stack of `layers` Conv2d -> (BatchNorm2d) -> activation stages.

    The first stage maps `in_channels` to `out_channels`; every following
    stage keeps `out_channels`. Each convolution is padded by
    `kernel_size // 2`.
    """

    def __init__(self, in_channels: int, out_channels: int, layers: int = 2,
                 activation: Type[nn.Module] = nn.ReLU, kernel_size: int = 3,
                 batchnorm: bool = True):
        super().__init__()
        padding = kernel_size // 2
        # Plain Python list kept as an attribute; the modules are registered
        # through `self.main` below.
        self.blocks = []

        stage_in = in_channels
        for _ in range(layers):
            stage = [nn.Conv2d(stage_in, out_channels, kernel_size, padding=padding)]
            if batchnorm:
                stage.append(nn.BatchNorm2d(out_channels))
            stage.append(activation())
            self.blocks.append(nn.Sequential(*stage))
            stage_in = out_channels

        self.main = nn.Sequential(*self.blocks)

    def forward(self, inputs: Tensor) -> Tensor:
        """Apply all stages in order."""
        return self.main(inputs)


In [None]:
from torch.nn.functional import pad
import torch
from typing import Union, Tuple, Iterable

class PandaModel(nn.Module):
    """Slide-level classifier over a grid of per-tile feature vectors.

    A four-stage convolutional encoder (n_channels -> 1024 -> 512 -> 256 -> 128,
    with no pooling between stages) is followed by a global spatial max and
    one small MLP head per entry of `n_classes`.
    """

    def __init__(self, n_channels: int, n_classes: Union[int, Tuple[int, ...]]):
        super().__init__()
        widths = (n_channels, 1024, 512, 256, 128)
        self.encoder = nn.Sequential(
            *[Conv2dBlock(w_in, w_out) for w_in, w_out in zip(widths[:-1], widths[1:])]
        )

        def build_head(out_features: int):
            return nn.Sequential(
                nn.Linear(128, 32),
                nn.ReLU(),
                nn.Linear(32, 16),
                nn.ReLU(),
                nn.Linear(16, out_features),
            )

        # A single int means one head; an iterable means one head per entry.
        head_sizes = n_classes if isinstance(n_classes, Iterable) else [n_classes]
        self.heads = nn.ModuleList([build_head(n) for n in head_sizes])

    def forward(self, x):
        """Return detached head outputs: a list for several heads, a tensor for one."""
        encoded = self.encoder(x).detach()
        # Global max over width (dim 3), then height (dim 2).
        pooled = encoded.max(3).values.max(2).values.detach()
        outputs = [head(pooled).detach() for head in self.heads]
        if len(outputs) > 1:
            return outputs
        return outputs[0]

In [None]:
# Slide-level model: 2048 input feature channels, one head with 6 outputs.
wsi_model = PandaModel(2048, 6)
# NOTE(review): torch.load unpickles the checkpoint; only load files you trust.
wsi_model.load_state_dict(torch.load('../input/wsinet/best_qp2_723.pth', map_location='cpu')['model'])
# The model is used for inference only. Switch to eval mode so the BatchNorm
# layers use the running statistics stored in the checkpoint instead of the
# statistics of the single slide being predicted (and stop updating them).
wsi_model = wsi_model.eval()

## Normalization

In [None]:
def color_deconvolution(im_rgb, w, I_0=None):
    """Unmix an RGB image into stain channels using stain matrix `w`.

    Args:
        im_rgb: image to unmix.
        w: stain matrix with one stain per column; when its third column is
            (numerically) zero, it is filled in via `complement_stain_matrix`.
        I_0: background intensity forwarded to `rgb_to_sda`; None selects that
            function's compatibility mode.

    Returns:
        Named tuple `Unmixed(Stains, StainsFloat, Wc)`: the unmixed image as
        uint8, the same image as float, and the normalised stain matrix used.
    """
    needs_complement = np.linalg.norm(w[:, 2]) <= 1e-16
    wc = normalize(complement_stain_matrix(w) if needs_complement else w)

    unmixing = np.linalg.inv(wc)
    pixels = convert_image_to_matrix(im_rgb)[:3]
    sda_deconv = np.dot(unmixing, rgb_to_sda(pixels, I_0))
    sda_inv = sda_to_rgb(sda_deconv, None if I_0 is None else 255)

    # Back to image layout, then a clipped 8-bit version alongside the float one.
    StainsFloat = convert_matrix_to_image(sda_inv, im_rgb.shape)
    Stains = StainsFloat.clip(0, 255).astype(np.uint8)

    Unmixed = collections.namedtuple('Unmixed',
                                     ['Stains', 'StainsFloat', 'Wc'])
    return Unmixed(Stains, StainsFloat, wc)

In [None]:
# Reference 3-component vector for each named stain (presumably RGB optical
# density directions — TODO confirm). Looked up by `_reorder_stains` and `dbn`.
stain_color_map = {
    'hematoxylin': [0.65, 0.70, 0.29],
    'eosin':       [0.07, 0.99, 0.11],
    'dab':         [0.27, 0.57, 0.78],
    'null':        [0.0, 0.0, 0.0]
}

def stain_unmixing_routine(
        im_rgb, stains=None, stain_unmixing_method='macenko_pca',
        stain_unmixing_params=None, mask_out=None):
    """Estimate the stain matrix (W_source) of an RGB image.

    Args:
        im_rgb: image to analyse.
        stains: the two stain names used to order the result; defaults to
            ['hematoxylin', 'eosin'].
        stain_unmixing_method: 'macenko_pca' or 'xu_snmf' (case-insensitive).
        stain_unmixing_params: extra keyword arguments for the chosen method.
            The dict is copied, so the caller's object is left untouched.
        mask_out: optional mask of pixels to ignore (macenko_pca only).

    Returns:
        The stain matrix produced by the chosen method; for macenko_pca its
        columns are reordered to follow `stains`.

    Raises:
        ValueError: if `stain_unmixing_method` is not recognised.
    """
    stains = ['hematoxylin', 'eosin'] if stains is None else stains
    # Copy before adding keys below so repeated calls with the same dict do
    # not see values left over from a previous call.
    stain_unmixing_params = dict(stain_unmixing_params or {})

    stain_unmixing_method = stain_unmixing_method.lower()

    if stain_unmixing_method == 'macenko_pca':
        stain_deconvolution = rgb_separate_stains_macenko_pca
        stain_unmixing_params['I_0'] = None
        stain_unmixing_params['mask_out'] = mask_out

    elif stain_unmixing_method == 'xu_snmf':
        # NOTE(review): `rgb_separate_stains_xu_snmf` is not defined in the
        # visible part of this notebook; confirm it exists before using this path.
        stain_deconvolution = rgb_separate_stains_xu_snmf
        stain_unmixing_params['I_0'] = None
        assert mask_out is None, "Masking is not yet implemented in xu_snmf."

    else:
        raise ValueError("Unknown/Unimplemented deconvolution method.")

    # get W_source
    W_source = stain_deconvolution(im_rgb, **stain_unmixing_params)

    # Macenko's method does not guarantee the order of the recovered stains,
    # so reorder the columns to match the requested `stains`.
    if stain_unmixing_method == 'macenko_pca':
        W_source = _reorder_stains(W_source, stains=stains)

    return W_source


def color_deconvolution_routine(
        im_rgb, W_source=None, mask_out=None, **kwargs):
    """Unmix `im_rgb`, estimating the stain matrix first when none is given.

    Extra keyword arguments are forwarded to `stain_unmixing_routine`.
    Returns the tuple (Stains, StainsFloat, wc) from `color_deconvolution`,
    with masked-out pixels set to 255 in the first three channels.
    """
    if W_source is None:
        W_source = stain_unmixing_routine(im_rgb, mask_out=mask_out, **kwargs)

    Stains, StainsFloat, wc = color_deconvolution(im_rgb, w=W_source, I_0=None)

    if mask_out is None:
        return Stains, StainsFloat, wc

    # The stain images are inverted, so 255 is written into masked pixels.
    for channel in (0, 1, 2):
        Stains[..., channel][mask_out] = 255
        StainsFloat[..., channel][mask_out] = 255.

    return Stains, StainsFloat, wc

In [None]:
def find_stain_index(reference, w):
    """Return the index of the column of `w` whose dot product with `reference` is largest in absolute value."""
    alignment = np.abs(np.dot(reference, w))
    return np.argmax(alignment)

In [None]:
def _reorder_stains(W, stains=None):
    """Reorder the columns of stain matrix `W` so that `stains[0]` comes first.

    The column best aligned with the reference vector of `stains[0]` becomes
    column 0, the other one of the first two becomes column 1, and column 2
    (the complement produced by `complement_stain_matrix`) stays in place.
    """
    stains = ['hematoxylin', 'eosin'] if stains is None else stains

    assert len(stains) == 2, "Only two-stain matrices are supported for now."

    first = find_stain_index(stain_color_map[stains[0]], W)
    column_order = (first, 1 - first, 2)
    return np.stack([W[..., column] for column in column_order], -1)

In [None]:
def convert_image_to_matrix(im):
    """Flatten an image to a (channels, pixels) matrix; 2-D input is returned as is."""
    if im.ndim != 2:
        n_channels = im.shape[-1]
        return im.reshape((-1, n_channels)).T
    return im

def convert_matrix_to_image(m, shape):
    """Inverse of `convert_image_to_matrix`.

    `shape` is the shape of the original image; its last entry is replaced by
    the number of rows of `m`. For a 2-D `shape`, `m` is returned as is.
    """
    if len(shape) != 2:
        image_shape = shape[:-1] + (m.shape[0],)
        return m.T.reshape(image_shape)
    return m

import numpy

def get_principal_components(m):
    """Return the left singular vectors of `m` (reduced SVD), cast back to `m`'s dtype."""
    left_vectors, _, _ = numpy.linalg.svd(m.astype(float), full_matrices=False)
    return left_vectors.astype(m.dtype)


def magnitude(m):
    """Euclidean length of `m` along axis 0 (one value per column for a 2-D input)."""
    squared = m ** 2
    return numpy.sqrt(squared.sum(0))


def normalize(m):
    """Scale `m` by its `magnitude` so each column has unit length."""
    column_lengths = magnitude(m)
    return m / column_lengths

In [None]:
def rgb_to_sda(im_rgb, I_0, allow_negatives=False):
    """Convert intensities to the log-scaled "SDA" space.

    Args:
        im_rgb: image, or a 2-D matrix (handled transposed internally).
        I_0: background intensity. None switches to a compatibility mode
            that adds 1 to the input and uses 256 as background.
        allow_negatives: when False, intensities are capped at `I_0` so the
            result cannot go below zero.

    Returns:
        Array with the same layout as the input.
    """
    matrix_input = im_rgb.ndim == 2
    values = im_rgb.T if matrix_input else im_rgb

    if I_0 is None:  # rgb_to_od compatibility
        values = values.astype(float) + 1
        I_0 = 256

    if not allow_negatives:
        values = np.minimum(values, I_0)

    im_sda = -np.log(values/(1.*I_0)) * 255/np.log(I_0)
    if matrix_input:
        return im_sda.T
    return im_sda

def sda_to_rgb(im_sda, I_0):
    """Inverse of `rgb_to_sda`.

    With `I_0` None the compatibility mode is used: background 256, and 1 is
    subtracted from the result to undo the +1 applied by `rgb_to_sda`.
    """
    matrix_input = im_sda.ndim == 2
    values = im_sda.T if matrix_input else im_sda

    od = I_0 is None
    base = 256 if od else I_0  # od_to_rgb compatibility

    im_rgb = base ** (1 - values / 255.)
    if matrix_input:
        im_rgb = im_rgb.T
    # `od` is a bool: subtracts 1 in compatibility mode, 0 otherwise.
    return im_rgb - od

def exclude_nonfinite(m):
    """Drop every column of `m` that contains a NaN or an infinity."""
    finite_columns = np.isfinite(m).all(axis=0)
    return m[:, finite_columns]

In [None]:
def color_convolution(im_stains, w, I_0=None):
    """Recombine a stain image into RGB using stain matrix `w`.

    Mirrors `color_deconvolution`: the stain image is taken to SDA space
    (negative values allowed), multiplied by `w`, converted back and returned
    as a clipped uint8 image of the same shape as `im_stains`.
    """
    stain_pixels = convert_image_to_matrix(im_stains)
    background = None if I_0 is None else 255
    sda_fwd = rgb_to_sda(stain_pixels, background, allow_negatives=True)
    sda_inv = sda_to_rgb(np.dot(w, sda_fwd), I_0)

    restored = convert_matrix_to_image(sda_inv, im_stains.shape)
    return restored.clip(0, 255).astype(np.uint8)

In [None]:
def complement_stain_matrix(w):
    """Build a three-column stain matrix from the first two columns of `w`.

    The third column is the unit-length cross product of the first two.
    """
    first, second = w[:, 0], w[:, 1]
    orthogonal = np.cross(first, second)
    unit_orthogonal = orthogonal / np.linalg.norm(orthogonal)
    return np.array([first, second, unit_orthogonal]).T

In [None]:
def rgb_separate_stains_macenko_pca(im_rgb, I_0, *args, **kwargs):
    """Convert `im_rgb` to SDA space and run `separate_stains_macenko_pca` on it.

    Remaining positional and keyword arguments are passed through unchanged.
    """
    return separate_stains_macenko_pca(rgb_to_sda(im_rgb, I_0), *args, **kwargs)

In [None]:
def separate_stains_macenko_pca(
        im_sda, minimum_magnitude=16, min_angle_percentile=0.01,
        max_angle_percentile=0.99, mask_out=None):
    """Estimate a stain matrix from an SDA-space image via PCA.

    Args:
        im_sda: image in SDA space.
        minimum_magnitude: projected pixels at or below this length are ignored.
        min_angle_percentile, max_angle_percentile: fractions (0-1) selecting
            the two extreme "angle" pixels that define the stain vectors.
        mask_out: optional 2-D mask; pixels where it is True are excluded.

    Returns:
        Stain matrix from `complement_stain_matrix`: two estimated stain
        vectors plus their normalised cross product.
    """
    pixels = convert_image_to_matrix(im_sda)

    if mask_out is not None:
        # Expand the 2-D mask to three channels and flatten it like the image.
        keep = numpy.equal(mask_out[..., None], False)
        keep = numpy.tile(keep, (1, 1, 3))
        keep = convert_image_to_matrix(keep)
        pixels = pixels[:, keep.all(axis=0)]

    # Columns containing NaN or infinity are dropped.
    pixels = exclude_nonfinite(pixels)

    pcs = get_principal_components(pixels)
    # Project onto all principal components except the last one.
    proj = pcs.T[:-1].dot(pixels)
    strong = proj[:, magnitude(proj) > minimum_magnitude]
    angles = _get_angles(strong)

    # Map the pixel at each extreme percentile back to the original space.
    extremes = [
        pcs[:, :-1].dot(strong[:, argpercentile(angles, fraction)])
        for fraction in (min_angle_percentile, max_angle_percentile)
    ]

    return complement_stain_matrix(normalize(numpy.array(extremes).T))


def _get_angles(m):
    """Return a signed ordering value per column: (1 - second component) * sign(first component) of the normalised columns."""
    unit = normalize(m)
    return (1 - unit[1]) * numpy.sign(unit[0])

def argpercentile(arr, p):
    """Calculate index in arr of element nearest the pth percentile.

    Args:
        arr: 1-D array of values.
        p: percentile expressed as a fraction (e.g. 0.99).

    Returns:
        Index into `arr` of the selected element.

    Raises:
        ValueError: if `arr` is empty.
    """
    if arr.size == 0:
        raise ValueError("argpercentile requires a non-empty array")
    # Rank corresponding to the percentile. Rounding can produce `arr.size`
    # (e.g. p=0.99 on a one-element array, or p=1.0), which is out of range
    # for argpartition, so clamp to the last valid rank.
    i = min(int(p * arr.size + 0.5), arr.size - 1)
    return numpy.argpartition(arr, i)[i]

In [None]:
# Keyword arguments forwarded (through `dbn`) to `stain_unmixing_routine`;
# passed to `dbn` for every tile in the prediction loop.
stain_unmixing_routine_params = {
    'stains': ['hematoxylin', 'eosin'],
    'stain_unmixing_method': 'macenko_pca',
}

def dbn(im_src, W_source=None, W_target=None, im_target=None,
        stains=None, mask_out=None, stain_unmixing_routine_params=None):
    """Stain-normalise `im_src` by unmixing it and re-mixing with a target stain matrix.

    Args:
        im_src: RGB image to normalise.
        W_source: stain matrix of `im_src`; estimated from the image when None.
        W_target: stain matrix to normalise to.
        im_target: image to estimate the target matrix from (takes precedence
            over `W_target` when given). If neither is given, the reference
            vectors in `stain_color_map` for `stains` are used.
        stains: two stain names; defaults to ['hematoxylin', 'eosin'].
        mask_out: optional boolean mask; masked pixels are ignored during
            unmixing and keep their original values in the output.
        stain_unmixing_routine_params: keyword arguments for
            `stain_unmixing_routine`. Must not contain 'W_source' or
            'mask_out'. The dict is copied and never modified.

    Returns:
        The normalised image as uint8, same shape as `im_src`.
    """
    stains = ['hematoxylin', 'eosin'] if stains is None else stains
    # Copy: 'stains' is written below, and callers typically pass one shared
    # module-level dict for every image.
    stain_unmixing_routine_params = dict(stain_unmixing_routine_params or {})
    for k in ['W_source', 'mask_out']:
        assert k not in stain_unmixing_routine_params.keys(), \
            "%s must be provided as a separate parameter." % k

    # find stains matrix from source image
    stain_unmixing_routine_params['stains'] = stains
    _, StainsFloat, _ = color_deconvolution_routine(
        im_src, W_source=W_source, mask_out=mask_out,
        **stain_unmixing_routine_params)

    # Get W_target

    if all(j is None for j in [W_target, im_target]):
        # Normalize to 'ideal' stain matrix if none is provided
        W_target = np.array(
            [stain_color_map[stains[0]], stain_color_map[stains[1]]]).T
        W_target = complement_stain_matrix(W_target)

    elif im_target is not None:
        # Get W_target from target image
        W_target = stain_unmixing_routine(
            im_target, **stain_unmixing_routine_params)

    # Convolve source image StainsFloat with W_target
    im_src_normalized = color_convolution(StainsFloat, W_target)

    # Masked-out pixels take their value from the unnormalised source image.
    if mask_out is not None:
        keep_mask = np.not_equal(mask_out, True)
        for i in range(3):
            original = im_src[:, :, i].copy()
            new = im_src_normalized[:, :, i].copy()
            original[keep_mask] = 0   # keep source values only where masked out
            new[mask_out] = 0         # keep normalised values everywhere else
            im_src_normalized[:, :, i] = new + original

    return im_src_normalized

## Predict

In [None]:
# NOTE(review): LEVEL does not appear to be read by any active code in the
# visible cells — confirm before relying on it.
LEVEL = 1
# Edge length, in pixels, of the square tiles cut from each slide.
TILE_SIZE = 256

# One predicted ISUP grade per row of `df`; rows that fail stay at 0.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
res_isup = np.zeros(len(df), dtype=int)

In [None]:
# Move the slide-level model to the GPU; the prediction loop also puts its
# input features on CUDA, so a GPU is required from here on.
wsi_model = wsi_model.cuda()

In [None]:
import skimage.io

# Globals read by the visualisation cells further down: they track the slide
# whose feature map has the smallest fraction of zero entries.
bi = None      # best feature map seen so far
s = None       # padding amounts computed for the most recent slide
sss = None     # slide image belonging to `bi`
tiles = None   # accepted tiles belonging to `bi`

# Enumerate positions instead of using the DataFrame index label, so writes
# into `res_isup` stay in bounds even if `df` does not have a 0..n-1 index.
for i, (_, row) in enumerate(tqdm(df.iterrows(), total=len(df))):
    image_id = None
    try:
        image_id = row['image_id']
        # Element 1 of the multi-image TIFF (presumably the medium-resolution
        # pyramid level — TODO confirm).
        slide = skimage.io.MultiImage(f'{image_folder}/{image_id}.tiff', conserve_memory=False)[1]
        size = slide.shape[:2]

        # Top-left corners of a regular TILE_SIZE grid covering the slide.
        ys = np.arange(0,size[0],TILE_SIZE)
        xs = np.arange(0,size[1],TILE_SIZE)
        ty = len(ys)
        tx = len(xs)
        ys, xs = np.meshgrid(ys,xs)
        ys = ys.reshape(-1)
        xs = xs.reshape(-1)
        # One feature vector per grid cell; cells of rejected tiles stay zero.
        feat_image = np.zeros((ty, tx, K[0]))

        tss = []
        for y, x in zip(ys, xs):
            # NOTE(review): tiles on the right/bottom border can be smaller
            # than TILE_SIZE; confirm the tile model accepts them.
            tile = slide[y:y+TILE_SIZE, x:x+TILE_SIZE]
            if not tile_filter(tile, row['data_provider']):
                continue

            # Bright pixels are excluded from stain estimation.
            mask_out = ((1.0 - tile / 255).mean(2) <  0.2)
            tissue_rgb_normalized_kar = dbn(tile,
                                            stain_unmixing_routine_params=stain_unmixing_routine_params,
                                            mask_out=mask_out)
            p, f = mmodel(preprocess_input(tissue_rgb_normalized_kar[None]))
            feat_image[y // TILE_SIZE, x // TILE_SIZE] = f[0]
            tss.append(tile)

        if bi is None or (bi == 0).mean() > (feat_image == 0).mean():
            bi = feat_image
            sss = slide
            tiles = tss
            print('CLICK')

        with torch.no_grad():
            feats = torch.tensor(feat_image.astype(np.float32)).permute(2, 0, 1).cuda()
            # Zero-pad (roughly centred) so the grid is at least 16 x 16.
            s = feats.shape[1:]
            s = [16 - s[0], 16 - s[1]]
            if s[0] < 0:
                s[0] = 0
            if s[1] < 0:
                s[1] = 0
            feats = pad(feats, [s[1] // 2, (s[1] + 1) // 2, s[0] // 2, (s[0] + 1) // 2, ]).detach()
            res = wsi_model(feats[None]).cpu()
            res = res.detach()[0].softmax(0)
            print(res)
            isup_score = res.argmax().item()
            res_isup[i] = isup_score
    except Exception as exc:
        # Best effort: a failing slide keeps its default grade of 0, but the
        # failure is reported instead of being silently discarded. Catching
        # `Exception` (not a bare except) lets KeyboardInterrupt stop the run.
        print(f'Skipping {image_id}: {exc!r}')

In [None]:
# Show and save the slide kept by the prediction loop (the one whose feature
# map had the fewest zero entries), then save each of its accepted tiles.
plt.imshow(sss)
# Images are converted from RGB to BGR before being handed to cv2.imwrite.
cv2.imwrite('3_wsi.png', cv2.cvtColor(sss, cv2.COLOR_RGB2BGR))

k = 0
for t in tiles:
    cv2.imwrite(f'3_tiles_{k}.png', cv2.cvtColor(t, cv2.COLOR_RGB2BGR))
    k+=1

In [None]:
# Fraction of zero entries per feature channel of the kept feature map,
# averaged over the tile grid.
m_ind = (bi == 0).mean((0,1))
# The three channels with the smallest zero fraction.
idxs = np.argsort(m_ind)[:3]

k = 0
for i in idxs:
    # Frameless figure whose single axes fills the whole canvas.
    fig = plt.figure(frameon=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(bi[...,i], cmap='inferno')
    fig.savefig(f'3_features_{k}.png')
    k+=1

## Submission

In [None]:
# Attach the predicted grades and write the submission file.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
df['isup_grade'] = np.array(res_isup, dtype=int)
df[['image_id', 'isup_grade']].to_csv('submission.csv', index=False)
print(df.head())
print()
print(df.isup_grade.value_counts())