In [1]:
# from apex import amp
import numpy as np # linear algebra
import pandas as pd
import os, cv2, gc
from PIL import Image
from torch.utils.data import Dataset, DataLoader, DistributedSampler
import torch, torchvision
from torch import nn, optim
import kornia as K
from torchvision import transforms
from torch import tensor
from functools import partial

import sys, random, cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score
import matplotlib.pyplot as plt
import torch.nn.functional as F

import scipy as sp
import functools, time
import itertools

from tqdm.auto import tqdm

In [2]:
# Number of tiles per slide: get_tiles reads files `{image_id}_0.png` .. `{image_id}_{N-1}.png`
# and the reshape helpers group tiles in sets of N.
N = 12
# Spatial side length used by the view() calls in ToBatches, squeezed and Model.forward.
sz= 128
# Per-channel statistics handed to K.augmentation.Normalize / Denormalize.
# NOTE(review): the means are written as `1 - value`, presumably because get_tiles
# bitwise-inverts each image before scaling -- confirm how these statistics were computed.
mean = torch.tensor([1-0.81, 1-0.610, 1-0.738])
std = torch.tensor([0.405, 0.512, 0.418])

def rgb2tensor(x):
    """Convert a channels-last image array into a channels-first tensor scaled by 1/255.

    The last axis of `x` is moved to the front, the result is made C-contiguous,
    and every value is divided by 255 (true division, so integer input becomes
    floating point) before being wrapped as a torch tensor.
    """
    channels_first = np.ascontiguousarray(np.moveaxis(x, -1, 0))
    scaled = channels_first / 255.
    return torch.from_numpy(scaled)

def get_tiles(f, tile_dir='../input/panda-dataset-x2', n_tiles=None):
    """Load the tiles of one slide and stack them into a single tensor.

    Each tile is read from `{tile_dir}/{f}_{i}.png`, bitwise-inverted, converted
    from OpenCV's BGR order to RGB and passed through `rgb2tensor`.

    Args:
        f: Slide identifier used as the file-name prefix.
        tile_dir: Directory holding the tile PNGs (defaults to the original
            hard-coded dataset path).
        n_tiles: Number of tiles to read; defaults to the module-level `N`.

    Returns:
        The per-tile tensors stacked along a new leading dimension.

    Raises:
        FileNotFoundError: If a tile image is missing or cannot be decoded.
    """
    count = N if n_tiles is None else n_tiles
    tiles = []
    for i in range(count):
        path = f'{tile_dir}/{f}_{i}.png'
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread reports a missing/unreadable file by returning None, which
            # would otherwise surface as an opaque TypeError from the `~` below.
            raise FileNotFoundError(f'could not read tile image: {path}')
        tiles.append(rgb2tensor(cv2.cvtColor(~bgr, cv2.COLOR_BGR2RGB)))
    return torch.stack(tiles)

In [3]:
# Rounding thresholds, one row of five cut points per fold. predict(fold) looks up
# coefs[fold] and passes it to OptimizedRounder.predict to bucket raw model outputs
# into the grades 0..5.
# NOTE(review): presumably produced by OptimizedRounder.fit during training -- confirm provenance.
coefs = [[0.54798694, 2.35815373, 3.4727479,  3.91126344, 4.59170151],
          [0.70456434, 2.58822624, 3.52927984, 4.32791742, 4.6536504 ],
         [0.58389584, 1.95216901, 3.66551634, 4.04248727, 4.87116245],
         [0.55569171, 2.56664067, 3.30064876, 4.05469193, 4.58283954],
         [0.66421877, 2.62153281, 3.58990835, 3.83454367, 4.2873346 ]]

In [4]:
class ToBatches():
    """Callable that regroups a flat stack of tiles into groups of N tiles."""

    def __call__(self, x):
        # -1 lets torch infer the leading dimension from the element count.
        grouped_shape = (-1, N, 3, sz, sz)
        return x.view(*grouped_shape)
    
class squeezed():
    """Callable that flattens its input into a stack of 3-channel sz-by-sz tiles."""

    def __call__(self, x):
        # -1 lets torch infer how many individual tiles the input holds.
        flat_shape = (-1, 3, sz, sz)
        return x.view(*flat_shape)

In [5]:
# Augmentation pipeline: flatten the input to individual tiles (squeezed), apply kornia's
# random affine / horizontal flip / vertical flip / perspective transforms, then regroup
# the tiles N at a time (ToBatches).
# NOTE(review): not referenced by the inference code in this notebook section.
train_aug = transforms.Compose([
                          squeezed(),  
                          K.augmentation.RandomAffine(20, shear= (-5, 5), scale=(0.95, 1.1)),
                          K.augmentation.RandomHorizontalFlip(p=0.3),
                          K.augmentation.RandomVerticalFlip(p=0.3),
#                           K.augmentation.RandomMotionBlur(5, (0, 15), 20, 1),
                          K.augmentation.RandomPerspective(0.2, 0.2),  
                          ToBatches()  
                    ])

# Helper pipeline that only applies kornia's tensor_to_image conversion.
aug2img = transforms.Compose([
                              K.tensor_to_image])


# Helper pipeline that first reverses the mean/std normalisation and then applies
# kornia's tensor_to_image conversion.
aug2img2 = transforms.Compose([
    K.augmentation.Denormalize(mean, std),
                              K.tensor_to_image])

# Normalisation with the module-level mean/std; PandaDataset.__getitem__ applies this
# to every tile stack it returns.
primary_tfms = transforms.Compose([K.augmentation.Normalize(mean, std)])

In [6]:
class PandaDataset(Dataset):
    """Dataset yielding `(normalised tile stack, image id)` for each row of a frame.

    The frame must provide an `image_id` and an `isup_grade` column; the grades
    are stored on the instance as `targs` but are not part of the returned items.
    """

    def __init__(self, df):
        # Hold the columns as plain arrays so item access is simple positional indexing.
        self.image_id = df.image_id.values
        self.targs = df['isup_grade'].values

    def __len__(self):
        return len(self.image_id)

    def __getitem__(self, idx):
        slide_id = self.image_id[idx]
        tiles = get_tiles(slide_id)
        return primary_tfms(tiles), slide_id

In [7]:
def get_base():
    """Build the backbone: a torchvision ResNet-34 created with `pretrained=False`."""
    return torchvision.models.resnet34(pretrained=False)

In [8]:
class Mish(nn.Module):
    """Mish activation: `x * tanh(softplus(x))`, applied element-wise."""

    def forward(self, x):
        gate = torch.tanh(F.softplus(x))
        return x * gate

def to_Mish(model):
    """Recursively replace every `nn.ReLU` submodule of `model` with `Mish`, in place."""
    for name, module in list(model.named_children()):
        if not isinstance(module, nn.ReLU):
            # Not a ReLU itself: descend and patch whatever it contains.
            to_Mish(module)
            continue
        setattr(model, name, Mish())

In [9]:
class AdaptiveConcatPool2d(nn.Module):
    """Concatenate adaptive max-pooled and average-pooled features along dim 1.

    Args:
        sz: Target output size forwarded to both adaptive pooling layers.
    """

    def __init__(self, sz=1):
        super().__init__()
        self.output_size = sz
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        # Max features come first, then the average features.
        return torch.cat((pooled_max, pooled_avg), dim=1)

In [10]:
class OptimizedRounder():
    """Map continuous regression outputs to integer grades using cut points.

    A value is assigned the index of the first cut point it is strictly below,
    or `len(coef)` when it is below none of them. With the five cut points used
    throughout this notebook that yields grades 0..5. `fit` searches for cut
    points that maximise agreement with the true labels.
    """

    def __init__(self):
        # Holds the scipy optimisation result after fit(); None until then.
        self.coef_ = None

    @staticmethod
    def _bucketize(X, coef):
        """Return, for each value in X, the index of the first cut it is below."""
        values = np.asarray(X)
        cuts = np.asarray(coef, dtype=float)
        # below[..., k] is True where the value is strictly less than cut k.
        below = values[..., None] < cuts
        # argmax finds the first True; rows with no True (value >= every cut, or
        # NaN) fall through to the top grade, exactly like the original else-branch.
        first_cut = below.argmax(axis=-1)
        grades = np.where(below.any(axis=-1), first_cut, cuts.size)
        # Keep the dtype of the input, as the original copy-and-overwrite did.
        return grades.astype(values.dtype)

    def _kappa_loss(self, coef, X, y):
        """Negative agreement score between `y` and `X` bucketed with `coef`."""
        X_p = self._bucketize(X, coef)
        # The original called `calc_score`, which is not defined anywhere in this
        # notebook. NOTE(review): quadratic weighted kappa via the already imported
        # cohen_kappa_score is assumed to be the intended metric -- confirm.
        ll = cohen_kappa_score(y, X_p.astype(int), weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Optimise the cut points with Nelder-Mead, starting from the half-integers.

        The scipy result object is stored in `self.coef_`.
        """
        # Imported here because a bare `import scipy` does not guarantee that the
        # `optimize` submodule is loaded.
        from scipy.optimize import minimize

        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]
        self.coef_ = minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        """Bucket the values of `X` using the cut points `coef`.

        Args:
            X: Array-like of raw predictions.
            coef: Sequence of cut points (five in this notebook).

        Returns:
            A new array shaped and typed like `X` holding the grade per value.
        """
        return self._bucketize(X, coef)

    def coefficients(self):
        """Return the fitted cut points.

        Raises:
            RuntimeError: If `fit` has not been called yet.
        """
        if self.coef_ is None:
            raise RuntimeError('OptimizedRounder.fit must be called before coefficients()')
        return self.coef_['x']

In [11]:
class Model(nn.Module):
    """Tile-based classifier/regressor: shared encoder plus a pooled head.

    Every tile is encoded independently; the feature maps of the tiles that
    belong to one sample are then laid out side by side along the height axis
    and fed through a single pooling + MLP head.

    Args:
        base: Backbone network. All children except the last two form the
            encoder; the last child must expose `in_features`.
        n: Number of outputs produced by the head.
    """

    def __init__(self, base, n=6):
        super().__init__()
        layers = list(base.children())
        # Drop the backbone's final two children and keep the rest as the encoder.
        self.enc = nn.Sequential(*layers[:-2])
        nc = layers[-1].in_features
        # 2*nc because AdaptiveConcatPool2d concatenates max- and avg-pooled features.
        self.head = nn.Sequential(AdaptiveConcatPool2d(), nn.Flatten(), nn.Linear(2*nc, 512),
                                  Mish(), nn.BatchNorm1d(512), nn.Dropout(0.5), nn.Linear(512, n))

    def forward(self, x):
        """Run the model on `x` of shape (batch, tiles, channels, height, width)."""
        n_tiles = x.shape[1]
        # Fold the tile axis into the batch axis. The per-tile shape is taken from the
        # input itself rather than from the module-level `sz`, so any tile size works.
        feats = self.enc(x.view(-1, *x.shape[2:]))
        _, c, h, w = feats.shape
        # (batch*tiles, c, h, w) -> (batch, c, tiles, h, w) -> (batch, c, tiles*h, w)
        feats = feats.view(-1, n_tiles, c, h, w).permute(0, 2, 1, 3, 4).contiguous()
        return self.head(feats.view(-1, c, h * n_tiles, w))

In [12]:
def get_dl(df, bs= 8):
    """Wrap `df` in a PandaDataset and return an unshuffled DataLoader over it.

    Args:
        df: Frame handed to PandaDataset.
        bs: Batch size.
    """
    loader_kwargs = dict(batch_size=bs, shuffle=False, num_workers=4, pin_memory=True)
    return DataLoader(PandaDataset(df), **loader_kwargs)

def timer(f):
    """Decorator that prints how long each call to `f` took.

    The duration is measured with `time.perf_counter` and printed as minutes and
    seconds after `f` returns; the wrapped function's return value is passed through.
    """
    @functools.wraps(f)
    def timed(*args, **kwargs):
        began = time.perf_counter()
        result = f(*args, **kwargs)
        run_time = time.perf_counter() - began
        print(f'Time elapsed : {int(run_time//60)}m {run_time%60:.2f}s', end= " "*30+'\n')
        return result
    return timed

In [13]:
@timer
def predict(fold):
    """Score one fold's validation split with that fold's saved model.

    Loads `valid_{fold}.pkl` and `model_{fold}.pth` from the
    `panda-remove-noise-1` input directory, runs the model over the split in
    evaluation mode, and buckets the raw outputs with the thresholds in
    `coefs[fold]`.

    Args:
        fold: Fold index; selects the files to load and the row of `coefs`.

    Returns:
        The validation DataFrame with an added `cleaned` column holding the
        bucketed predictions.
    """
    # is_available must be *called*: the bare function object is always truthy and
    # would select 'cuda' even on a CPU-only machine.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): read_pickle executes arbitrary code from the file -- only use
    # with trusted inputs.
    df = pd.read_pickle(f'../input/panda-remove-noise-1/valid_{fold}.pkl')
    dl = get_dl(df, bs=32)
    rounder = OptimizedRounder()

    model = Model(get_base(), 1)
    # map_location lets weights that were saved from a GPU load on whatever device is in use.
    model.load_state_dict(torch.load(f'../input/panda-remove-noise-1/model_{fold}.pth',
                                     map_location=device))
    model = model.to(device)
    print('loaded model')
    model.eval()

    preds_acc = []
    with torch.no_grad():
        # The loader also yields the image ids; they are not needed because the
        # loader is unshuffled, so predictions line up with the rows of `df`.
        for xb, _ in tqdm(dl):
            batch_preds = model(xb.to(device).float())
            preds_acc.extend(batch_preds.detach().to('cpu').numpy().tolist())

    # Each entry is a one-element list (the head has a single output); flatten to 1-D.
    # Unlike np.concatenate this also copes with an empty split.
    preds = np.array(preds_acc, dtype=float).reshape(-1)
    coefficients = coefs[fold]
    final_preds = rounder.predict(preds, coefficients)
    df['cleaned'] = final_preds
    return df

In [14]:
# Fold 0: score its validation split with its saved model; the returned frame carries a `cleaned` column.
fold0 = predict(0)

loaded model


HBox(children=(FloatProgress(value=0.0, max=67.0), HTML(value='')))


Time elapsed : 0m 46.84s                              


In [15]:
# Fold 1: score its validation split with its saved model; the returned frame carries a `cleaned` column.
fold1 = predict(1)

loaded model


HBox(children=(FloatProgress(value=0.0, max=67.0), HTML(value='')))


Time elapsed : 0m 43.11s                              


In [16]:
# Fold 2: score its validation split with its saved model; the returned frame carries a `cleaned` column.
fold2 = predict(2)

loaded model


HBox(children=(FloatProgress(value=0.0, max=67.0), HTML(value='')))


Time elapsed : 0m 43.72s                              


In [17]:
# Fold 3: score its validation split with its saved model; the returned frame carries a `cleaned` column.
fold3 = predict(3)

loaded model


HBox(children=(FloatProgress(value=0.0, max=67.0), HTML(value='')))


Time elapsed : 0m 42.28s                              


In [18]:
# Fold 4: score its validation split with its saved model; the returned frame carries a `cleaned` column.
fold4 = predict(4)

loaded model


HBox(children=(FloatProgress(value=0.0, max=67.0), HTML(value='')))


Time elapsed : 0m 42.94s                              


In [19]:
# Stack the five per-fold frames into one; each frame keeps its own index labels
# because ignore_index is not passed.
df = pd.concat([fold0, fold1, fold2, fold3, fold4])

In [20]:
# Display the combined frame (the recorded output below is truncated in the middle).
df

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score,cleaned
0,0005f7aaab2800f6170c399693a96917,0,0,0+0,0.0
1,000920ad0b612851f8e01bcc880d9b3d,0,0,0+0,1.0
2,0018ae58b01bdadc8e347995b69f99aa,1,4,4+4,4.0
3,001c62abd11fa4b57bf7a6c603a11bb9,0,4,4+4,4.0
4,001d865e65ef5d2579c190a0e0350d8f,0,0,0+0,0.0
...,...,...,...,...,...
10611,ffd2841373b39792ab0c84cccd066e31,1,0,negative,0.0
10612,ffdc59cd580a1468eac0e6a32dd1ff2d,1,5,4+5,5.0
10613,ffe06afd66a93258f8fabdef6044e181,1,0,negative,1.0
10614,ffe236a25d4cbed59438220799920749,1,2,3+4,3.0


In [21]:
# Absolute gap between the provided grade and the model's bucketed prediction.
df['difference'] = np.abs(df['isup_grade'] - df['cleaned'])

In [22]:
# How many rows fall at each absolute gap between label and prediction.
df['difference'].value_counts()

0.0    5071
1.0    3983
2.0    1050
3.0     355
4.0     130
5.0      27
Name: difference, dtype: int64

### Let's remove all images whose prediction differs from the label by 2 or more (i.e. keep only rows with an absolute difference below 2)

In [23]:
# Keep only rows whose gap is below 2 (i.e. 0 or 1). This rebinds `df`, so the
# unfiltered frame is no longer reachable under this name afterwards.
df = df[df['difference']<2]

In [24]:
# Row/column count after filtering.
df.shape

(9054, 6)

In [25]:
# Write the filtered labels to disk. The index is written as well (pandas' default),
# so it appears as an unnamed first column in the CSV.
df.to_csv('cleaned_labels.csv')