# RANZCR 1st Place Solution Inference

Hi all,

We're very excited to share this notebook and a summary of our solution here.

Our final pipeline has 4 training stages but the minimal pipeline I show here has only 2 stages.

The 5-fold model trained with this minimal pipeline is sufficient to achieve CV 0.968-0.969 and pub/pvt LB 0.972

I published 3 notebooks to demonstrate how our MINIMAL pipeline works.

* Stage1: Segmentation (https://www.kaggle.com/haqishen/ranzcr-1st-place-soluiton-seg-model-small-ver)
* Stage2: Classification (https://www.kaggle.com/haqishen/ranzcr-1st-place-soluiton-cls-model-small-ver)
* Inference (This notebook)

This notebook shows how to run inference with my minimal pipeline to make a valid submission, using my 5-fold segmentation models (b1 w/ input size 1024) and 5-fold classification models (b1 w/ input size 512).


A brief summary of our winning solution: https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/226633



# Thanks!

In [None]:
import os
import sys
# The kernel runs without internet, so the bundled source trees of smp, timm and
# their dependencies are placed at the front of the import search path.
_OFFLINE_PACKAGE_DIRS = [
    '../input/smp20210127/segmentation_models.pytorch-master/segmentation_models.pytorch-master/',
    '../input/smp20210127/EfficientNet-PyTorch-master/EfficientNet-PyTorch-master',
    '../input/smp20210127/pytorch-image-models-master/pytorch-image-models-master',
    '../input/smp20210127/pretrained-models.pytorch-master/pretrained-models.pytorch-master',
]
sys.path = _OFFLINE_PACKAGE_DIRS + sys.path

In [None]:
import time
import subprocess
import numpy as np
import pandas as pd
import cv2
import PIL.Image
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import albumentations
import segmentation_models_pytorch as smp
import timm
from tqdm.notebook import tqdm

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Input locations and loader settings for this inference run.
# Lines tagged "modified" carried the marker 修 ("modified") in the original notebook.
sample_dir = '../input/50005fold'  # modified; directory containing the CSV that lists the studies to score
data_dir = '../input/ranzcr-clip-catheter-line-classification'  # modified; root directory the images are read from
model1_dir = '../input/ranzcr-public-model-qishen'  # modified; directory holding the segmentation checkpoints
model2_dir = '../input/5000-no-flip-test'  # modified; directory holding the classification checkpoints
num_workers = 2  # worker processes used by the DataLoader
image_size = 512  # NOTE(review): not referenced by any cell visible here; the dataset hard-codes 512
batch_size = 8  # number of images per inference batch

In [None]:
# Read the table of studies to score and keep only its first 100 rows.
# (Both steps were marked as modified in the original notebook.)
submission_csv = os.path.join(sample_dir, 'a25000.csv')
df_sub = pd.read_csv(submission_csv).head(100)

# Dataset

In [None]:
class RANZCRDatasetTest(Dataset):
    """Inference dataset that serves each study image at two resolutions.

    Each item is a dict with two float32 tensors in (channel, height, width)
    layout, scaled to the [0, 1] range:

    * ``'1024'`` - the image resized to 1024x1024
    * ``'512'``  - the image resized to 512x512

    Args:
        df: table with a ``StudyInstanceUID`` column; the index is reset so
            items are addressed by position.

    Raises:
        FileNotFoundError: from ``__getitem__`` when the image file for a row
            is missing or cannot be decoded.
    """

    def __init__(self, df):
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        image_path = os.path.join(data_dir, 'train', row.StudyInstanceUID + '.jpg')

        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than on the slice below.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = image[:, :, ::-1]  # reverse the channel order (OpenCV loads BGR)

        image1024 = cv2.resize(image, (1024, 1024)).astype(np.float32).transpose(2, 0, 1) / 255.
        image512 = cv2.resize(image, (512, 512)).astype(np.float32).transpose(2, 0, 1) / 255.

        return {
            '1024': torch.tensor(image1024),
            '512': torch.tensor(image512),
        }


# Wrap the submission table in the test dataset and iterate it in row order.
dataset_test = RANZCRDatasetTest(df_sub)
test_loader = DataLoader(
    dataset_test,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)

In [None]:
# Preview the first five samples side by side, using the 512x512 version.
from pylab import rcParams

rcParams['figure.figsize'] = (20, 10)
dataset_show = RANZCRDatasetTest(df_sub)
f, axarr = plt.subplots(1, 5)

for ax, sample_idx in zip(axarr, range(5)):
    sample = dataset_show[sample_idx]
    # (C, H, W) tensor -> (H, W, C) layout expected by imshow.
    ax.imshow(sample['512'].permute(1, 2, 0))

# Model

In [None]:
class SegModel(nn.Module):
    """UNet++ segmentation network that returns raw 2-channel logits.

    Args:
        backbone: encoder name passed to ``smp.UnetPlusPlus``. The network is
            built with ``encoder_weights=None`` and no output activation.
    """

    def __init__(self, backbone):
        super().__init__()
        self.seg = smp.UnetPlusPlus(
            encoder_name=backbone,
            encoder_weights=None,
            classes=2,
            activation=None,
        )

    def forward(self, x):
        # Encoder -> decoder -> segmentation head, called explicitly.
        encoder_stages = self.seg.encoder(x)
        decoded = self.seg.decoder(*encoder_stages)
        return self.seg.segmentation_head(decoded)


class enetv2(nn.Module):
    """Classifier over an image concatenated with its segmentation mask.

    The timm backbone's stem convolution is widened to accept 5 input channels
    (the image channels plus the mask channels), and a linear head maps the
    backbone features to 12 outputs.

    Args:
        backbone: model name passed to ``timm.create_model`` (created with the
            second positional argument set to ``False``).
    """

    def __init__(self, backbone):
        super().__init__()
        self.enet = timm.create_model(backbone, False)
        # Widen the stem to 5 input channels: tile the existing kernels along
        # the input-channel axis and keep the first 5 slices.
        # (assumes the stem weight starts with 3 input channels - TODO confirm)
        stem_weight = self.enet.conv_stem.weight
        self.enet.conv_stem.weight = nn.Parameter(stem_weight.repeat(1, 5 // 3 + 1, 1, 1)[:, :5])
        self.myfc = nn.Linear(self.enet.classifier.in_features, 12)
        # The backbone's own classifier is replaced so it returns features.
        self.enet.classifier = nn.Identity()

    def extract(self, x):
        """Return the backbone output for the already-concatenated input."""
        return self.enet(x)

    def forward(self, x, mask):
        """Return 12 logits per sample.

        Args:
            x: image batch, (N, C, H, W).
            mask: mask batch, (N, C_mask, H_mask, W_mask); resized to the
                spatial size of ``x`` before concatenation.
        """
        # Resize to the image's full (H, W), not just H, so non-square inputs
        # also line up for the channel concat. Identical for square inputs.
        mask = F.interpolate(mask, x.shape[2:])
        x = torch.cat([x, mask], 1)
        x = self.extract(x)
        x = self.myfc(x)
        return x

# Loading Models

In [None]:
# Segmentation stage: backbone name passed to SegModel, and the run name that
# prefixes the per-fold checkpoint files ('<kernel_type>_best_fold<k>.pth').
enet_type_seg = 'timm-efficientnet-b1'
kernel_type_seg = 'unetb1_2cbce_1024T15tip_lr1e4_bs4_augv2_30epo'

# Classification stage: backbone name passed to enetv2, and its checkpoint run name.
enet_type_cls = 'tf_efficientnet_b1_ns'
kernel_type_cls = 'enetb1_5ch_512_lr3e4_bs32_30epo'

In [None]:
def _load_fold_models(model_factory, weights_dir, kernel_type, n_folds=5):
    """Build one network per fold, load its best checkpoint, and set eval mode.

    Args:
        model_factory: zero-argument callable returning a fresh, unloaded model.
        weights_dir: directory containing the checkpoint files.
        kernel_type: run name; files are '<kernel_type>_best_fold<k>.pth'.
        n_folds: number of folds to load (folds 0 .. n_folds - 1).

    Returns:
        List of models on ``device``, in fold order.
    """
    fold_models = []
    for fold in range(n_folds):
        net = model_factory().to(device)
        weights_path = os.path.join(weights_dir, f'{kernel_type}_best_fold{fold}.pth')
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint saved from a GPU still loads on a CPU-only run.
        state_dict = torch.load(weights_path, map_location=device)
        net.load_state_dict(state_dict, strict=True)
        net.eval()
        fold_models.append(net)
    return fold_models


models_seg = _load_fold_models(lambda: SegModel(enet_type_seg), model1_dir, kernel_type_seg)
models_cls = _load_fold_models(lambda: enetv2(enet_type_cls), model2_dir, kernel_type_cls)

# Inference

In [None]:
# Two-stage ensemble inference.
# 1) Average the sigmoid masks of all segmentation models on the 1024 input.
# 2) Feed the 512 input plus that mask to every classification model.
# 3) Softmax over the first 4 outputs, sigmoid over the rest.
# PROBS ends up as a numpy array of shape (n_cls_models, n_samples, 11).
PROBS = []
with torch.no_grad():
    for data in tqdm(test_loader, total=len(test_loader)):
        # Use the notebook-wide `device` (which falls back to CPU) rather than
        # forcing CUDA, so the batch lands where the models were placed.
        batch = {key: tensor.to(device) for key, tensor in data.items()}
        mask = torch.stack([model(batch['1024']).sigmoid() for model in models_seg], 0).mean(0)
        logits = torch.stack([model(batch['512'], mask) for model in models_cls], 0)
        probs = torch.cat([logits[:, :, :4].softmax(2), logits[:, :, 4:].sigmoid()], 2)
        PROBS.append(probs.cpu())
# Concatenate the batches along the sample axis (axis 0 indexes the models).
PROBS = torch.cat(PROBS, 1)
# Drop output 3 (the fourth softmax output), keeping the 11 submitted columns.
PROBS = PROBS[:, :, [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11]]
PROBS = PROBS.numpy()

# Check Distribution

In [None]:
# Write the model-averaged probabilities into the submission columns, then plot
# the distribution of the three CVC columns as a sanity check.
target_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]
cvc_cols = [
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
]

df_sub[target_cols] = PROBS.mean(axis=0)

# NOTE(review): distplot is deprecated in recent seaborn releases; kept as-is
# so the plot is unchanged.
sns.distplot(df_sub[cvc_cols])

# Rank Prediction & Submit

In [None]:

# Rank-average ensemble: convert each model's predictions to per-column
# percentile ranks, average the ranks across models, and write the submission.
target_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

df_subs = []
for fold_probs in PROBS:
    fold_sub = df_sub.copy()
    fold_sub[target_cols] = fold_probs
    fold_sub[target_cols] = fold_sub[target_cols].rank(pct=True)  # rank
    df_subs.append(fold_sub)

ranked_per_model = [fold_sub[target_cols].values for fold_sub in df_subs]
df_sub[target_cols] = np.mean(ranked_per_model, axis=0)  # mean

df_sub.to_csv('submission.csv', index=False)

In [None]:
# NOTE(review): `target` is not defined in any cell visible here, so this cell
# would raise NameError as written - confirm whether it is a leftover.
target 