Credits

https://www.kaggle.com/ammarali32/resnet200d-inference-single-model-lb-96-5

https://www.kaggle.com/ammarali32/seresnet152d-inference-single-model-lb-96-2

https://www.kaggle.com/underwearfitting/resnet200d-public-benchmark-2xtta-lb0-965

https://www.kaggle.com/ttahara/ranzcr-multi-head-model-inference/data?scriptVersionId=55373372&select=fold0


In [None]:
import os
import sys
sys.path.append('../input/pytorch-images-seresnet')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import gc
import scipy as sp
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2


import timm

from torch.cuda.amp import autocast, GradScaler

import warnings 
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Batch size shared by the 512px and 640px test DataLoaders built further down.
BATCH_SIZE = 128
# Directory of test JPEGs; TestDataset builds '<TEST_PATH>/<StudyInstanceUID>.jpg' from it.
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test'

In [None]:
# The sample submission provides the StudyInstanceUID list to predict on; its
# target columns are overwritten with the ensemble output at the end of the notebook.
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')

In [None]:
class TestDataset(Dataset):
    """Unlabeled test-image dataset keyed by ``StudyInstanceUID``.

    Parameters
    ----------
    df :
        DataFrame with a ``StudyInstanceUID`` column; one row per image.
    transform :
        Optional callable invoked as ``transform(image=ndarray)`` and expected
        to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises
        ------
        FileNotFoundError
            If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [None]:
def get_transforms(image_size=640):
    """Build the test-time preprocessing pipeline.

    The pipeline resizes to an ``image_size`` x ``image_size`` square, applies
    ``Normalize`` with its default arguments, then ``ToTensorV2``.
    """
    steps = [
        Resize(image_size, image_size),
        Normalize(),
        ToTensorV2(),
    ]
    return Compose(steps)

In [None]:
class ResNet200D(nn.Module):
    """timm backbone whose pooling and classifier are swapped for an 11-output head.

    Attribute names (``model``, ``pooling``, ``fc``) define the state-dict
    keys expected by the checkpoints loaded later, so they must not change.
    """

    def __init__(self, model_name='resnet200d_320'):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=False)
        feature_dim = backbone.fc.in_features
        # Neutralise timm's own pooling/classifier; presumably the backbone
        # then returns the unpooled feature map, which is pooled below.
        backbone.global_pool = nn.Identity()
        backbone.fc = nn.Identity()
        self.model = backbone
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(feature_dim, 11)

    def forward(self, x):
        """Return the 11 raw outputs (no sigmoid applied) for batch ``x``."""
        feature_map = self.model(x)
        pooled = self.pooling(feature_map)
        return self.fc(pooled.view(x.size(0), -1))

class SeResNet152D(nn.Module):
    """SE-ResNet152D timm backbone followed by average pooling and an 11-output linear layer.

    The ``model`` / ``pooling`` / ``fc`` attribute names are kept because they
    determine the state-dict keys of the checkpoint loaded later.
    """

    def __init__(self, model_name='seresnet152d_320'):
        super().__init__()
        net = timm.create_model(model_name, pretrained=False)
        in_dim = net.fc.in_features
        # Replace timm's pooling and classifier with pass-through modules.
        net.global_pool, net.fc = nn.Identity(), nn.Identity()
        self.model = net
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(in_dim, 11)

    def forward(self, x):
        """Return the 11 raw outputs (no sigmoid applied) for batch ``x``."""
        n_samples = x.size(0)
        flat = self.pooling(self.model(x)).view(n_samples, -1)
        return self.fc(flat)
    
class RANZCRResNet200D(nn.Module):
    """ResNet200D timm backbone with a configurable linear head.

    Parameters
    ----------
    model_name :
        timm architecture name passed to ``timm.create_model``.
    out_dim :
        Number of outputs of the final linear layer.
    pretrained :
        Forwarded to ``timm.create_model``. Previously this argument was
        accepted but silently ignored; the default (``False``) keeps the old
        behaviour for every existing call.
    """

    def __init__(self, model_name='resnet200d', out_dim=11, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.fc.in_features
        # Replace timm's pooling and classifier with pass-through modules;
        # pooling and classification are done by the layers defined below.
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, out_dim)

    def forward(self, x):
        """Return ``out_dim`` raw outputs (no sigmoid applied) per sample."""
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return output

In [None]:
def inference(models, test_loader, device):
    """Predict sigmoid probabilities with horizontal-flip TTA, averaged over ``models``.

    For every batch each model is run on the images and on the images flipped
    along the last axis; the two sigmoid outputs are averaged, then the
    per-model results are averaged. Per-batch arrays are concatenated along
    the first axis and returned as a NumPy array.
    """
    batch_probs = []
    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    for _, images in progress:
        images = images.to(device)
        per_model = []
        for net in models:
            with torch.no_grad():
                logits_plain = net(images)
                logits_flipped = net(images.flip(-1))
            prob_plain = logits_plain.sigmoid().to('cpu').numpy()
            prob_flipped = logits_flipped.sigmoid().to('cpu').numpy()
            per_model.append((prob_plain + prob_flipped) / 2)
        batch_probs.append(np.mean(per_model, axis=0))
    return np.concatenate(batch_probs)

In [None]:
def _load_eval_model(model, weights_path, state_key=None):
    """Load a checkpoint into ``model`` and return it in eval mode on ``device``.

    ``state_key`` selects a sub-dictionary of the checkpoint (e.g. ``'model'``)
    when the state dict is nested; ``None`` means the checkpoint itself is the
    state dict.
    """
    # Load onto the CPU first: the hard-coded 'cuda:0' (or no map_location at
    # all) fails on CPU-only machines, and the weights are moved to `device`
    # by model.to() below anyway.
    checkpoint = torch.load(weights_path, map_location='cpu')
    state_dict = checkpoint if state_key is None else checkpoint[state_key]
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    return model


# Single ResNet200D checkpoint (state dict stored under the 'model' key).
models200D = [
    _load_eval_model(
        ResNet200D(),
        "../input/resnet200d-public/resnet200d_320_CV9632.pth",
        state_key='model',
    ),
]

# Five-fold RANZCRResNet200D checkpoints (plain state dicts).
_RESNET200D_FOLD_WEIGHTS = [
    "../input/resnet200d-baseline-benchmark-public/resnet200d_fold0_cv953.pth",
    "../input/resnet200d-baseline-benchmark-public/resnet200d_fold1_cv955.pth",
    "../input/resnet200d-baseline-benchmark-public/resnet200d_fold2_cv955.pth",
    "../input/resnet200d-baseline-benchmark-public/resnet200d_fold3_cv957.pth",
    "../input/resnet200d-baseline-benchmark-public/resnet200d_fold4_cv954.pth",
]
models200D_2 = [
    _load_eval_model(RANZCRResNet200D(), weights_path)
    for weights_path in _RESNET200D_FOLD_WEIGHTS
]

# Single SeResNet152D checkpoint (state dict stored under the 'model' key).
models152D = [
    _load_eval_model(
        SeResNet152D(),
        '../input/seresnet152d-cv9615/seresnet152d_320_CV96.15.pth',
        state_key='model',
    ),
]

In [None]:
# Two loaders over the same test set, differing only in resize resolution:
# 512px feeds the RANZCRResNet200D folds, 640px feeds ResNet200D and SeResNet152D.
test_dataset_512 = TestDataset(test, transform=get_transforms(image_size=512))
test_loader_512 = DataLoader(test_dataset_512, batch_size=BATCH_SIZE, shuffle=False, num_workers=4 , pin_memory=True)

test_dataset_640 = TestDataset(test, transform=get_transforms(image_size=640))
test_loader_640 = DataLoader(test_dataset_640, batch_size=BATCH_SIZE, shuffle=False, num_workers=4 , pin_memory=True)

# shuffle=False keeps prediction rows aligned with the rows of `test`.
# Each call iterates its loader in full, so the 640px loader is read twice.
predictions200d = inference(models200D, test_loader_640, device)
predictions200d_2 = inference(models200D_2, test_loader_512, device)
predictions152d = inference(models152D, test_loader_640, device)

In [None]:
# The single-head models are no longer needed once their predictions exist.
del models200D
del models200D_2
del models152D
# A leftover global `model` from the loading cell would keep the last network
# alive; drop it if it exists (no error if it does not).
globals().pop("model", None)
gc.collect()
# Hand cached GPU blocks back to the driver, as the later per-fold cleanup does.
torch.cuda.empty_cache()

In [None]:
# Sanity check: display the shape of the ResNet200D predictions
# (presumably (number of test images, 11) — confirm against the output).
predictions200d.shape

https://www.kaggle.com/ttahara/ranzcr-multi-head-model-inference/data?scriptVersionId=55373372

In [None]:
from joblib import Parallel, delayed
from pathlib import Path
import typing as tp

In [None]:
# Directory layout, resolved relative to the parent of the working directory.
ROOT = Path.cwd().parent
INPUT = ROOT / "input"
OUTPUT = ROOT / "output"
DATA = INPUT / "ranzcr-clip-catheter-line-classification"
TRAIN = DATA / "train"
TEST = DATA / "test"


# Weights and per-fold settings of the five-fold multi-head model.
TRAINED_MODEL = INPUT / "ranzcr-clip-weights-for-multi-head-model-v2"
# Scratch directory; the resized test images are written beneath it.
TMP = ROOT / "tmp"
TMP.mkdir(exist_ok=True)

# NOTE(review): the name is misspelled ("RANDOM") and the constant is not
# referenced in the code shown here; renaming could break code outside this view.
RANDAM_SEED = 1086
N_CLASSES = 11
FOLDS = [0, 1, 2, 3, 4]
N_FOLD = len(FOLDS)
# Target size used when pre-resizing the test images for the multi-head model.
IMAGE_SIZE = (512, 512)

# NOTE(review): neither flag is read anywhere in the code shown here.
CONVERT_TO_RANK = False
FAST_COMMIT = False

# Names of the 11 target labels.
CLASSES = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present'
]

In [None]:
# Sample submission: supplies the StudyInstanceUID list (and row count) used by
# the multi-head inference cells below.
smpl_sub =  pd.read_csv(DATA / "sample_submission.csv")

In [None]:
def resize_images(img_id, input_dir, output_dir, resize_to=(512, 512), ext="png"):
    """Read ``<input_dir>/<img_id>.jpg`` as grayscale, resize it, and save it.

    Parameters
    ----------
    img_id :
        Image identifier (file stem).
    input_dir, output_dir :
        ``pathlib.Path`` directories to read from / write to.
    resize_to :
        Target size passed to ``cv2.resize``.
    ext :
        Output file extension (without the dot).

    Raises
    ------
    FileNotFoundError
        If the source image is missing or cannot be decoded.
    OSError
        If OpenCV reports that the output file was not written.
    """
    img_path = input_dir / f"{img_id}.jpg"
    save_path = output_dir / f"{img_id}.{ext}"

    img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.resize(img, resize_to)
    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(str(save_path), img):
        raise OSError(f"Could not write image: {save_path}")

# Output directory for the pre-resized test images, e.g. tmp/test_512x512.
TEST_RESIZED = TMP / "test_{0}x{1}".format(*IMAGE_SIZE)
TEST_RESIZED.mkdir(exist_ok=True)
# Bare expression: has no effect at this point in the cell.
TEST_RESIZED

# Resize every test image with two joblib workers; the results are files on
# disk, so the returned list is discarded.
_ = Parallel(n_jobs=2, verbose=5)([
    delayed(resize_images)(img_id, TEST, TEST_RESIZED, IMAGE_SIZE, "png")
    for img_id in smpl_sub.StudyInstanceUID.values
])

In [None]:
def get_activation(activ_name: str="relu"):
    """Return a newly constructed activation module.

    Parameters
    ----------
    activ_name :
        One of ``"relu"`` (in-place), ``"tanh"``, ``"sigmoid"``, ``"identity"``.

    Raises
    ------
    NotImplementedError
        If ``activ_name`` is not one of the supported names.
    """
    # Map names to factories so only the requested module is instantiated.
    factories = {
        "relu": lambda: nn.ReLU(inplace=True),
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
        "identity": nn.Identity,
    }
    try:
        factory = factories[activ_name]
    except KeyError:
        raise NotImplementedError(
            f"Unsupported activation {activ_name!r}; "
            f"expected one of {sorted(factories)}"
        ) from None
    return factory()
        

class Conv2dBNActiv(nn.Module):
    """Convolution, optional batch normalisation, then an activation.

    The sub-modules live in ``self.layers`` (an ``nn.Sequential``) in that
    order; the BatchNorm2d entry is present only when ``use_bn`` is true.
    """

    def __init__(
        self, in_channels: int, out_channels: int,
        kernel_size: int, stride: int=1, padding: int=0,
        bias: bool=False, use_bn: bool=True, activ: str="relu"
    ):
        """Build the conv / (BN) / activation stack."""
        super().__init__()
        conv = nn.Conv2d(
            in_channels, out_channels,
            kernel_size, stride, padding, bias=bias)
        norm = [nn.BatchNorm2d(out_channels)] if use_bn else []
        activation = get_activation(activ)
        self.layers = nn.Sequential(conv, *norm, activation)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.layers(x)
        

class SSEBlock(nn.Module):
    """Channel-squeeze / spatial-excitation gate.

    A bias-free 1x1 convolution collapses the channels to one map, a sigmoid
    turns it into a gate, and the input is multiplied by that gate.
    """

    def __init__(self, in_channels: int):
        """Create the 1x1 squeeze convolution and the sigmoid."""
        super().__init__()
        self.channel_squeeze = nn.Conv2d(
            in_channels, 1, kernel_size=1, stride=1, padding=0, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled by its single-channel sigmoid gate."""
        # (bs, ch, h, w) -> (bs, 1, h, w)
        gate = self.channel_squeeze(x)
        gate = self.sigmoid(gate)
        # The one-channel gate broadcasts over the channel axis of x.
        return x * gate
    
    
class SpatialAttentionBlock(nn.Module):
    """Spatial attention: a conv stack produces a one-channel mask that scales the input.

    Layers are registered as ``conv1`` .. ``conv{n_layers}``; all use 3x3
    kernels with stride 1 and padding 1. Every layer uses ReLU except the
    last, which uses a sigmoid.
    """

    def __init__(
        self, in_channels: int,
        out_channels_list: tp.List[int],
    ):
        """Create one Conv2dBNActiv per entry of ``out_channels_list`` (last entry must be 1)."""
        super().__init__()
        self.n_layers = len(out_channels_list)
        channels_list = [in_channels] + out_channels_list
        assert self.n_layers > 0
        assert channels_list[-1] == 1

        channel_pairs = zip(channels_list[:-1], channels_list[1:])
        for idx, (in_chs, out_chs) in enumerate(channel_pairs, start=1):
            activ = "sigmoid" if idx == self.n_layers else "relu"
            block = Conv2dBNActiv(in_chs, out_chs, 3, 1, 1, activ=activ)
            setattr(self, f"conv{idx}", block)

    def forward(self, x):
        """Return ``x`` multiplied by the attention mask computed from it."""
        mask = x
        for idx in range(1, self.n_layers + 1):
            mask = getattr(self, f"conv{idx}")(mask)
        return mask * x

In [None]:
class MultiHeadResNet200D(nn.Module):
    """Shared ResNet200D backbone feeding several attention-based heads.

    Each head is registered as ``head_{i}`` and consists of a spatial-attention
    block, global average pooling, and a two-layer classifier; the head
    outputs are concatenated along dimension 1.

    Parameters
    ----------
    out_dims_head :
        Output size of each head. The default list is only read, never
        mutated.
    pretrained :
        When true, initialise the backbone from the chest-x-ray checkpoint at
        a fixed input path before the classifier is removed.
    """

    def __init__(
        self, out_dims_head: tp.List[int]=[3, 4, 3, 1], pretrained=False
    ):
        """Build the backbone and one head per entry of ``out_dims_head``."""
        # Initialise nn.Module before assigning any attribute on self.
        super(MultiHeadResNet200D, self).__init__()
        self.base_name = "resnet200d_320"
        self.n_heads = len(out_dims_head)

        # # load base model
        base_model = timm.create_model(
            self.base_name, num_classes=sum(out_dims_head), pretrained=False)
        in_features = base_model.num_features

        if pretrained:
            pretrained_model_path = '../input/startingpointschestx/resnet200d_320_chestx.pth'
            prefix = "model."
            checkpoint = torch.load(pretrained_model_path, map_location='cpu')["model"]
            # Strip only the leading "model." wrapper prefix; str.replace would
            # also remove any later occurrence inside the key.
            state_dict = {
                (k[len(prefix):] if k.startswith(prefix) else k): v
                for k, v in checkpoint.items()
            }
            base_model.load_state_dict(state_dict)

        # # remove global pooling and head classifier
        base_model.reset_classifier(0, '')

        # # Shared CNN Backbone
        self.backbone = base_model

        # # Multi Heads. Registered via setattr so the state-dict keys stay
        # # "head_{i}.*", matching the saved checkpoints.
        for i, out_dim in enumerate(out_dims_head):
            layer_name = f"head_{i}"
            layer = nn.Sequential(
                SpatialAttentionBlock(in_features, [64, 32, 16, 1]),
                nn.AdaptiveAvgPool2d(output_size=1),
                nn.Flatten(start_dim=1),
                nn.Linear(in_features, in_features),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(in_features, out_dim))
            setattr(self, layer_name, layer)

    def forward(self, x):
        """Run the backbone once and concatenate all head outputs along dim 1."""
        h = self.backbone(x)
        hs = [
            getattr(self, f"head_{i}")(h) for i in range(self.n_heads)]
        y = torch.cat(hs, dim=1)
        return y
    

# ## forward test
# m = MultiHeadResNet200D([3, 4, 3, 1], False)
# m = m.eval()

# x = torch.rand(1, 3, 256, 256)
# with torch.no_grad():
#     y = m(x)
# print("[forward test]")
# print("input:\t{}\noutput:\t{}".format(x.shape, y.shape))

# del m; del x; del y
# Run a garbage-collection pass; the forward test preceding it is commented out.
gc.collect()

In [None]:
class LabeledImageDataset(Dataset):
    """
    Dataset class for (image, label) pairs

    Reads images and applies transforms to them.

    Attributes
    ----------
    file_list : List[Tuple[tp.Union[str, Path], tp.Union[int, float, np.ndarray]]]
        list of (image file, label) pair
    transform : ImageTransformForCls
        transform built from the ``transform_list`` passed to ``__init__``
    """

    def __init__(
        self,
        file_list: tp.List[
            tp.Tuple[tp.Union[str, Path], tp.Union[int, float, np.ndarray]]],
        transform_list: tp.List[tp.Dict],
    ):
        """Store the file list and build the transform from ``transform_list``."""
        self.file_list = file_list
        self.transform = ImageTransformForCls(transform_list)

    def __len__(self):
        """Return Num of Images."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Return the transformed image and its label for the given index."""
        img_path, label = self.file_list[index]
        img = self._read_image_as_array(img_path)

        img, label = self.transform((img, label))
        return img, label

    def _read_image_as_array(self, path: str):
        """Read an image file and return it as an RGB numpy.ndarray.

        Raises
        ------
        FileNotFoundError
            If the file is missing or cannot be decoded.
        """
        img_arr = cv2.imread(str(path))
        # cv2.imread returns None on failure instead of raising; report the
        # offending path here rather than failing inside cvtColor.
        if img_arr is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        img_arr = cv2.cvtColor(img_arr, cv2.COLOR_BGR2RGB)
        return img_arr

In [None]:
def get_dataloaders_for_inference(
    file_list: tp.List[tp.List], batch_size=32,
):
    """Wrap ``file_list`` in a LabeledImageDataset and return a sequential DataLoader.

    The dataset applies Normalize followed by ToTensorV2. The loader keeps
    the input order (no shuffling), loads in the main process, and keeps the
    final partial batch.
    """
    # NOTE(review): mean/std are given as one-element lists of strings;
    # presumably Normalize coerces them to floats — confirm.
    normalize_params = {
        "always_apply": True, "max_pixel_value": 255.0,
        "mean": ["0.4887381077884414"], "std": ["0.23064819430546407"]}
    transform_list = [
        ["Normalize", normalize_params],
        ["ToTensorV2", {"always_apply": True}],
    ]
    dataset = LabeledImageDataset(file_list, transform_list=transform_list)
    return DataLoader(
        dataset,
        batch_size=batch_size, shuffle=False,
        num_workers=0, pin_memory=True,
        drop_last=False)

In [None]:
class ImageTransformBase:
    """
    Base Image Transform class.

    Args:
        data_augmentations: List of (method name, params dict) pairs; each
            name is resolved to a transform class and instantiated with its
            params, and the results are combined with albumentations.Compose.
    """

    def __init__(self, data_augmentations: tp.List[tp.Tuple[str, tp.Dict]]):
        """Instantiate every listed augmentation and compose them in order."""
        augmentations_list = [
            self._get_augmentation(aug_name)(**params)
            for aug_name, params in data_augmentations]
        self.data_aug = albumentations.Compose(augmentations_list)

    def __call__(self, pair: tp.Tuple[np.ndarray]) -> tp.Tuple[np.ndarray]:
        """You have to implement this by task"""
        raise NotImplementedError

    def _get_augmentation(self, aug_name: str) -> tp.Callable[..., tp.Any]:
        """Resolve ``aug_name`` to a transform class.

        Names found on the ``albumentations`` package are taken from there;
        anything else is evaluated as an expression in this module's scope
        (this is how names such as ``ToTensorV2`` are resolved).
        """
        if hasattr(albumentations, aug_name):
            return getattr(albumentations, aug_name)
        # SECURITY NOTE(review): eval executes arbitrary code. This is only
        # safe while aug_name comes from trusted, in-repo configuration;
        # never feed it names from an untrusted settings file.
        return eval(aug_name)


class ImageTransformForCls(ImageTransformBase):
    """Transform for classification: augments the image, passes the label through."""

    def __init__(self, data_augmentations: tp.List[tp.Tuple[str, tp.Dict]]):
        """Delegate pipeline construction to the base class."""
        super().__init__(data_augmentations)

    def __call__(self, in_arrs: tp.Tuple[np.ndarray]) -> tp.Tuple[np.ndarray]:
        """Apply the composed pipeline to the image of an (image, label) pair."""
        image, target = in_arrs
        transformed = self.data_aug(image=image)["image"]
        return transformed, target

In [None]:
def load_setting_file(path: tp.Union[str, Path]):
    """Load a YAML settings file and return the parsed content.

    Parameters
    ----------
    path :
        Location of the YAML file (string or ``pathlib.Path``).
    """
    # Imported here so the function does not depend on an `import yaml`
    # statement that appears later in the file than this definition.
    import yaml

    # Explicit encoding: do not depend on the platform's default locale.
    with open(path, encoding="utf-8") as f:
        settings = yaml.safe_load(f)
    return settings


def set_random_seed(seed: int = 42, deterministic: bool = False):
    """Seed Python, NumPy and PyTorch RNGs and set the cuDNN determinism flag.

    ``PYTHONHASHSEED`` is also written to the environment; ``deterministic``
    is assigned to ``torch.backends.cudnn.deterministic``.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    for torch_seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        torch_seed_fn(seed)  # type: ignore
    torch.backends.cudnn.deterministic = deterministic  # type: ignore
    

def run_inference_loop(stgs, model, loader, device):
    """Return sigmoid predictions of ``model`` over every batch of ``loader``.

    ``loader`` must yield (input, label) pairs; labels are ignored. ``stgs``
    is accepted but not used. The per-batch outputs are concatenated into one
    NumPy array.
    """
    model.to(device)
    model.eval()
    batch_preds = []
    with torch.no_grad():
        for inputs, _ in tqdm(loader):
            logits = model(inputs.to(device))
            batch_preds.append(logits.sigmoid().detach().cpu().numpy())
    return np.concatenate(batch_preds)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
import yaml

In [None]:
model_dir = TRAINED_MODEL
test_dir = TEST_RESIZED

# Test labels are unknown; a dummy [-1] * 11 label satisfies the
# (image, label) interface of LabeledImageDataset.
test_file_list = [
    (test_dir / f"{img_id}.png", [-1] * 11)
    for img_id in smpl_sub["StudyInstanceUID"].values]
test_loader = get_dataloaders_for_inference(test_file_list, batch_size=32)

test_preds_arr = np.zeros((N_FOLD, len(smpl_sub), N_CLASSES))
# Store results by position, not by fold id, so FOLDS does not have to be
# exactly 0..N_FOLD-1 for the indexing to stay in bounds.
for slot, fold_id in enumerate(FOLDS):
    print(f"[fold {fold_id}]")
    stgs = load_setting_file(model_dir / f"fold{fold_id}" / "settings.yml")
    # # prepare: never load the pretraining checkpoint at inference time,
    # # the fold weights below overwrite everything anyway.
    stgs["model"]["params"]["pretrained"] = False
    model = MultiHeadResNet200D(**stgs["model"]["params"])
    model_path = model_dir / f"best_model_fold{fold_id}.pth"
    model.load_state_dict(torch.load(model_path, map_location=device))

    # # inference test
    test_pred = run_inference_loop(stgs, model, test_loader, device)
    test_preds_arr[slot] = test_pred

    # Free the fold's model before the next one is built.
    del model
    del stgs
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# Average the per-fold predictions (axis 0 is the fold axis of test_preds_arr).
predictions_MultiHeadResNet200D1 = test_preds_arr.mean(axis=0)

In [None]:
# Sanity check: after the fold average this is (len(smpl_sub), N_CLASSES).
predictions_MultiHeadResNet200D1.shape

In [None]:
model_dir = INPUT / "ranzcr-multi-head-model-training-f0/"
test_dir = TEST_RESIZED

# Test labels are unknown; a dummy [-1] * 11 label satisfies the
# (image, label) interface of LabeledImageDataset.
test_file_list = [
    (test_dir / f"{img_id}.png", [-1] * 11)
    for img_id in smpl_sub["StudyInstanceUID"].values]
test_loader = get_dataloaders_for_inference(test_file_list, batch_size=32)

# Only one fold is evaluated here, so the result array has a single slot.
test_preds_arr = np.zeros((1, len(smpl_sub), N_CLASSES))
# Index by position rather than by fold id: the array has length 1, and
# indexing by fold id only worked because the id happens to be 0.
for slot, fold_id in enumerate([0]):
    print(f"[fold {fold_id}]")
    stgs = load_setting_file(model_dir / f"fold{fold_id}" / "settings.yml")
    # # prepare
    stgs["model"]["params"]["pretrained"] = False
    model = MultiHeadResNet200D(**stgs["model"]["params"])
    model_path = model_dir / f"best_model_fold{fold_id}.pth"
    model.load_state_dict(torch.load(model_path, map_location=device))

    # # inference test
    test_pred = run_inference_loop(stgs, model, test_loader, device)
    test_preds_arr[slot] = test_pred

    del model
    del stgs
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# The fold axis has length 1 here, so the mean just drops that axis.
predictions_MultiHeadResNet200D2 = test_preds_arr.mean(axis=0)

In [None]:
# Inference with the separately trained fold-1 multi-head model.
model_dir = INPUT / "ranzcr-multi-head-model-training-f1/"
test_dir = TEST_RESIZED

# Test labels are unknown; each file is paired with a dummy [-1] * 11 label.
test_file_list = [
    (test_dir / f"{img_id}.png", [-1] * 11)
    for img_id in smpl_sub["StudyInstanceUID"].values]
test_loader = get_dataloaders_for_inference(test_file_list, batch_size=32)

# Single fold, so the result array has one slot (index 0).
test_preds_arr = np.zeros((1, len(smpl_sub), N_CLASSES))
for fold_id in [1]:
    print(f"[fold {fold_id}]")
    stgs = load_setting_file(model_dir / f"fold{fold_id}" / "settings.yml")
    # # prepare: force pretrained off; the fold checkpoint is loaded below.
    stgs["model"]["params"]["pretrained"] = False
    model = MultiHeadResNet200D(**stgs["model"]["params"])
    model_path = model_dir / f"best_model_fold{fold_id}.pth"
    model.load_state_dict(torch.load(model_path, map_location=device))

    # # inference test
    test_pred = run_inference_loop(stgs, model, test_loader, device)
    # Stored at slot 0 (not fold_id): the array holds exactly one fold.
    test_preds_arr[0] = test_pred

    # Release the model before the next cell builds another one.
    del model
    del stgs
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# The fold axis has length 1 here, so the mean just drops that axis.
predictions_MultiHeadResNet200D3 = test_preds_arr.mean(axis=0)

In [None]:
# Inference with the separately trained fold-2 multi-head model.
model_dir = INPUT / "ranzcr-multi-head-model-training-f2/"
test_dir = TEST_RESIZED

# Test labels are unknown; each file is paired with a dummy [-1] * 11 label.
test_file_list = [
    (test_dir / f"{img_id}.png", [-1] * 11)
    for img_id in smpl_sub["StudyInstanceUID"].values]
test_loader = get_dataloaders_for_inference(test_file_list, batch_size=32)

# Single fold, so the result array has one slot (index 0).
test_preds_arr = np.zeros((1, len(smpl_sub), N_CLASSES))
for fold_id in [2]:
    print(f"[fold {fold_id}]")
    stgs = load_setting_file(model_dir / f"fold{fold_id}" / "settings.yml")
    # # prepare: force pretrained off; the fold checkpoint is loaded below.
    stgs["model"]["params"]["pretrained"] = False
    model = MultiHeadResNet200D(**stgs["model"]["params"])
    model_path = model_dir / f"best_model_fold{fold_id}.pth"
    model.load_state_dict(torch.load(model_path, map_location=device))

    # # inference test
    test_pred = run_inference_loop(stgs, model, test_loader, device)
    # Stored at slot 0 (not fold_id): the array holds exactly one fold.
    test_preds_arr[0] = test_pred

    # Release the model once its predictions are stored.
    del model
    del stgs
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# The fold axis has length 1 here, so the mean just drops that axis.
predictions_MultiHeadResNet200D4 = test_preds_arr.mean(axis=0)

In [None]:
# Weighted blend of all model families. Each entry is (weight, predictions);
# the normaliser is derived from the weights so that editing a weight can
# never leave a stale hand-computed divisor behind (the weights sum to 3.75).
ensemble_members = [
    (1.0, predictions200d),
    (1.0, predictions200d_2),
    (0.5, predictions152d),
    (0.5, predictions_MultiHeadResNet200D1),
    (0.25, predictions_MultiHeadResNet200D2),
    (0.25, predictions_MultiHeadResNet200D3),
    (0.25, predictions_MultiHeadResNet200D4),
]
total_weight = sum(weight for weight, _ in ensemble_members)
predictions = sum(weight * preds for weight, preds in ensemble_members) / total_weight

In [None]:
#predictions = (predictions200d + predictions200d_2 + predictions152d + predictions_MultiHeadResNet200D) / 4

In [None]:
# Columns 1..11 of the sample submission are the target columns
# (column 0 is StudyInstanceUID).
target_cols = test.iloc[:, 1:12].columns.tolist()
# Rows of `predictions` are in the same order as `test` because every loader
# above was built with shuffle=False.
test[target_cols] = predictions
test[['StudyInstanceUID'] + target_cols].to_csv('submission.csv', index=False)
test.head()