In [None]:
# https://github.com/rwightman/pytorch-image-models
!pip install ../input/timm-repo/pytorch-image-models-master/ > /dev/null

In [None]:
# https://github.com/Cadene/pretrained-models.pytorch
!pip install ../input/pretrainedmodels-pytorch/pretrained-models.pytorch-master/ > /dev/null

In [None]:
# https://github.com/lukemelas/EfficientNet-PyTorch
!pip install ../input/efficientnet-pyotrch/EfficientNet-PyTorch-master/ > /dev/null

In [None]:
# https://github.com/qubvel/segmentation_models.pytorch
!pip install ../input/segmentation-models-pytorch/segmentation_models.pytorch-master/ > /dev/null

# Dataset

In [None]:
from typing import Optional, Tuple, List
from os.path import join as pjoin
from PIL import Image

import numpy as np
import pandas as pd
import torch


class RANZCRDataset(torch.utils.data.Dataset):
    """Image-only dataset backed by a dataframe column of file base names.

    Item ``i`` is the image stored at ``root/<base name i><ext>``, optionally
    passed through ``augmentations`` and then ``transforms``. No target is
    returned.

    Args:
        df: dataframe with one row per image.
        root: directory holding the image files.
        ext: suffix appended verbatim to every base name.
        path_col: name of the ``df`` column containing the base names.
        use_timm_aug: when True, ``augmentations`` receives the PIL image
            positionally; when False it is called as
            ``augmentations(image=array)`` and the ``"image"`` entry of the
            result is kept.
        transforms: optional callable applied last, to the numpy image.
        augmentations: optional augmentation callable (see ``use_timm_aug``).
    """

    def __init__(
        self,
        df,
        root,
        ext,
        path_col,
        use_timm_aug=False,
        transforms=None,
        augmentations=None,
    ):
        super().__init__()
        frame = df.reset_index(drop=True).copy()

        self.root = root
        self.use_timm_aug = use_timm_aug
        self.augmentations = augmentations
        self.transforms = transforms

        basenames = frame[path_col].tolist()
        self.image_names = self._prepare_image_names(basenames, root, ext)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_names)

    def _prepare_image_names(self, basenames: List[str], root: str, ext: str):
        """Turn base names into full paths: ``join(root, name) + ext``."""
        full_paths = []
        for basename in basenames:
            full_paths.append(pjoin(root, basename) + ext)
        return full_paths

    def _prepare_img_target_from_idx(self, idx: int):
        """Load image ``idx`` and run the augmentation / transform pipeline."""
        picture = Image.open(self.image_names[idx])
        has_augmentations = self.augmentations is not None

        if self.use_timm_aug:
            # Augmentations work on the PIL image; convert to numpy afterwards.
            if has_augmentations:
                picture = self.augmentations(picture)
            picture = np.array(picture)
        else:
            # Augmentations work on the numpy array and return a dict.
            picture = np.array(picture)
            if has_augmentations:
                picture = self.augmentations(image=picture)["image"]

        if self.transforms is None:
            return picture
        return self.transforms(picture)

    def __getitem__(self, index: int):
        """Return the processed image at ``index``."""
        return self._prepare_img_target_from_idx(index)

# Model

In [None]:
from typing import Optional, Mapping, Any

import torch.nn as nn
import torch.nn.functional as F
import torch
import timm
import segmentation_models_pytorch as smp

# Encoder embedding widths. The ones referenced in the visible part of this
# notebook are used as the input size of the classifier head after the
# encoder's own classification layer has been replaced with nn.Identity().
EFFNETB6_EMB_DIM = 2304
EFFNETB5_EMB_DIM = 2048
EFFNETB4_EMB_DIM = 1792
EFFNETB3_EMB_DIM = 1536
EFFNETB1_EMB_DIM = 1280
# NOTE: despite the name, CNNModel also uses this value for "resnet200d".
RESNET50_EMB_DIM = 2048
REXNET200_EMB_DIM = 2560
VIT_EMB_DIM = 768
NF_RESNET50_EMB_DIM = 2048

# Not referenced in the visible part of this notebook.
EPS = 1e-6

class TaylorSoftmax(nn.Module):
    """Softmax-like normalisation built on a truncated Taylor series of exp.

    Each element is mapped to ``1 + x + x**2/2! + ... + x**n/n!`` and the
    result is normalised so that it sums to one along ``dim``.

    Args:
        dim: axis along which the output is normalised.
        n: order of the Taylor expansion; must be even.
    """

    def __init__(self, dim=1, n=2):
        super().__init__()
        assert not n % 2
        self.dim = dim
        self.n = n

    def forward(self, x):
        """Return the normalised Taylor expansion of ``x`` (same shape as ``x``)."""
        series = torch.ones_like(x)
        factorial = 1.
        order = 1
        while order <= self.n:
            factorial *= order
            series = series + x.pow(order) / factorial
            order += 1
        normaliser = series.sum(dim=self.dim, keepdims=True)
        return series / normaliser


class CNNModel(nn.Module):
    """Image classifier made of a timm encoder and a configurable MLP head.

    The encoder's own classification layer is replaced with ``nn.Identity`` so
    that it emits an embedding, which is fed to the head selected by
    ``classifiier_config["classifier_type"]``.

    Args:
        classifiier_config: head configuration. Always read: ``classes_num``,
            ``hidden_dims``, ``second_dropout_rate`` and ``classifier_type``
            (one of ``relu``, ``elu``, ``dima``, ``prelu``,
            ``multiscale_relu``, ``drop_linear``). ``first_dropout_rate`` is
            required only by the ``relu``, ``elu`` and ``prelu`` heads.
        encoder_type: one of ``rexnet_200``, ``tf_efficientnet_b3_ns``,
            ``tf_efficientnet_b5_ns``, ``resnet200d``.
        device: device the whole model is moved to at the end of construction.
        use_pretrained_encoder: forwarded to ``timm.create_model`` as
            ``pretrained``; forced to False when ``path_to_chkp`` is given.
        path_to_chkp: optional starting-point checkpoint. Only the
            ``tf_efficientnet_b5_ns`` and ``resnet200d`` branches load it; for
            the other encoders it merely disables the pretrained weights.
        use_taylorsoftmax: if True, ``forward`` returns the log of the Taylor
            softmax of the head output instead of the raw head output.
        one_channel: if True, the first convolution is collapsed to a single
            input channel by averaging its weights over the input channels.

    Raises:
        ValueError: on an unknown ``encoder_type`` or ``classifier_type``.
    """

    def __init__(
        self,
        classifiier_config: Mapping[str, Any],
        encoder_type: str,
        device: str,
        use_pretrained_encoder: bool = False,
        path_to_chkp: Optional[str] = None,
        use_taylorsoftmax: bool = False,
        one_channel: bool = True,
    ):
        super().__init__()

        # A starting-point checkpoint supersedes the timm pretrained weights.
        if path_to_chkp is not None:
            use_pretrained_encoder = False

        if encoder_type == "rexnet_200":
            self.encoder = timm.create_model(
                encoder_type, pretrained=use_pretrained_encoder
            )
            if one_channel:
                self._collapse_to_single_channel(self.encoder.stem.conv)
            self.encoder.head.fc = nn.Identity()
            nn_embed_size = REXNET200_EMB_DIM
        elif encoder_type == "tf_efficientnet_b3_ns":
            self.encoder = timm.create_model(
                encoder_type, pretrained=use_pretrained_encoder
            )
            if one_channel:
                self._collapse_to_single_channel(self.encoder.conv_stem)
            self.encoder.classifier = nn.Identity()
            nn_embed_size = EFFNETB3_EMB_DIM
        elif encoder_type == "tf_efficientnet_b5_ns":
            self.encoder = timm.create_model(
                encoder_type, pretrained=use_pretrained_encoder, num_classes=11
            )
            # The checkpoint is loaded before the stem is collapsed, i.e.
            # while the encoder still has its original input channels.
            if path_to_chkp is not None:
                print("Loading starting point")
                state_dict = load_effnet_b5_start_point(path_to_chkp)
                self.encoder.load_state_dict(state_dict)
            if one_channel:
                self._collapse_to_single_channel(self.encoder.conv_stem)
            self.encoder.classifier = nn.Identity()
            nn_embed_size = EFFNETB5_EMB_DIM
        elif encoder_type == "resnet200d":
            self.encoder = timm.create_model(
                encoder_type, pretrained=use_pretrained_encoder, num_classes=11
            )
            if path_to_chkp is not None:
                print("Loading starting point")
                state_dict = load_resnet200d_start_point(path_to_chkp)
                self.encoder.load_state_dict(state_dict)
            if one_channel:
                self._collapse_to_single_channel(self.encoder.conv1[0])
            self.encoder.fc = nn.Identity()
            # Same width as the (differently named) ResNet-50 constant.
            nn_embed_size = RESNET50_EMB_DIM
        else:
            raise ValueError(f"{encoder_type} is invalid model_type")

        # Register the head modules under their historical attribute names so
        # that existing state dicts keep loading.
        head_modules = self._build_classifier_modules(
            classifiier_config, nn_embed_size
        )
        for attr_name, module in head_modules.items():
            setattr(self, attr_name, module)

        # Final activation
        self.use_taylorsoftmax = use_taylorsoftmax
        if self.use_taylorsoftmax:
            self.taylorsoftmax = TaylorSoftmax()
        # Classifier type
        self.classifier_type = classifiier_config["classifier_type"]
        # Some additional stuff
        self.encoder_type = encoder_type
        self.device = device
        self.to(self.device)

    @staticmethod
    def _collapse_to_single_channel(conv):
        """Make ``conv`` accept 1 input channel by averaging its kernels in place."""
        conv.in_channels = 1
        conv.weight = torch.nn.Parameter(conv.weight.mean(1, keepdim=True))

    @staticmethod
    def _build_classifier_modules(classifiier_config, nn_embed_size):
        """Build the head; return ``{attribute name: module}`` in registration order."""
        classes_num = classifiier_config["classes_num"]
        hidden_dims = classifiier_config["hidden_dims"]
        second_dropout_rate = classifiier_config["second_dropout_rate"]
        classifier_type = classifiier_config["classifier_type"]

        if classifier_type == "relu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ReLU(),
                    nn.Dropout(p=first_dropout_rate),
                    nn.Linear(hidden_dims, hidden_dims),
                    nn.ReLU(),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "elu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(first_dropout_rate),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ELU(),
                    nn.Dropout(second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "dima":
            return {
                "classifier": nn.Sequential(
                    nn.BatchNorm1d(nn_embed_size),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.BatchNorm1d(hidden_dims),
                    nn.PReLU(hidden_dims),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "prelu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(first_dropout_rate),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.PReLU(hidden_dims),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "multiscale_relu":
            # This head never used `first_dropout_rate`, so it is not required.
            return {
                "big_dropout": nn.Dropout(p=0.5),
                "classifier": nn.Sequential(
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ELU(),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                ),
            }
        if classifier_type == "drop_linear":
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(nn_embed_size, classes_num),
                )
            }
        raise ValueError("Invalid classifier_type")

    def forward(self, image):
        """Encode ``image`` and return the head output.

        For the ``multiscale_relu`` head the output is the mean over 5 passes
        through ``big_dropout`` followed by the classifier. When
        ``use_taylorsoftmax`` is set, the log of the Taylor softmax of the head
        output is returned instead of the raw head output.
        """
        x = self.encoder(image)

        if self.classifier_type == "multiscale_relu":
            logits = torch.mean(
                torch.stack(
                    [self.classifier(self.big_dropout(x)) for _ in range(5)],
                    dim=0,
                ),
                dim=0,
            )
        else:
            logits = self.classifier(x)

        if self.use_taylorsoftmax:
            logits = self.taylorsoftmax(logits).log()

        return logits
    
    
class CNNSegModel(nn.Module):
    """Classifier built on a segmentation U-Net with a classification branch.

    ``self.encoder`` is an ``smp.Unet`` created from ``encoder_config``. Element
    3 of its ``classification_head`` is replaced with ``nn.Identity`` and the
    resulting features are fed to the head selected by
    ``classifiier_config["classifier_type"]``.

    Args:
        classifiier_config: head configuration. Always read: ``classes_num``,
            ``hidden_dims``, ``second_dropout_rate`` and ``classifier_type``
            (one of ``relu``, ``elu``, ``dima``, ``prelu``,
            ``multiscale_relu``, ``drop_linear``, ``double_elu_mlp``).
            ``first_dropout_rate`` is required by ``relu``, ``elu``, ``prelu``
            and ``double_elu_mlp``; ``hidden_classes_num`` by
            ``double_elu_mlp``.
        encoder_type: ``timm-efficientnet-b5_unet`` or ``densenet121_unet``.
        encoder_config: keyword arguments forwarded to ``smp.Unet``.
        device: device the whole model is moved to at the end of construction.
        path_to_chkp: optional starting-point checkpoint for the U-Net encoder.
        use_taylorsoftmax: if True, ``forward`` returns the log of the Taylor
            softmax of the head output instead of the raw head output.
        one_channel: if True, the first encoder convolution is collapsed to a
            single input channel by averaging its weights.
        enable_inference_mode: default for ``forward``'s flag of the same name.

    Raises:
        ValueError: on an unknown ``encoder_type`` or ``classifier_type``.
    """

    def __init__(
        self,
        classifiier_config: Mapping[str, Any],
        encoder_type: str,
        encoder_config: Mapping[str, Any],
        device: str,
        path_to_chkp: Optional[str] = None,
        use_taylorsoftmax: bool = False,
        one_channel: bool = True,
        enable_inference_mode: bool = False
    ):
        super().__init__()

        if encoder_type == "timm-efficientnet-b5_unet":
            self.encoder = smp.Unet(**encoder_config)
            # The checkpoint is loaded before the stem is collapsed, i.e.
            # while the encoder still has its original input channels.
            if path_to_chkp is not None:
                print("Loading starting point")
                state_dict = load_effnet_b5_start_point(path_to_chkp)
                self.encoder.encoder.load_state_dict(state_dict)
            if one_channel:
                self._collapse_to_single_channel(self.encoder.encoder.conv_stem)
            self.encoder.classification_head[3] = nn.Identity()
            # NOTE(review): presumably the input width of the replaced
            # classification layer - confirm against encoder_config.
            nn_embed_size = 512
        elif encoder_type == "densenet121_unet":
            self.encoder = smp.Unet(**encoder_config)
            if path_to_chkp is not None:
                print("Loading starting point")
                state_dict = load_densenet121_start_point(path_to_chkp)
                self.encoder.encoder.load_state_dict(state_dict)
            if one_channel:
                self._collapse_to_single_channel(
                    self.encoder.encoder.features.conv0
                )
            self.encoder.classification_head[3] = nn.Identity()
            nn_embed_size = 1024
        else:
            raise ValueError(f"{encoder_type} is invalid model_type")

        # Register the head modules under their historical attribute names so
        # that existing state dicts keep loading.
        head_modules = self._build_classifier_modules(
            classifiier_config, nn_embed_size
        )
        for attr_name, module in head_modules.items():
            setattr(self, attr_name, module)

        # Final activation
        self.use_taylorsoftmax = use_taylorsoftmax
        if self.use_taylorsoftmax:
            self.taylorsoftmax = TaylorSoftmax()
        # Classifier type
        self.classifier_type = classifiier_config["classifier_type"]
        # Some additional stuff
        self.enable_inference_mode = enable_inference_mode
        self.encoder_type = encoder_type
        self.device = device
        self.to(self.device)

    @staticmethod
    def _collapse_to_single_channel(conv):
        """Make ``conv`` accept 1 input channel by averaging its kernels in place."""
        conv.in_channels = 1
        conv.weight = torch.nn.Parameter(conv.weight.mean(1, keepdim=True))

    @staticmethod
    def _build_classifier_modules(classifiier_config, nn_embed_size):
        """Build the head; return ``{attribute name: module}`` in registration order."""
        classes_num = classifiier_config["classes_num"]
        hidden_dims = classifiier_config["hidden_dims"]
        second_dropout_rate = classifiier_config["second_dropout_rate"]
        classifier_type = classifiier_config["classifier_type"]

        if classifier_type == "relu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ReLU(),
                    nn.Dropout(p=first_dropout_rate),
                    nn.Linear(hidden_dims, hidden_dims),
                    nn.ReLU(),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "elu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(first_dropout_rate),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ELU(),
                    nn.Dropout(second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "dima":
            return {
                "classifier": nn.Sequential(
                    nn.BatchNorm1d(nn_embed_size),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.BatchNorm1d(hidden_dims),
                    nn.PReLU(hidden_dims),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "prelu":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(first_dropout_rate),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.PReLU(hidden_dims),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                )
            }
        if classifier_type == "multiscale_relu":
            # This head never used `first_dropout_rate`, so it is not required.
            return {
                "big_dropout": nn.Dropout(p=0.5),
                "classifier": nn.Sequential(
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ELU(),
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(hidden_dims, classes_num),
                ),
            }
        if classifier_type == "drop_linear":
            return {
                "classifier": nn.Sequential(
                    nn.Dropout(p=second_dropout_rate),
                    nn.Linear(nn_embed_size, classes_num),
                )
            }
        if classifier_type == "double_elu_mlp":
            first_dropout_rate = classifiier_config["first_dropout_rate"]
            hidden_classes_num = classifiier_config["hidden_classes_num"]
            return {
                "classifier_prenet": nn.Sequential(
                    nn.Dropout(first_dropout_rate),
                    nn.Linear(nn_embed_size, hidden_dims),
                    nn.ELU(),
                ),
                "classifier_hidden_class": nn.Sequential(
                    nn.Dropout(second_dropout_rate),
                    nn.Linear(hidden_dims, hidden_classes_num),
                ),
                # Consumes the prenet features concatenated with the
                # hidden-class outputs (see forward).
                "classifier_class_final": nn.Sequential(
                    nn.Dropout(second_dropout_rate),
                    nn.Linear(hidden_dims + hidden_classes_num, classes_num),
                ),
            }
        raise ValueError("Invalid classifier_type")

    def forward(self, image, enable_inference_mode=False):
        """Run the model on ``image``.

        Args:
            image: input batch passed to the U-Net.
            enable_inference_mode: OR-ed with the constructor flag. When the
                result is True only the U-Net encoder and its classification
                head are run (no mask is computed).

        Returns:
            In inference mode: ``logits``. Otherwise ``(mask, logits)``, or
            ``(mask, class_logits, logits)`` for the ``double_elu_mlp`` head.
            When ``use_taylorsoftmax`` is set, ``logits`` is the log of the
            Taylor softmax of the head output.
        """
        enable_inference_mode = enable_inference_mode or self.enable_inference_mode

        if enable_inference_mode:
            embs = self.encoder.encoder(image)
            x = self.encoder.classification_head(embs[-1])
        else:
            mask, x = self.encoder(image)

        if self.classifier_type == "multiscale_relu":
            # Average the head output over 5 passes through big_dropout.
            logits = torch.mean(
                torch.stack(
                    [self.classifier(self.big_dropout(x)) for _ in range(5)],
                    dim=0,
                ),
                dim=0,
            )
        elif self.classifier_type == "double_elu_mlp":
            pre_logits = self.classifier_prenet(x)
            class_logits = self.classifier_hidden_class(pre_logits)
            logits = self.classifier_class_final(
                torch.cat([pre_logits, class_logits], dim=-1)
            )
        else:
            logits = self.classifier(x)

        if self.use_taylorsoftmax:
            logits = self.taylorsoftmax(logits).log()

        if enable_inference_mode:
            return logits
        if self.classifier_type == "double_elu_mlp":
            return mask, class_logits, logits
        return mask, logits

# Inference utils

In [None]:
import os

from typing import List, Optional, Callable, Mapping, Any
from collections import OrderedDict
from tqdm import tqdm

import numpy as np
import torch

def get_validation_models(
    model_initilizer: Callable,
    model_config: Mapping[str, Any],
    model_ckp_dicts: List[OrderedDict],
    device: str,
):
    """Instantiate one eval-mode model per checkpoint state dict.

    Args:
        model_initilizer: callable invoked as
            ``model_initilizer(**model_config, device=device)``.
        model_config: keyword arguments for ``model_initilizer``.
        model_ckp_dicts: state dicts, one per model to build.
        device: device every model is moved to.

    Returns:
        List of models, in the order of ``model_ckp_dicts``.
    """

    def _restore(state_dict):
        # Build a fresh model, load the weights, then freeze it for inference.
        net = model_initilizer(**model_config, device=device)
        net.load_state_dict(state_dict)
        net = net.to(device)
        net.eval()
        return net

    return [_restore(state_dict) for state_dict in model_ckp_dicts]


def create_val_loaders(
    loader_initilizer: Callable,
    loader_config: Mapping[str, Any],
    dfs: List[Any],
    batch_size: int,
):
    """Build one sequential (non-shuffled) DataLoader per entry of ``dfs``.

    Args:
        loader_initilizer: dataset factory, invoked as
            ``loader_initilizer(df=df, **loader_config)``.
        loader_config: extra keyword arguments for ``loader_initilizer``.
        dfs: per-loader inputs handed to the factory as ``df``.
        batch_size: batch size of every loader.

    Returns:
        List of ``torch.utils.data.DataLoader`` in the order of ``dfs``. Each
        keeps the last partial batch and uses half of the available CPUs as
        workers.
    """
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to a single CPU (i.e. 0 workers) instead of raising TypeError.
    num_workers = (os.cpu_count() or 1) // 2

    return [
        torch.utils.data.DataLoader(
            loader_initilizer(df=df, **loader_config),
            batch_size=batch_size,
            drop_last=False,
            shuffle=False,
            num_workers=num_workers,
        )
        for df in dfs
    ]


@torch.no_grad()
def cnn_model_predict(t_batch, t_model, t_device):
    """Run ``t_model`` on ``t_batch`` (moved to ``t_device``) without gradients.

    Returns:
        The model output as a numpy array on the CPU.
    """
    output = t_model(t_batch.to(t_device))
    return output.detach().cpu().numpy()


def predict_over_all_train(
    my_loaders, my_models, model_predict_func, device, do_concat=True
):
    """Collect per-batch predictions, pairing each loader with one model.

    Args:
        my_loaders: iterables of batches, zipped with ``my_models``.
        my_models: models, one per loader.
        model_predict_func: called as ``model_predict_func(batch, model, device)``.
        device: device forwarded to ``model_predict_func``.
        do_concat: if True the batch outputs are joined with ``np.concatenate``;
            otherwise the list of per-batch outputs is returned.
    """
    batch_outputs = [
        model_predict_func(batch, model, device)
        for loader, model in zip(my_loaders, my_models)
        for batch in tqdm(loader)
    ]

    if not do_concat:
        return batch_outputs
    return np.concatenate(batch_outputs)



# Main imports

In [None]:
import os

from glob import glob
from os.path import splitext

from sklearn.metrics import roc_auc_score

import torch
import cv2
import torch.nn as nn
import numpy as np
import pandas as pd
import torchvision.transforms as T
import albumentations as albu

from scipy.special import softmax
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader, Dataset
from albumentations import Compose, Resize, Normalize
from albumentations.pytorch import ToTensorV2
%matplotlib inline

In [None]:
# When True, the train-image resizing step further down is skipped.
SKIP_VAL = True

# Public models

In [None]:
def public_notebook():
    """Run the public ResNet200D / SE-ResNet152D checkpoints on the test set.

    Every model is evaluated with 2x test-time augmentation (original image and
    a flip along the last axis); the sigmoid outputs of both passes are
    averaged.

    Returns:
        Tuple ``(predictions200d, predictions200d_2)`` of numpy arrays with one
        leading entry per model:

        * ``predictions200d``: the single ResNet200D model followed by the
          SE-ResNet152D model, evaluated on 640x640 inputs.
        * ``predictions200d_2``: the five ResNet200D fold models (fold 0..4),
          evaluated on 512x512 inputs.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    BATCH_SIZE = 64
    TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test'
    # os.cpu_count() may return None; fall back to a single CPU (0 workers).
    NUM_WORKERS = (os.cpu_count() or 1) // 2

    test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')

    class TestDataset(Dataset):
        """Test images addressed by ``StudyInstanceUID``, returned as RGB."""

        def __init__(self, df, transform=None):
            self.df = df
            self.file_names = df['StudyInstanceUID'].values
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            file_name = self.file_names[idx]
            file_path = f'{TEST_PATH}/{file_name}.jpg'
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None; fail with a
                # clear message instead of an opaque cvtColor error.
                raise FileNotFoundError(f'Could not read image: {file_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            if self.transform:
                augmented = self.transform(image=image)
                image = augmented['image']
            return image

    def get_transforms(image_size=640):
        """Resize to a square, normalise and convert to a tensor."""
        return Compose([
            Resize(image_size, image_size),
            Normalize(),
            ToTensorV2(),
        ])

    class PooledTimmClassifier(nn.Module):
        """timm backbone with its pooling and fc stripped, plus avg-pool + linear.

        Replaces the three structurally identical classes used previously; the
        attribute names (``model``, ``pooling``, ``fc``) are unchanged so the
        same state dicts load.
        """

        def __init__(self, model_name, out_dim=11):
            super().__init__()
            self.model = timm.create_model(model_name, pretrained=False)
            n_features = self.model.fc.in_features
            self.model.global_pool = nn.Identity()
            self.model.fc = nn.Identity()
            self.pooling = nn.AdaptiveAvgPool2d(1)
            self.fc = nn.Linear(n_features, out_dim)

        def forward(self, x):
            bs = x.size(0)
            features = self.model(x)
            pooled_features = self.pooling(features).view(bs, -1)
            output = self.fc(pooled_features)
            return output

    def load_model(model_name, ckpt_path, state_key=None):
        """Build a model, load its weights and move it to ``device`` in eval mode.

        ``state_key`` selects the state dict inside the checkpoint when the
        checkpoint is a wrapper dict; None means the file is the state dict.
        """
        # Load to CPU first so the function also works on CPU-only machines;
        # the model is moved to the target device afterwards.
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        state_dict = checkpoint if state_key is None else checkpoint[state_key]
        model = PooledTimmClassifier(model_name)
        model.load_state_dict(state_dict)
        model.eval()
        model.to(device)
        return model

    def inference(models, test_loader, device):
        """Return sigmoid probabilities with one leading entry per model."""
        probs = []
        for images in tqdm(test_loader, total=len(test_loader)):
            images = images.to(device)
            avg_preds = []
            for model in models:
                with torch.no_grad():
                    y_preds1 = model(images)
                    y_preds2 = model(images.flip(-1))
                y_preds = (
                    y_preds1.sigmoid().to('cpu').numpy() +
                    y_preds2.sigmoid().to('cpu').numpy()
                ) / 2
                avg_preds.append(y_preds)
            probs.append(np.stack(avg_preds, axis=0))
        # Batches are joined along axis 1, after the per-model axis.
        return np.concatenate(probs, axis=1)

    def make_loader(image_size):
        """Sequential test loader producing ``image_size`` x ``image_size`` inputs."""
        dataset = TestDataset(test, transform=get_transforms(image_size=image_size))
        return DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=NUM_WORKERS,
            pin_memory=True,
        )

    # Order matters: callers index the first result as [resnet200d, seresnet152d].
    models200D = [
        load_model(
            'resnet200d',
            "../input/resnet200d-public/resnet200d_320_CV9632.pth",
            state_key='model',
        ),
    ]

    fold_checkpoints = [
        "../input/resnet200d-baseline-benchmark-public/resnet200d_fold0_cv953.pth",
        "../input/resnet200d-baseline-benchmark-public/resnet200d_fold1_cv955.pth",
        "../input/resnet200d-baseline-benchmark-public/resnet200d_fold2_cv955.pth",
        "../input/resnet200d-baseline-benchmark-public/resnet200d_fold3_cv957.pth",
        "../input/resnet200d-baseline-benchmark-public/resnet200d_fold4_cv954.pth",
    ]
    models200D_2 = [load_model('resnet200d', path) for path in fold_checkpoints]

    models200D.append(
        load_model(
            'seresnet152d',
            '../input/seresnet152d-cv9615/seresnet152d_320_CV96.15.pth',
            state_key='model',
        )
    )

    test_loader_512 = make_loader(512)
    test_loader_640 = make_loader(640)

    predictions200d = inference(models200D, test_loader_640, device)
    predictions200d_2 = inference(models200D_2, test_loader_512, device)

    return predictions200d, predictions200d_2

In [None]:
# pred_1, pred_2 = public_notebook()


# public_pred_1, public_pred_3 = pred_1[0], pred_1[1]
# public_pred_2 = pred_2.mean(0)


In [None]:
# '../input/ranzcr-subs/resnet200d-inference-single-model-lb-96-5.csv' - public_pred_1
# '../input/ranzcr-subs/resnet200d-public-benchmark-2xtta-lb0-965.csv' - public_pred_2
# '../input/ranzcr-subs/ranzcr-resnet200d-seresnet152d-inference.csv' - public_pred_3
# '../input/ranzcr-subs/resnet-c12.csv' - public_pred_2

# Resize images

In [None]:
def create_folder(folder_name):
    """Create an empty directory at ``folder_name``, replacing whatever is there.

    Any existing directory tree, file or symlink at that path is removed first,
    then the directory (and missing parents) is created with ``os.makedirs``.

    The removal is done with the standard library instead of an IPython
    ``!rm -rf`` shell escape, so paths containing spaces or shell
    metacharacters are handled correctly and the function also works outside
    a notebook kernel.

    Args:
        folder_name: Path of the directory to (re)create.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # is not guaranteed to bring in shutil.
    import shutil

    if os.path.isdir(folder_name) and not os.path.islink(folder_name):
        shutil.rmtree(folder_name)
    elif os.path.lexists(folder_name):
        # Plain file or symlink (rmtree refuses symlinks): unlink it.
        os.remove(folder_name)

    os.makedirs(folder_name)

In [None]:
# Edge length in pixels of the square images written by the resize cell.
# NOTE(review): the output folders are still called '*_images_512_512' even
# though this value is 640 - the folder names look like a leftover.
RESIZE_SIZE = 640

In [None]:
def resize_images_to_folder(image_paths, out_folder, size):
    """Resize every image to ``size`` x ``size`` and save it as JPEG in ``out_folder``.

    ``out_folder`` is recreated from scratch via ``create_folder``. Each output
    file keeps the source basename with its extension replaced by ``.jpeg``.

    Args:
        image_paths: Iterable of source image paths.
        out_folder: Destination directory (wiped and recreated).
        size: Edge length in pixels of the square output images.
    """
    create_folder(out_folder)

    for im_n in tqdm(image_paths):
        name, _ = splitext(os.path.basename(im_n))
        # Context manager closes the source file handle once the pixels are
        # resampled; the resized copy is an independent in-memory image.
        with Image.open(im_n) as im:
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which was removed in Pillow 10.
            resized = im.resize((size, size), Image.LANCZOS)
        resized.save(pjoin(out_folder, name + '.jpeg'), 'JPEG', quality=100)


test_im_names = glob('../input/ranzcr-clip-catheter-line-classification/test/*.jpg')
resize_images_to_folder(test_im_names, 'test_images_512_512', RESIZE_SIZE)

# Train images are only needed for the out-of-fold validation run.
if not SKIP_VAL:
    train_im_names = glob('../input/ranzcr-clip-catheter-line-classification/train/*.jpg')
    resize_images_to_folder(train_im_names, 'train_images_512_512', RESIZE_SIZE)

# Read data

In [None]:
# Root folder of the competition data; listed once as a quick sanity check.
PATH2DIR = '../input/ranzcr-clip-catheter-line-classification/'
os.listdir(PATH2DIR)

# Label table and the submission template.
train = pd.read_csv(os.path.join(PATH2DIR, 'train.csv'))
sample_sub = pd.read_csv(os.path.join(PATH2DIR, 'sample_submission.csv'))

# Cross-validation split stored as a pickled object array.
# NOTE(review): allow_pickle=True executes pickle on load - only acceptable
# because the file comes from a trusted, self-published dataset.
split = np.load(
    '../input/ranzcr-models/naive_cv_split.npy',
    allow_pickle=True,
)

# Model

In [None]:
# Device string handed to get_validation_models and to the prediction helpers
# below. Hard-coded to the GPU; there is no CPU fallback.
DEVICE = 'cuda'

In [None]:
# Show every entry under ../input/ranzcr-models/ as the cell output.
os.listdir('../input/ranzcr-models/')

## 512 Res

In [None]:
# Entries of the models folder whose name contains '512res' (cell output).
[entry for entry in os.listdir('../input/ranzcr-models/') if '512res' in entry]

In [None]:
# One entry per experiment; each entry is whatever get_validation_models
# returns for that experiment's checkpoints (presumably one model per fold).
models_512 = []

# Public 0.
# glob() yields paths in arbitrary, filesystem-dependent order. Sorting makes
# the checkpoint (fold) order reproducible. This matters because the
# validation cell hands models_512[0] to predict_over_all_train together with
# per-fold loaders, which presumably pairs them by position.
# NOTE(review): confirm lexicographic order matches the fold order of `split`.
ckp_names = sorted(glob('../input/ranzcr-models/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_firstpseudo_swa_roc_auc_score/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_firstpseudo_swa_roc_auc_score/*.pt'))
print(ckp_names)
# Load onto the CPU first; get_validation_models receives the target device.
chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

models_512.append(get_validation_models(
    model_initilizer=CNNSegModel,
    model_config={
        # Key spelling ("classifiier_config") is kept exactly as used
        # throughout the notebook; presumably the model code reads it verbatim.
        "classifiier_config": {
            "classifier_type": "elu",
            "classes_num": 11,
            "hidden_dims": 1024,
            "second_dropout_rate": 0.2,
            "first_dropout_rate": 0.3,
        },
        "encoder_config": {
            "in_channels": 3,
            "encoder_name": 'timm-efficientnet-b5',
            "encoder_weights": None,  # weights come from the checkpoint dicts
            "classes": 2,
            "activation": 'sigmoid',
            "aux_params": dict(
                pooling='avg',   # one of 'avg', 'max'
                dropout=None,    # dropout ratio, default is None
                classes=4,       # define number of output labels
            ),
        },
        "encoder_type": "timm-efficientnet-b5_unet",
        "use_taylorsoftmax": False,
        "one_channel": True,
        "enable_inference_mode": True,
    },
    model_ckp_dicts=chckps,
    device=DEVICE,
))

# # Public 0.
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_selected2steppseudo_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_selected2steppseudo_swa_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.963
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_best_roc_auc_score/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_best_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.963
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_swa_roc_auc_score/timm_efficientnet_b5_unet_32bs_640res_lesslaugs_ls005_shedchanged_startpoint_difflrs_segbranch_125coefs_1e4noseg_bigholes_swa_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# Public 0.961
# glob() yields paths in arbitrary, filesystem-dependent order. Sorting keeps
# the per-checkpoint axis of the stacked predictions in a reproducible order.
ckp_names = sorted(glob('../input/ranzcr-models/timm_efficientnet_b5_unet_32bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_morepseudo_swa_roc_auc_score/timm_efficientnet_b5_unet_32bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_morepseudo_swa_roc_auc_score/*.pt'))
print(ckp_names)
# Load onto the CPU first; get_validation_models receives the target device.
chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

models_512.append(get_validation_models(
    model_initilizer=CNNSegModel,
    model_config={
        # Key spelling ("classifiier_config") is kept exactly as used
        # throughout the notebook; presumably the model code reads it verbatim.
        "classifiier_config": {
            "classifier_type": "elu",
            "classes_num": 11,
            "hidden_dims": 1024,
            "second_dropout_rate": 0.2,
            "first_dropout_rate": 0.3,
        },
        "encoder_config": {
            "in_channels": 3,
            "encoder_name": 'timm-efficientnet-b5',
            "encoder_weights": None,  # weights come from the checkpoint dicts
            "classes": 2,
            "activation": 'sigmoid',
            "aux_params": dict(
                pooling='avg',   # one of 'avg', 'max'
                dropout=None,    # dropout ratio, default is None
                classes=4,       # define number of output labels
            ),
        },
        "encoder_type": "timm-efficientnet-b5_unet",
        "use_taylorsoftmax": False,
        "one_channel": True,
        "enable_inference_mode": True,
    },
    model_ckp_dicts=chckps,
    device=DEVICE,
))

# # Public 0.959
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_pseudo_best_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_pseudo_best_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# Public 0.961
# glob() yields paths in arbitrary, filesystem-dependent order. Sorting keeps
# the per-checkpoint axis of the stacked predictions in a reproducible order.
ckp_names = sorted(glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_pseudo_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_pseudo_swa_roc_auc_score/*.pt'))
print(ckp_names)
# Load onto the CPU first; get_validation_models receives the target device.
chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

models_512.append(get_validation_models(
    model_initilizer=CNNSegModel,
    model_config={
        # Key spelling ("classifiier_config") is kept exactly as used
        # throughout the notebook; presumably the model code reads it verbatim.
        "classifiier_config": {
            "classifier_type": "elu",
            "classes_num": 11,
            "hidden_dims": 1024,
            "second_dropout_rate": 0.2,
            "first_dropout_rate": 0.3,
        },
        "encoder_config": {
            "in_channels": 3,
            "encoder_name": 'timm-efficientnet-b5',
            "encoder_weights": None,  # weights come from the checkpoint dicts
            "classes": 2,
            "activation": 'sigmoid',
            "aux_params": dict(
                pooling='avg',   # one of 'avg', 'max'
                dropout=None,    # dropout ratio, default is None
                classes=4,       # define number of output labels
            ),
        },
        "encoder_type": "timm-efficientnet-b5_unet",
        "use_taylorsoftmax": False,
        "one_channel": True,
        "enable_inference_mode": True,
    },
    model_ckp_dicts=chckps,
    device=DEVICE,
))

# # Public 0.
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_doublemlp_best_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_doublemlp_best_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#              "classifiier_config": {
#                 "classifier_type": "double_elu_mlp",
#                 "classes_num": 11,
#                 "hidden_classes_num": 4,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config": {
#                 "in_channels": 3,
#                 "encoder_name": "timm-efficientnet-b5",
#                 "encoder_weights": None,
#                 "classes": 12,
#                 "activation": "sigmoid",
#                 "aux_params": dict(
#                     pooling="avg",  # one of 'avg', 'max'
#                     dropout=None,  # dropout ratio, default is None
#                     classes=4,  # define number of output labels
#                 ),
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#             #"path_to_chkp": "/data/additional_data/startingpoints/densenet121_chestx.pth",
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_doublemlp_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_doublemlp_swa_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#              "classifiier_config": {
#                 "classifier_type": "double_elu_mlp",
#                 "classes_num": 11,
#                 "hidden_classes_num": 4,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config": {
#                 "in_channels": 3,
#                 "encoder_name": "timm-efficientnet-b5",
#                 "encoder_weights": None,
#                 "classes": 12,
#                 "activation": "sigmoid",
#                 "aux_params": dict(
#                     pooling="avg",  # one of 'avg', 'max'
#                     dropout=None,  # dropout ratio, default is None
#                     classes=4,  # define number of output labels
#                 ),
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#             #"path_to_chkp": "/data/additional_data/startingpoints/densenet121_chestx.pth",
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.93
# ckp_names = glob('../input/ranzcr-models/densenet121_unet_52bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_best_roc_auc_score/densenet121_unet_52bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_best_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 #"hidden_classes_num": 4,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config": {
#                 "in_channels": 3,
#                 "encoder_name": "densenet121",
#                 "encoder_weights": None,
#                 "classes": 2,
#                 "activation": "sigmoid",
#                 "aux_params": dict(
#                     pooling="avg",  # one of 'avg', 'max'
#                     dropout=None,  # dropout ratio, default is None
#                     classes=4,  # define number of output labels
#                 ),
#             },
#             "encoder_type": "densenet121_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#             #"path_to_chkp": "/data/additional_data/startingpoints/densenet121_chestx.pth",
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.953
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_multichanneltube_mtm_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_rotaugs_ls005_shedchanged_startpoint_difflrs_segbranch_124coefs_1e4noseg_multichanneltube_mtm_swa_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":12, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# Public 0.965
# glob() yields paths in arbitrary, filesystem-dependent order. Sorting keeps
# the per-checkpoint axis of the stacked predictions in a reproducible order.
ckp_names = sorted(glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_swa_roc_auc_score/*.pt'))
print(ckp_names)
# Load onto the CPU first; get_validation_models receives the target device.
chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

models_512.append(get_validation_models(
    model_initilizer=CNNSegModel,
    model_config={
        # Key spelling ("classifiier_config") is kept exactly as used
        # throughout the notebook; presumably the model code reads it verbatim.
        "classifiier_config": {
            "classifier_type": "elu",
            "classes_num": 11,
            "hidden_dims": 1024,
            "second_dropout_rate": 0.2,
            "first_dropout_rate": 0.3,
        },
        "encoder_config": {
            "in_channels": 3,
            "encoder_name": 'timm-efficientnet-b5',
            "encoder_weights": None,  # weights come from the checkpoint dicts
            "classes": 2,
            "activation": 'sigmoid',
            "aux_params": dict(
                pooling='avg',   # one of 'avg', 'max'
                dropout=None,    # dropout ratio, default is None
                classes=4,       # define number of output labels
            ),
        },
        "encoder_type": "timm-efficientnet-b5_unet",
        "use_taylorsoftmax": False,
        "one_channel": True,
        "enable_inference_mode": True,
    },
    model_ckp_dicts=chckps,
    device=DEVICE,
))

# # Public 0.963
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_best_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_1e4noseg_bigholes_best_roc_auc_score/*.pt')
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))


# # Public 0.962
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_best_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_best_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.96
# ckp_names = glob('../input/ranzcr-models/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_swa_roc_auc_score/timm_efficientnet_b5_unet_24bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_segbranch_113coefs_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNSegModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_config":{
#                 "in_channels":3,
#                 "encoder_name":'timm-efficientnet-b5', 
#                 "encoder_weights":None, 
#                 "classes":2, 
#                 "activation":'sigmoid',
#                 "aux_params":dict(
#                     pooling='avg',             # one of 'avg', 'max'
#                     dropout=None,               # dropout ratio, default is None
#                     classes=4,                 # define number of output labels
#                 )
#             },
#             "encoder_type": "timm-efficientnet-b5_unet",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             "enable_inference_mode": True
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.951
# ckp_names = glob('../input/ranzcr-models/tf_efficientnet_b3_ns_20bs_512res_specialaugs_ls005_shedchanged_swa_roc_auc_score/tf_efficientnet_b3_ns_20bs_512res_specialaugs_ls005_shedchanged_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "tf_efficientnet_b3_ns",
#             "use_taylorsoftmax": False
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.955
# ckp_names = glob('../input/ranzcr-models/rexnet_200_18bs_512res_specialaugs_ls005_shedchanged_swa_roc_auc_score/rexnet_200_18bs_512res_specialaugs_ls005_shedchanged_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "rexnet_200",
#             "use_taylorsoftmax": False
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.95
# ckp_names = glob('../input/ranzcr-models/tf_efficientnet_b5_ns_32bs_640res_qubvelaugs_ls005_shedchanged_smallerler_swa_roc_auc_score/tf_efficientnet_b5_ns_32bs_640res_qubvelaugs_ls005_shedchanged_smallerler_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "tf_efficientnet_b5_ns",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.958
# ckp_names = glob('../input/ranzcr-models/tf_efficientnet_b5_ns_32bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_swa_roc_auc_score/tf_efficientnet_b5_ns_32bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "tf_efficientnet_b5_ns",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.955
# ckp_names = glob('../input/ranzcr-models/rexnet_200_32bs_512res_fmix_specialaugs_ls005_shedchanged_swa_roc_auc_score/rexnet_200_32bs_512res_fmix_specialaugs_ls005_shedchanged_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "rexnet_200",
#             "use_taylorsoftmax": False
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.957
# ckp_names = glob('../input/ranzcr-models/resnet200d_40bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_swa_roc_auc_score/resnet200d_40bs_640res_qubvelaugs_ls005_shedchanged_startpoint_difflrs_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "resnet200d",
#             "use_taylorsoftmax": False,
#             "one_channel": True,
#             #"path_to_chkp": "/data/additional_data/startingpoints/resnet200d_320_chestx.pth"
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# # Public 0.955
# ckp_names = glob('../input/ranzcr-models/rexnet_200_32bs_512res_cutmix_qubvelaug_noblur_ls005_nocutout_shedchanged_swa_roc_auc_score/rexnet_200_32bs_512res_cutmix_qubvelaug_noblur_ls005_nocutout_shedchanged_swa_roc_auc_score/*.pt')
# ckp_names = sorted(ckp_names, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# print(ckp_names)
# chckps = [torch.load(el, map_location='cpu') for el in ckp_names]

# models_512.append(get_validation_models(
#     model_initilizer=CNNModel,
#     model_config={
#             "classifiier_config": {
#                 "classifier_type": "elu",
#                 "classes_num": 11,
#                 "hidden_dims": 1024,
#                 "second_dropout_rate": 0.2,
#                 "first_dropout_rate": 0.3,
#             },
#             "encoder_type": "rexnet_200",
#             "use_taylorsoftmax": False
#         },
#     model_ckp_dicts=chckps,
#     device=DEVICE
# ))

# Val Predict

In [None]:
if not SKIP_VAL:
    # One validation frame per fold. split[fold][1] is used as positional row
    # indices into `train` (presumably the validation half of each fold).
    val_dfs = [train.iloc[split[fold][1]] for fold in range(5)]

    val_loaders = create_val_loaders(
        loader_initilizer=RANZCRDataset,
        loader_config=dict(
            root='train_images_512_512',
            path_col="StudyInstanceUID",
            ext=".jpeg",
            transforms=T.ToTensor(),
        ),
        dfs=val_dfs,
        batch_size=32,
    )

    # Only the first experiment is validated.
    # NOTE(review): presumably loader i is scored with model i of
    # models_512[0], so checkpoint order must match fold order - confirm.
    train_logits = predict_over_all_train(
        val_loaders, models_512[0], cnn_model_predict, DEVICE
    )

    # Collapse the per-fold frames into one frame, in the same fold order.
    val_dfs = pd.concat(val_dfs).reset_index(drop=True)

    # Columns 1..-2 are scored (first and last columns are skipped).
    oof_score = roc_auc_score(val_dfs.iloc[:, 1:-1], train_logits)
    print(f"OOF score : {oof_score}")

# Predict Test

In [None]:
def predict_test_with_multiple_models(
    my_models: List[List[torch.nn.Module]],
    my_loaders: List[torch.utils.data.DataLoader],
    predict_func: Callable,
    device: str,
):
    """Collect the raw predictions of every model on every batch of every loader.

    Nothing is averaged here; the caller decides how to reduce the result.

    Args:
        my_models: One inner list per experiment; each inner list holds that
            experiment's models (the cross-validation models). Every
            experiment must contain the same number of models, otherwise the
            per-batch ``np.stack`` fails.
        my_loaders: One loader per test-time-augmentation view. Each loader is
            iterated once, in order.
        predict_func: Called as ``predict_func(batch, model, device)``; must
            return an array whose shape is the same for every model on a given
            batch.
        device: Forwarded unchanged to ``predict_func``.

    Returns:
        ``np.ndarray`` whose leading axes are ``(loader, experiment, model)``.
        The remaining axes are those of ``predict_func``'s output, with the
        batches of a loader concatenated along that output's first axis
        (axis 3 of the result).
    """
    per_loader = []

    # TTA loop
    for my_loader in my_loaders:
        # For each batch: (experiment, model, *predict_func output shape).
        per_batch = [
            np.stack(
                [
                    # CV loop inside the experiments loop
                    np.stack(
                        [predict_func(batch, m, device) for m in exp_models],
                        axis=0,
                    )
                    for exp_models in my_models
                ],
                axis=0,
            )
            for batch in tqdm(my_loader)
        ]

        # Axis 2 is the first axis of predict_func's output, i.e. the one
        # along which consecutive batches are glued back together.
        per_loader.append(np.concatenate(per_batch, axis=2))

    return np.stack(per_loader, axis=0)

## 512 res

In [None]:
# Batch size shared by every 512-resolution test DataLoader built below.
INF_BS = 32

In [None]:
def _make_test_dataset_512(augmentations=None):
    """Build a RANZCRDataset over the 512x512 test images.

    Args:
        augmentations: Optional albumentations transform passed to the dataset
            as its ``augmentations``; ``None`` gives the un-augmented view.

    Returns:
        A ``RANZCRDataset`` reading ``<StudyInstanceUID>.jpeg`` files from
        ``test_images_512_512`` for every row of ``sample_sub``, with a fresh
        ``T.ToTensor()`` as its ``transforms``.
    """
    return RANZCRDataset(
        df=sample_sub,
        root='test_images_512_512',
        path_col="StudyInstanceUID",
        ext=".jpeg",
        transforms=T.ToTensor(),
        augmentations=augmentations,
    )


def _make_test_loader(dataset):
    """Wrap ``dataset`` in an order-preserving inference DataLoader.

    No shuffling and no dropped last batch, so predictions line up with the
    dataset's row order. Uses ``INF_BS`` as the batch size.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=INF_BS,
        drop_last=False,
        shuffle=False,
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to 1 (-> 0 workers, i.e. load in the main process) instead
        # of raising TypeError on `None // 2`.
        num_workers=(os.cpu_count() or 1) // 2,
    )


# Test-time-augmentation views. The loader order below fixes the order of the
# leading (TTA) axis of the stacked predictions.
test_original = _make_test_dataset_512()
test_hf = _make_test_dataset_512(albu.HorizontalFlip(p=1.0))

# Rotations with no shift/scale; the angle is drawn from the given
# 1-degree-wide range on every call, so it is not exactly constant per image.
test_left_rot = _make_test_dataset_512(
    albu.ShiftScaleRotate(
        shift_limit=0,
        scale_limit=0,
        rotate_limit=(15, 16),
        p=1.0,
    )
)
test_right_rot = _make_test_dataset_512(
    albu.ShiftScaleRotate(
        shift_limit=0,
        scale_limit=0,
        rotate_limit=(-16, -15),
        p=1.0,
    )
)

# Disabled TTA views, kept for reference. To re-enable one, build a dataset
# with _make_test_dataset_512(<augmentation>) and add it to the tuple below:
#   test_tp    : albu.Transpose(p=1.0)
#   test_tp_vf : albu.Compose([albu.Transpose(p=1.0), albu.VerticalFlip(p=1.0)])
#   test_tp_hf : albu.Compose([albu.Transpose(p=1.0), albu.HorizontalFlip(p=1.0)])
#   test_vf    : albu.VerticalFlip(p=1.0)
#   test_hf_vf : albu.Compose([albu.HorizontalFlip(p=1.0), albu.VerticalFlip(p=1.0)])

all_test_loaders_512 = [
    _make_test_loader(dataset)
    for dataset in (test_original, test_hf, test_left_rot, test_right_rot)
]

In [None]:
# Raw outputs of every experiment / CV model on every TTA loader; nothing is
# averaged yet.
test_logits_512 = predict_test_with_multiple_models(
    my_models=models_512,
    my_loaders=all_test_loaders_512,
    predict_func=cnn_model_predict,
    device=DEVICE,
)


# Blend and create submission.csv

In [None]:
def to_rank(input):
    """Replace every value by its rank within its column.

    ``input`` is wrapped in a ``pandas.DataFrame`` and ranked with the pandas
    defaults (down each column, ascending, 1-based, ties share their average
    rank).

    Returns:
        2-D ``np.ndarray`` of float ranks with the shape of the wrapped frame.
    """
    frame = pd.DataFrame(input)
    ranked = frame.rank()
    return ranked.values

In [None]:
from scipy.special import expit

In [None]:
# Sanity check of the stacked predictions. As built by
# predict_test_with_multiple_models, the leading axes are
# (TTA loader, experiment, CV model); the rest come from the predict function.
test_logits_512.shape

In [None]:
# Apply the sigmoid first, then average over the TTA loaders (axis 0) and, in
# what remains, over the CV models (axis 1). Experiments stay separate on the
# leading axis. Despite its name, test_logits holds averaged sigmoid outputs.
test_logits = (
    expit(test_logits_512)
    .mean(axis=0)
    .mean(axis=1)
)

In [None]:
# One averaged prediction array per experiment, taken from the leading axis of
# test_logits (so models_512 must hold at least four experiments).
my_exp_1, my_exp_2, my_exp_3, my_exp_4 = (
    test_logits[exp_idx] for exp_idx in range(4)
)

In [None]:
# Shape of one experiment's predictions after the TTA and CV averaging.
my_exp_1.shape

In [None]:
# Blend the four experiments by summing the square roots of their averaged
# sigmoid outputs, each experiment contributing exactly once. The sum is not
# normalised, so values may exceed 1.
blend = (
#     public_pred_1**0.5 +
#     public_pred_3**0.5 +
#     (public_pred_2**0.5) * 2 +

    my_exp_1**0.5 +
    my_exp_2**0.5 +
    my_exp_3**0.5 +
    # Previously my_exp_3 was added a second time here and my_exp_4 was never
    # used. Deliberate extra weight is written as `* 2` elsewhere in this
    # expression, so the duplicate was a copy-paste slip.
    my_exp_4**0.5
)

In [None]:
# Overwrite every column after the first with the blended predictions, in
# place. The first column is left untouched (presumably the StudyInstanceUID
# column used as path_col above -- confirm).
sample_sub.iloc[:,1:] = blend

#sample_sub.iloc[:,1:] = result_512

# Display the filled-in submission frame as the cell output.
sample_sub

In [None]:
# Number of distinct values in each column; a quick check that no prediction
# column collapsed to a constant.
sample_sub.nunique(axis=0)

In [None]:
# Delete the resized test images directory (IPython shell escape). Inference
# above has already read from it, so this cell must run last.
!rm -rf test_images_512_512

# Write the submission file without the DataFrame index.
sample_sub.to_csv('submission.csv', index=False)

# List the working directory to confirm what is left in it.
os.listdir('./')