# Train Kernel Version Info
* V1: fold0 {CV=0.9504 LB=0.957}
* V4: fold1 {CV=0.9522 LB=0.955}      
* V6: fold2 {CV=0.9486 LB=0.955}
* V7: fold3 {CV=0.9520 LB=0.960}      
* V8: fold4 {CV=0.9464 LB=0.956}

# Submit Info
* V1: mean of 5 folds results      
    LB=0.962         
* V2: res^0.5 mean, from: [https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/211194](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/211194)      
    LB=0.961
* V3: weighted mean(CV scores)       
    LB=0.962
* V6: best_score_model       
    LB=0.962
* V7: focal+mctr model; test without mctr                 
    LB=

# Directory settings

In [None]:
# ====================================================
# Directory settings
# ====================================================
import os

# Destination for the inference log and the submission CSV.
OUTPUT_DIR = './'
# exist_ok=True makes this a no-op when the directory already exists and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Directory the test-set JPEGs are read from.
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test'

# CFG

In [None]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Static configuration for this inference notebook (read as class attributes)."""

    seed=35
    # Flat list of fold indices. This used to be wrapped in a second list
    # ([[0, 1, 2, 3, 4]]), so iterating it yielded one list instead of five ints.
    trn_fold=[0, 1, 2, 3, 4]

    # Alternative checkpoint sets (best-loss weights), kept for reference:
#     model_path = ['../input/ranzcr3stepsmodelweightsv0fold0/fold0/resnet200d_fold0_best_loss_cpu.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold12/fold1/resnet200d_fold1_best_loss.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold12/fold2/resnet200d_fold2_best_loss.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold3/fold3/resnet200d_fold3_best_loss.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold34/fold4/resnet200d_fold4_best_loss.pth',
#                  ]
    # Alternative checkpoint sets (best-score weights), kept for reference:
#     model_path = ['../input/ranzcr3stepsmodelweightsv0fold0/fold0/resnet200d_fold0_best_score_cpu.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold12/fold1/resnet200d_fold1_best_score.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold12/fold2/resnet200d_fold2_best_score.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold3/fold3/resnet200d_fold3_best_score.pth',
#                   '../input/ranzcr3stepsmodelweightsv0fold34/fold4/resnet200d_fold4_best_score.pth',
#                  ]
    # Checkpoints actually loaded for inference, one per fold, indexed by fold.
    model_path = ['../input/ranzcrresnet200dfocalmctrmodelf0/fold0resnet200d_fold0_best_score.pth',
                  '../input/ranzcrresnet200dfocalmctrmodelf1/fold1resnet200d_fold1_best_score.pth',
                  '../input/ranzcrresnet200dfocalmctrmodelf2/fold2resnet200d_fold2_best_score.pth',
                  '../input/ranzcrresnet200dfocalmctrmodelf3/fold3resnet200d_fold3_best_score.pth',
                  '../input/ranzcrresnet200dfocalmctrmodelf4/fold4resnet200d_fold4_best_score.pth',
                 ]
    # Out-of-fold prediction files; only needed by the commented-out CV cells.
#     oof_path = ['../input/ranzcr3stepsmodelweightsv0fold0/fold0/oof_df.csv',
#                '../input/ranzcr3stepsmodelweightsv0fold12/fold1/oof_df.csv',
#                '../input/ranzcr3stepsmodelweightsv0fold12/fold2/oof_df.csv',
#                '../input/ranzcr3stepsmodelweightsv0fold3/fold3/oof_df.csv',
#                '../input/ranzcr3stepsmodelweightsv0fold34/fold4/oof_df.csv',
#                ]

    # When True, only the first 10 rows of the sample submission are loaded.
    debug=False
    # DataLoader settings.
    num_workers=4
    batch_size=128
    # timm architecture name and the square side length images are resized to.
    model_name='resnet200d'
    size=512

    # Width of the classifier head; matches the number of target columns below.
    target_size=11
    target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 
                 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                 'Swan Ganz Catheter Present']
    n_fold=5
   

# Library

In [None]:
# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.insert(0,'../input/timm-nfnet')
import timm

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

from torch.cuda.amp import autocast, GradScaler

import warnings 
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings(action='ignore')

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Select the compute device again and echo it as the cell's output.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

# Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Compute ROC AUC for every column and the mean across columns.

    Args:
        y_true: 2-D array of ground-truth labels, one column per target.
        y_pred: 2-D array of predicted scores, indexed like ``y_true``.

    Returns:
        Tuple ``(avg_score, scores)``: the mean AUC and the list of
        per-column AUCs, in column order.
    """
    n_targets = y_true.shape[1]
    per_target = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(n_targets)
    ]
    return np.mean(per_target), per_target


def get_result(result_df):
    """Log the mean and per-target ROC AUC of a prediction frame.

    Args:
        result_df: DataFrame holding, for every name in ``CFG.target_cols``,
            a label column of that name and a ``pred_<name>`` column.
    """
    pred_cols = ['pred_' + col for col in CFG.target_cols]
    preds = result_df[pred_cols].values
    labels = result_df[CFG.target_cols].values
    score, scores = get_score(labels, preds)
    LOGGER.info(f'Score: {score:<.4f}  Scores: {np.round(scores, decimals=4)}')


@contextmanager
def timer(name):
    """Context manager that logs when the wrapped block starts and how long it took.

    Args:
        name: Label placed in square brackets in both log lines.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    # Reached only when the wrapped block finishes without raising.
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')


def init_logger(log_file=None):
    """Return this module's logger, echoing bare messages to the console and a file.

    Args:
        log_file: Path of the log file. When omitted, ``OUTPUT_DIR + 'inference.log'``
            is used, resolved at call time.

    Returns:
        The ``logging.Logger`` named after this module, set to INFO, with exactly
        one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler

    if log_file is None:
        log_file = OUTPUT_DIR + 'inference.log'

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger() returns the same object on every call, so re-running the cell
    # used to stack additional handlers and emit each message several times.
    # Drop (and close) whatever is attached before adding the fresh pair.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Shared logger used by get_result() and timer(); writes to the default log file.
LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator; also exported as
            the ``PYTHONHASHSEED`` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Apply the configured seed to every RNG before the cells below run.
seed_torch(seed=CFG.seed)

# Data Loading

In [None]:
# oof_df=pd.DataFrame()
# for single_oof_path in CFG.oof_path:
#     temp_oof_df = pd.read_csv(single_oof_path)
#     oof_df = pd.concat([temp_oof_df,oof_df])

# oof_df.columns

In [None]:
# oof_folds_score = 0
# for fold in range(5):
#     fold_oof_df = oof_df[oof_df['fold']==fold].reset_index(drop=True)
#     LOGGER.info(f"========== fold: {fold} result ==========")
#     get_result(fold_oof_df)
        
#     preds = fold_oof_df[[f'pred_{c}' for c in CFG.target_cols]].values
#     labels = fold_oof_df[CFG.target_cols].values
#     score,_= get_score(labels, preds)
#     oof_folds_score+=score/5
# print('CV score of 5folds avg = {}',(oof_folds_score))

In [None]:
# The sample submission lists the StudyInstanceUIDs to predict; a debug run
# keeps only the first 10 rows (nrows=None reads the whole file).
sample_submission_csv = '../input/ranzcr-clip-catheter-line-classification/sample_submission.csv'
test = pd.read_csv(sample_submission_csv, nrows=10 if CFG.debug else None)

print(test.shape)
test.head()

# Transforms

In [None]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for the augmenting pipeline, ``'valid'`` for the
            resize-and-normalise pipeline (the one this notebook uses).

    Returns:
        An albumentations ``Compose`` ending in ``Normalize`` and ``ToTensorV2``.

    Raises:
        ValueError: If ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    if data == 'train':
        # These two transforms are missing from the notebook's albumentations
        # import list, so this branch used to fail with a NameError.
        from albumentations import CoarseDropout, HueSaturationValue

        return Compose([
            #Resize(CFG.size, CFG.size),
            RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
            RandomBrightnessContrast(p=0.2, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2)),
            HueSaturationValue(p=0.2, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
            ShiftScaleRotate(p=0.2, shift_limit=0.0625, scale_limit=0.2, rotate_limit=20),
            CoarseDropout(p=0.2),
            Cutout(p=0.2, max_h_size=16, max_w_size=16, fill_value=(0., 0., 0.), num_holes=16),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    if data == 'valid':
        return Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    # Falling through used to return None, which the datasets treat as
    # "no transform" and which yields raw, un-normalised arrays downstream.
    raise ValueError(f"data must be 'train' or 'valid', got {data!r}")

# Dataset

In [None]:
# ====================================================
# Dataset
# ====================================================
class mctrTestDataset(Dataset):
    """Dataset of test images addressed by ``StudyInstanceUID``.

    Currently behaves exactly like ``TestDataset``.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column, one row per image.
        transform: Optional callable invoked as ``transform(image=...)``; the
            ``'image'`` entry of its result is returned.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load ``<TEST_PATH>/<uid>.jpg`` as RGB and apply the transform, if any."""
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'cv2.imread could not load image (missing or unreadable): {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image


class TestDataset(Dataset):
    """Dataset of test images addressed by ``StudyInstanceUID``.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column, one row per image.
        transform: Optional callable invoked as ``transform(image=...)``; the
            ``'image'`` entry of its result is returned.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load ``<TEST_PATH>/<uid>.jpg`` as RGB and apply the transform, if any."""
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'cv2.imread could not load image (missing or unreadable): {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [None]:
# Visual sanity check: plot the first channel of the first test image, then
# the same channel flipped along its last axis (the flip inference() applies).
train_dataset = TestDataset(test, transform=get_transforms(data='valid'))

for i in range(1):
    image = train_dataset[i]
    first_channel = image[0]
    for view in (first_channel, first_channel.flip(-1)):
        plt.imshow(view)
        plt.show()

# MODEL

In [None]:
# ====================================================
# MODEL
# ====================================================
class CustomResNet200D(nn.Module):
    """timm backbone whose pooling and classifier are replaced by a custom head.

    The backbone's ``global_pool`` and ``fc`` are swapped for ``nn.Identity``;
    this module then applies its own adaptive average pooling and a linear
    layer with ``CFG.target_size`` outputs.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: When True, load backbone weights from the fixed Kaggle
            input path below before the head is replaced.
    """

    def __init__(self, model_name='resnet200d_320', pretrained=False):
        super().__init__()
        # NOTE: the attribute names (model / pooling / fc) are state-dict keys
        # of the saved checkpoints and must not be renamed.
        self.model = timm.create_model(model_name, pretrained=False)
        if pretrained:
            pretrained_path = '../input/resnet200d-pretrained-weight/resnet200d_ra2-bdba9bf9.pth'
            # map_location='cpu' keeps the load working on hosts without the
            # device the file was saved from; load_state_dict then copies the
            # values into the existing parameters.
            self.model.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
            print(f'load {model_name} pretrained model')
        # Read the feature width before the backbone classifier is removed.
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, CFG.target_size)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for a batch of images."""
        bs = x.size(0)
        features = self.model(x)
        # Pool to 1x1 and flatten to one row per batch element.
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return output

# Helper functions

In [None]:
# ====================================================
# Helper functions
# ====================================================
def mctrinference(models, test_loader, device):
    """Predict probabilities averaged over all models and a last-axis flip.

    Currently performs the same computation as ``inference``.

    Args:
        models: Sequence of modules returning logits for an image batch. They
            are used as given; nothing here switches them to eval mode.
        test_loader: Sized iterable yielding image-batch tensors.
        device: Device each batch is moved to before the forward passes.

    Returns:
        NumPy array made by concatenating, over batches, the mean of the
        sigmoid outputs across models and across the plain/flipped inputs.
    """
    probs = []
    for images in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device)
        # The flipped batch is the same for every model, so build it once per
        # batch instead of once per model.
        flipped = images.flip(-1)
        avg_preds = []
        with torch.no_grad():
            for model in models:
                y_preds1 = model(images)
                y_preds2 = model(flipped)
                y_preds = (y_preds1.sigmoid().to('cpu').numpy() + y_preds2.sigmoid().to('cpu').numpy()) / 2
                avg_preds.append(y_preds)
        probs.append(np.mean(avg_preds, axis=0))
    return np.concatenate(probs)


def inference(models, test_loader, device):
    """Predict probabilities averaged over all models and a last-axis flip.

    Args:
        models: Sequence of modules returning logits for an image batch. They
            are used as given; nothing here switches them to eval mode.
        test_loader: Sized iterable yielding image-batch tensors.
        device: Device each batch is moved to before the forward passes.

    Returns:
        NumPy array made by concatenating, over batches, the mean of the
        sigmoid outputs across models and across the plain/flipped inputs.
    """
    probs = []
    for images in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device)
        # The flipped batch is the same for every model, so build it once per
        # batch instead of once per model.
        flipped = images.flip(-1)
        avg_preds = []
        with torch.no_grad():
            for model in models:
                y_preds1 = model(images)
                y_preds2 = model(flipped)
                y_preds = (y_preds1.sigmoid().to('cpu').numpy() + y_preds2.sigmoid().to('cpu').numpy()) / 2
                avg_preds.append(y_preds)
        probs.append(np.mean(avg_preds, axis=0))
    return np.concatenate(probs)

# def inference(models, test_loader, device):
#     tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
#     probs = []
#     weights=[0.9504,0.9522,0.9486,0.9520,0.9464] 
    
#     for i, (images) in tk0:
#         images = images.to(device)
#         avg_preds = []
#         for j,model in enumerate(models):
#             with torch.no_grad():
#                 y_preds1 = model(images)
#                 y_preds2 = model(images.flip(-1))
#             y_preds = (y_preds1.sigmoid().to('cpu').numpy() + y_preds2.sigmoid().to('cpu').numpy()) / 2
            
#             y_preds = y_preds * weights[j]
            
#             avg_preds.append(y_preds)            
#         avg_preds = np.sum(avg_preds, axis=0) / (0.9504+0.9522+0.9486+0.9520+0.9464)
#         probs.append(avg_preds)
#     probs = np.concatenate(probs)
#     return probs

# inference

In [None]:
%%time
# One model per configured checkpoint. Iterating CFG.model_path replaces the
# hard-coded range(5), so the loop always matches the list it indexes into.
# NOTE(review): `models` rebinds the name of the `torchvision.models` import;
# the name is kept because the inference cell below passes it by this name.
models = []
for fold, model_path in enumerate(CFG.model_path):
    model = CustomResNet200D(CFG.model_name, pretrained=False)
    # Each checkpoint is a dict whose 'model' entry holds the state dict.
    model.load_state_dict(torch.load(model_path,map_location=device)['model'])
    model.eval()
    models.append(model.to(device))

In [None]:
%%time
# Run the ensemble over the whole test set, in file order (no shuffling), so
# the rows of `predictions` line up with the rows of `test`.
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
loader_options = dict(
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
)
test_loader = DataLoader(test_dataset, **loader_options)
predictions = inference(models, test_loader, device)

In [None]:
# submission: write the predicted probabilities into the target columns and
# save the id column plus targets as the submission file.
test[CFG.target_cols] = predictions
submission_columns = ['StudyInstanceUID', *CFG.target_cols]
test[submission_columns].to_csv(f'{OUTPUT_DIR}submission.csv', index=False)
test.head()