## About this notebook
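- Inference-only ensemble of two models:
  - Timm ResNet200d - input size 512
  - Timm EfficientNetB5_ns (`tf_efficientnet_b5_ns`) - input size 640
- For each model, predictions are averaged over its 5 fold checkpoints (multi-label stratified k-fold, num=5)
- The two models are then combined with a per-target weighted average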

## Library

In [1]:
# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import ast
import copy
import gc
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.utils import check_random_state
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose,
    MotionBlur, MedianBlur, GaussianBlur,
    CLAHE, RandomGamma, OpticalDistortion, HueSaturationValue, RGBShift, ToGray
    
    )
from albumentations.pytorch import ToTensorV2,ToTensor
from albumentations import ImageOnlyTransform

import timm

from torch.cuda.amp import autocast, GradScaler

import warnings 
warnings.filterwarnings('ignore')

# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('GPU:', device)

GPU: cuda


## Config

In [2]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Static settings shared by the cells of this notebook.

    Everything is a plain class attribute, so values are read as ``CFG.<name>``
    without instantiating the class.
    """

    # ---- run mode -------------------------------------------------------
    debug = False
    print_freq = 100

    # ---- data loading ---------------------------------------------------
    num_workers = 4
    batch_size = 64

    # ---- ensemble members: timm architecture name + square input size ----
    model_name_1 = 'resnet200d'
    size_1 = 512
    model_name_2 = 'tf_efficientnet_b5_ns'  # commented-out alternative: 'resnext50_32x4d'
    size_2 = 640                            # commented-out alternative: 512

    # ---- optimisation hyper-parameters ----------------------------------
    # None of the cells visible in this notebook read these values
    # (only `epochs` is overwritten when `debug` is on).
    scheduler = 'CosineAnnealingLR'
    # supported names: 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts'
    epochs = 6
    T_max = 6  # CosineAnnealingLR
    # Disabled scheduler settings kept for reference:
    #   ReduceLROnPlateau: factor=0.2, patience=4, eps=1e-6
    #   CosineAnnealingWarmRestarts: T_0=6
    lr = 1e-4
    min_lr = 1e-6
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # ---- reproducibility ------------------------------------------------
    seed = 42

    # ---- targets --------------------------------------------------------
    # `target_size` is the width of each model's output layer and must stay
    # equal to len(target_cols).
    target_size = 11
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

    # ---- folds: one checkpoint per listed fold is loaded for each model ----
    trn_fold = [0, 1, 2, 3, 4]
    n_fold = len(trn_fold)

    # ---- flags not read by any cell visible here -------------------------
    train = True
    annotation = False
    Filter = False
    
if CFG.debug:
    # Debug mode only shortens the epoch count.
    # The former `train = train.sample(n=100, ...)` line was dropped: no module-level
    # `train` DataFrame is defined at this point of the notebook (CFG.train is a
    # boolean class attribute, not a frame), so enabling debug raised NameError.
    CFG.epochs = 1

In [3]:
def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (via
    ``torch.manual_seed`` and ``torch.cuda.manual_seed``), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators (default 42).
    """
    # Note: the running interpreter's hash seed is fixed at start-up, so this
    # environment variable can only influence processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

# Seed all generators up front with the notebook-wide seed from the config.
seed_torch(seed=CFG.seed)

In [4]:
# Directory of test images; TestDataset reads `<TEST_PATH>/<StudyInstanceUID>.jpg` from it.
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test'
# Directories holding the per-fold `<model_name>_fold<k>_best.pth` checkpoints:
# MODEL_PATH_1 is combined with CFG.model_name_1, MODEL_PATH_2 with CFG.model_name_2.
# Both keep their trailing slash because file names are appended by plain string concatenation.
MODEL_PATH_1 = '../input/resnet200d-512x512/'
MODEL_PATH_2 = '../input/effb5-ns-exp007/'

In [5]:
# The sample submission doubles as the test index: its `StudyInstanceUID` column names
# the images to load, and its remaining columns are overwritten with predictions later.
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')

In [6]:
class TestDataset(Dataset):
    """Dataset yielding one test image per row of ``df``.

    Images are read from ``{TEST_PATH}/{StudyInstanceUID}.jpg``, converted from
    OpenCV's BGR channel order to RGB and, when a transform is given, passed
    through it.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column (one image id per row).
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and return the image at position ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}.jpg'

        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising; without this
        # check the problem would only surface as an opaque error inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image file: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)['image']

        return image

In [7]:
def _build_test_transforms(size):
    """Build the deterministic test-time pipeline for a square input of ``size`` pixels.

    The pipeline resizes to ``size x size``, normalises each channel and converts
    the result with ``ToTensorV2``.
    """
    return Compose([
        Resize(size, size),
        # NOTE(review): presumably the ImageNet channel statistics used when the
        # backbones were trained - confirm against the training notebook.
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])


def get_transforms_1():
    """Return the test-time transforms for the first model (input size ``CFG.size_1``)."""
    return _build_test_transforms(CFG.size_1)


def get_transforms_2():
    """Return the test-time transforms for the second model (input size ``CFG.size_2``)."""
    return _build_test_transforms(CFG.size_2)

In [8]:
class ResNet(nn.Module):
    """timm backbone exposing an ``fc`` head, re-headed for ``CFG.target_size`` outputs.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Swap the original classification layer for one sized to this task.
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG.target_size)
        # The attribute must stay named `model`: it prefixes every state_dict key.
        self.model = backbone

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs of the wrapped network for ``x``."""
        return self.model(x)

class EffB5_ns(nn.Module):
    """timm backbone exposing a ``classifier`` head, re-headed for ``CFG.target_size`` outputs.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name='tf_efficientnet_b5_ns', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Swap the original classification layer for one sized to this task.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, CFG.target_size)
        # The attribute must stay named `model`: it prefixes every state_dict key.
        self.model = backbone

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs of the wrapped network for ``x``."""
        return self.model(x)

In [9]:
def load_state(model, model_path, map_location=None):
    """Read a checkpoint and return its ``'model'`` state dict, ready for ``model``.

    The checkpoint file is read from disk exactly once. The state dict is first
    loaded into ``model`` strictly (which also leaves these weights in ``model``).
    If that fails because the keys or shapes do not match, the checkpoint is
    treated as one saved from a wrapped (multi-GPU) model and a leading
    ``'module.'`` is stripped from every key; that stripped dict is returned
    without a second validation, as before.

    Args:
        model: Module whose architecture the checkpoint is expected to match.
        model_path: Path of a checkpoint whose top level is a mapping with a
            ``'model'`` entry.
        map_location: Forwarded to ``torch.load``. When left as ``None`` and no
            CUDA device is available, tensors are mapped to the CPU so that
            checkpoints saved on a GPU can still be opened.

    Returns:
        The state dict, with any ``'module.'`` key prefix removed when needed.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        KeyError: If the checkpoint has no ``'model'`` entry.
    """
    if map_location is None and not torch.cuda.is_available():
        map_location = 'cpu'

    state_dict = torch.load(model_path, map_location=map_location)['model']

    try:  # single GPU model_file
        model.load_state_dict(state_dict, strict=True)
    except RuntimeError:  # multi GPU model_file: keys carry a 'module.' prefix
        prefix = 'module.'
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

    return state_dict

In [10]:
def inference(model, states, test_loader, device):
    """Predict probabilities for every batch, averaged over all given weight sets.

    Each batch is moved to ``device`` once and then scored with every state
    dict in ``states`` in turn, so the loader is traversed a single time.

    Args:
        model: Network whose architecture matches every entry of ``states``.
        states: Iterable of state dicts (one per fold checkpoint).
        test_loader: Sized iterable yielding batches of image tensors.
        device: Device on which the forward passes run.

    Returns:
        NumPy array of sigmoid probabilities for all batches concatenated along
        the first axis, each averaged over ``states``.
    """
    model.to(device)

    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    batch_probs = []

    for _, batch in progress:
        batch = batch.to(device)

        fold_probs = []
        for weights in states:
            model.load_state_dict(weights)
            model.eval()
            with torch.no_grad():
                logits = model(batch)
            fold_probs.append(torch.sigmoid(logits).cpu().numpy())

        # Mean over the fold axis gives the per-batch ensemble probability.
        batch_probs.append(np.mean(fold_probs, axis=0))

    return np.concatenate(batch_probs)

In [11]:
# First ensemble member: one network instance plus the state dict of every fold checkpoint.
model1 = ResNet(CFG.model_name_1, pretrained=False)
checkpoint_paths_1 = [
    MODEL_PATH_1 + f'{CFG.model_name_1}_fold{fold}_best.pth'
    for fold in CFG.trn_fold
]
states1 = [load_state(model1, path) for path in checkpoint_paths_1]

# Second ensemble member, built the same way from its own checkpoint directory.
model2 = EffB5_ns(CFG.model_name_2, pretrained=False)
checkpoint_paths_2 = [
    MODEL_PATH_2 + f'{CFG.model_name_2}_fold{fold}_best.pth'
    for fold in CFG.trn_fold
]
states2 = [load_state(model2, path) for path in checkpoint_paths_2]

In [12]:
# Both loaders share the same settings; only the dataset (i.e. the input size
# baked into its transform) differs. shuffle=False keeps predictions aligned
# with the row order of `test`.
loader_kwargs = dict(
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
)

test_dataset_1 = TestDataset(test, transform=get_transforms_1())
test_loader_1 = DataLoader(test_dataset_1, **loader_kwargs)

test_dataset_2 = TestDataset(test, transform=get_transforms_2())
test_loader_2 = DataLoader(test_dataset_2, **loader_kwargs)

# Fold-averaged sigmoid probabilities of each ensemble member over the whole test loader.
predictions1 = inference(model1, states1, test_loader_1, device)
predictions2 = inference(model2, states2, test_loader_2, device)

# Sanity check: both arrays are expected to share one shape (rows x targets).
print('predictions1.shape',predictions1.shape)
print('predictions2.shape',predictions2.shape)

# weighted
# Per-target blend weights: `a` scales predictions1 and `b` scales predictions2,
# one entry per prediction column (presumably in CFG.target_cols order - confirm
# against the training notebooks). Columns 3 and 4 rely on the first model only.
a = [0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 0.4, 0.4, 0.4, 0.5]
b = [0.5, 0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.6, 0.6, 0.6, 0.5]

# Explicit arrays instead of relying on implicit `list * ndarray` coercion.
weights_1 = np.asarray(a, dtype=np.float64)
weights_2 = np.asarray(b, dtype=np.float64)

# Fail loudly on a mis-sized or non-complementary weight vector rather than
# letting broadcasting produce a silently wrong blend.
if predictions1.shape != predictions2.shape:
    raise ValueError(
        f'prediction shapes differ: {predictions1.shape} vs {predictions2.shape}')
if weights_1.shape != (predictions1.shape[1],) or weights_2.shape != weights_1.shape:
    raise ValueError(
        f'expected {predictions1.shape[1]} weights per model, '
        f'got {weights_1.shape[0]} and {weights_2.shape[0]}')
if not np.allclose(weights_1 + weights_2, 1.0):
    raise ValueError('blend weights must sum to 1 for every target')

predictions = weights_1 * predictions1 + weights_2 * predictions2

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

predictions1.shape (3582, 11)
predictions2.shape (3582, 11)


In [13]:
# Prediction columns are matched to the submission by NAME (CFG.target_cols) rather than
# by position, so a different column order in sample_submission.csv cannot silently
# misassign targets. NOTE(review): this assumes the model outputs follow
# CFG.target_cols order - confirm against the training notebooks.
model_cols = list(CFG.target_cols)

missing_cols = [col for col in model_cols if col not in test.columns]
if missing_cols:
    raise KeyError(f'sample submission is missing target columns: {missing_cols}')
if predictions.shape != (len(test), len(model_cols)):
    raise ValueError(
        f'predictions have shape {predictions.shape}, '
        f'expected {(len(test), len(model_cols))}')

# Target columns in the order the submission file itself lists them.
target_cols = [col for col in test.columns if col in model_cols]

test[model_cols] = predictions
test[['StudyInstanceUID'] + target_cols].to_csv('submission.csv', index=False)
test.head()

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,1.2.826.0.1.3680043.8.498.46923145579096002617...,0.037092,0.422785,0.467188,0.000734,0.002099,0.027942,0.939476,0.021512,0.121197,0.966528,0.994875
1,1.2.826.0.1.3680043.8.498.84006870182611080091...,9.8e-05,0.000299,0.000297,3.7e-05,5.7e-05,0.000276,7.1e-05,0.005022,0.011402,0.993929,6.1e-05
2,1.2.826.0.1.3680043.8.498.12219033294413119947...,0.000185,0.000432,0.000419,0.000167,0.000235,0.000368,0.000129,0.010118,0.348131,0.706719,0.00017
3,1.2.826.0.1.3680043.8.498.84994474380235968109...,0.005447,0.057792,0.058753,0.014815,0.01446,0.953185,0.02563,0.046685,0.152435,0.907846,0.035114
4,1.2.826.0.1.3680043.8.498.35798987793805669662...,0.000303,0.000874,0.000718,0.000278,0.000374,0.00043,0.000477,0.009369,0.27078,0.907916,0.000184
