# SUMMARY

This notebook reproduces my submission that scores **0.971** on the private LB and reaches **71st** place. The notebook implements an ensemble of 5+2 CNN models using pretrained weights saved as Kaggle datasets.

![ensemble](https://i.postimg.cc/c4cPcXng/ranzcr.png)
- A detailed summary of my solution is published [in this discussion topic](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/226664)
- Complete code including base model training is available in [this GitHub repo](https://github.com/kozodoi/Kaggle_RANZCR_Challenge)

# PARAMETERS

In [None]:
####### NOTEBOOK CONFIGURATION

class CFG:
    # Notebook-wide settings declared as class attributes. The following cell
    # converts this namespace into a plain dict, so the rest of the notebook
    # reads values as CFG['name'].
    
    # environment
    device      = 'GPU'  # device ['CPU', 'GPU']
    cpu_workers = 2      # no. cores
    debug       = True   # debug runs inference on a single batch
    seed        = 13353  # random state

    # inference
    batch_size  = 50     # no. images per batch
    num_tta     = 2      # no. TTA flips (between 1 and 8)
    num_folds   = 3      # no. folds per model (between 1 and 5)
    fold_idx    = False  # load weights from one fold [False, fold index]
      
    # blending
    fold_blend  = 'pmean'  # how to blend folds  ['amean', 'median', 'gmean', 'pmean', 'rmean']
    model_blend = 'pmean'  # how to blend models ['amean', 'median', 'gmean', 'pmean', 'rmean']
    power       = 1/11     # power parameter for pmean
    w_public    = 0.25     # weight of public models in the final blend   

    # stacking
    lgb_folds       = 5      # no. folds for stacking
    label_features  = False  # use only label-specific features
    sort_targets    = True   # sort targets by AUC
    pred_as_feature = True   # include class predictions as features for other classes
    lgb_stop_rounds = 200    # no. early stopping rounds
    # keyword arguments unpacked into lgb.LGBMClassifier in the stacking cell
    lgb_params      = {'objective':         'binary',
                       'metrics':           'auc',
                       'n_estimators':      10000,
                       'learning_rate':     0.01,
                       'num_leaves':        8,
                       'max_depth':         5,
                       'min_child_samples': 20,
                       'subsample':         0.3,
                       'colsample_bytree':  0.5,
                       'reg_alpha':         0.1,
                       'reg_lambda':        0.1,
                       'silent':            True,
                       'verbosity':         -1,
                       'n_jobs' :           -1,
                       'random_state':      13353}

    # paths
    data_path = '/kaggle/input/ranzcr-clip-catheter-line-classification/'
    
    # list of models
    # Each directory is read for 'configuration.pkl', 'oof.csv' and
    # 'weights_fold{k}.pth' by later cells; the trailing slash matters because
    # file names are appended by plain string concatenation.
    models = ['/kaggle/input/ranzcr-v12/', 
              '/kaggle/input/ranzcr-v15-pub/', 
              '/kaggle/input/ranzcr-v17-pub/', 
              '/kaggle/input/ranzcr-v13-pub/', 
              '/kaggle/input/ranzcr-v14-pub/']

In [None]:
####### CONVERT CONFIGURATION

# Turn the CFG class namespace into a plain dict of user-defined settings.
# Dunder entries are filtered by pattern instead of deleting a fixed key list,
# so interpreter-added attributes (e.g. '__firstlineno__' and
# '__static_attributes__' on Python 3.13) do not leak into the configuration.
CFG = {key: value for key, value in vars(CFG).items()
       if not (key.startswith('__') and key.endswith('__'))}

During training, I save a `pickle` configuration file for each base model to make it convenient to import their weights, meta-parameters and image sizes during inference. Below I load all model configurations in a list.

In [None]:
####### LOAD MODEL CONFIGURATIONS

import pickle

# One configuration dict per base model, in the same order as CFG['models'].
CFGs = []

for model in CFG['models']:
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point CFG['models'] at datasets you trust.
    # The context manager closes the file handle deterministically.
    with open(model + 'configuration.pkl', 'rb') as cfg_file:
        model_cfg = pickle.load(cfg_file)
    CFGs.append(model_cfg)
    
print('Numer of models:', len(CFGs))

# PREPARATIONS

In [None]:
####### PACKAGES

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 100)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import RandomSampler, SequentialSampler

import albumentations as A
from albumentations.pytorch import ToTensorV2

from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import cv2

from scipy.stats import rankdata, gmean
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold

import lightgbm as lgb

from tqdm import tqdm

import random
import time
import sys
import os

import gc

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

In [None]:
####### TRAINING DEVICE
    
# Resolve the torch device named in the notebook configuration. Any value
# other than 'GPU' or 'CPU' leaves `device` untouched.
requested_device = CFG['device']

if requested_device == 'CPU':
    print('Training on CPU...')
    device = torch.device('cpu')
elif requested_device == 'GPU':
    print('Training on GPU...')
    device = torch.device('cuda:0')

The function `compute_blend()` implements simple blends (arithmetic mean, median, geometric mean, power mean and rank mean).

In [None]:
####### UTILITIES

# competition metric
def get_score(y_true, y_pred):
    """Compute the column-wise ROC AUC and its average.

    Both arguments are indexed as 2-D arrays `[:, i]`; the number of columns
    is taken from `y_true.shape[1]`.

    Returns:
        Tuple `(avg_score, scores)`: the mean AUC over columns and the list
        of per-column AUC values.
    """
    n_columns = y_true.shape[1]
    scores = [roc_auc_score(y_true[:, col], y_pred[:, col])
              for col in range(n_columns)]
    return np.mean(scores), scores

# simple ensembles
def compute_blend(df, preds, blend, CFG, weights = None):
    """Blend several prediction columns of `df` into one score per row.

    Args:
        df: DataFrame that contains the prediction columns.
        preds: labels of the columns to blend.
        blend: blend type, one of 'amean' (arithmetic mean), 'median',
            'gmean' (geometric mean), 'pmean' (power mean) or 'rmean'
            (mean of percentile ranks).
        CFG: configuration dict; only CFG['power'] is read, and only for
            the 'pmean' blend.
        weights: optional array with one weight per column. Defaults to equal
            weights summing to one. Weights are applied as given (they are
            not normalised here) and are ignored by 'median'.

    Returns:
        One blended value per row: a numpy array, or a pandas Series for
        the 'median' blend.

    Raises:
        ValueError: if `blend` is not one of the supported names.
    """
    if weights is None:
        weights = np.ones(len(preds)) / len(preds)
        
    if blend == 'amean':
        out = np.sum(df[preds].values * weights, axis = 1)
    elif blend == 'median':
        out = df[preds].median(axis = 1)
    elif blend == 'gmean':
        out = np.prod(np.power(df[preds].values, weights), axis = 1)
    elif blend == 'pmean':
        out = np.sum(np.power(df[preds].values, CFG['power']) * weights, axis = 1) ** (1 / CFG['power'])
    elif blend == 'rmean':
        out = np.sum(df[preds].rank(pct = True).values * weights, axis = 1)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `out`.
        raise ValueError("Unknown blend '{}'; expected one of "
                         "'amean', 'median', 'gmean', 'pmean', 'rmean'".format(blend))
    return out

# randomness
def seed_everything(seed):
    """Apply one seed to Python hashing, `random`, NumPy and PyTorch.

    Also switches cuDNN to deterministic mode and prints the seed in use.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Feed the same seed to every random number generator used here.
    seeders = (random.seed,
               np.random.seed,
               torch.manual_seed,
               torch.cuda.manual_seed,
               torch.cuda.manual_seed_all)
    for seeder in seeders:
        seeder(seed)

    torch.backends.cudnn.deterministic = True
    print('setting random seed to {}...'.format(seed))
    
# Seed all random number generators once, using the seed from the configuration.
seed_everything(CFG['seed'])

# DATA PREP

To save GPU quota, I was subsetting the test images to a single batch when committing the notebook. During submission, the condition `len(df) == 3582` is not fulfilled and the code is run on the full test set.

In [None]:
####### DATA PREPARATION

# import data
# The sample submission supplies the test image IDs ('StudyInstanceUID');
# it is later filled with predictions and saved as the submission file.
df = pd.read_csv(CFG['data_path'] + 'sample_submission.csv')

# num classes
# The order of this list defines the column order of every per-class array below.
CFG['targets'] = ['ETT - Abnormal', 
                  'ETT - Borderline', 
                  'ETT - Normal', 
                  'NGT - Abnormal', 
                  'NGT - Borderline', 
                  'NGT - Incompletely Imaged', 
                  'NGT - Normal', 
                  'CVC - Abnormal',
                  'CVC - Borderline', 
                  'CVC - Normal', 
                  'Swan Ganz Catheter Present']
CFG['num_classes'] = len(CFG['targets'])

# debug mode
# Subset to one batch only when the frame has exactly 3582 rows; per the note
# above this cell, that condition does not hold during submission.
if CFG['debug'] and len(df) == 3582:
    print('Subsetting data in the debug mode...')
    df = df.head(CFG['batch_size'])
    
print(df.shape)

To evaluate OOF performance, I am averaging AUCs on validation folds instead of computing OOF AUC on the full training data. Since AUC is a ranking indicator, this way to evaluate models provided a more robust CV/LB correlation.

In [None]:
####### CHECK OOF PERFORMANCE

# load preds: read each model's OOF file and join the prediction columns
# side by side on the image ID
for m in CFG['models']:

    model_cols              = [m + ' ' + c for c in CFG['targets']]
    tmp_train_preds         = pd.read_csv(m + '/oof.csv')
    tmp_train_preds.columns = ['StudyInstanceUID'] + CFG['targets'] + ['PatientID', 'fold'] + model_cols

    if m != CFG['models'][0]:
        train_preds = train_preds.merge(tmp_train_preds[['StudyInstanceUID'] + model_cols],
                                        how = 'left',
                                        on  = 'StudyInstanceUID')
    else:
        train_preds = tmp_train_preds

# sort models by performance: the score is the AUC averaged over the 5 folds
weights = []
for model_idx, m in enumerate(CFG['models']):
    model_cols = [m + ' ' + c for c in CFG['targets']]
    score      = 0
    for fold_idx in range(5):
        tmp_train_preds = train_preds.loc[train_preds['fold'] == fold_idx]
        fold_auc        = get_score(tmp_train_preds[CFG['targets']].values,
                                    tmp_train_preds[model_cols].values)[0]
        score += fold_auc / 5
    weights.append(score)

# ascending order: the weakest model comes first
sorted_ids     = list(np.argsort(np.array(weights)))
sorted_weights = [weights[i] for i in sorted_ids]
CFG['models']  = [CFG['models'][i] for i in sorted_ids]
CFGs           = [CFGs[i] for i in sorted_ids]

# display performance
separator = '-' * 45
print(separator)
print('{:<5}{:<33}{:>5}'.format('ID', 'Model', 'AUC'))
print(separator)
for model_idx, m in enumerate(CFG['models']):
    print('{:<5}{:<33}{:.4f}'.format(model_idx + 1, m, sorted_weights[model_idx]))
print(separator)

Below I evaluate different ensembles on OOF predictions. Unweighted power mean with `power = 1/11` worked best in my experiments.

In [None]:
####### CHECK BLEND PERFORMANCE

# blend types evaluated below, each unweighted and weighted ('w' prefix)
blend_types = ['amean', 'median', 'gmean', 'pmean', 'rmean']

# compute predictions
for c in CFG['targets']:   
    
    # Build the column list from CFG['models'] (already sorted by AUC) so that
    # column i belongs to the model scored by sorted_weights[i]. Selecting
    # columns by their position in train_preds would follow the merge order,
    # which predates the sort, and pair weights with the wrong models.
    class_preds = [m + ' ' + c for m in CFG['models']]
    
    for blend in blend_types:
        train_preds[blend + ' ' + c] = compute_blend(train_preds, class_preds, blend, CFG)
        
    for blend in blend_types:
        train_preds['w' + blend + ' ' + c] = compute_blend(train_preds, class_preds, blend, CFG, weights = np.array(sorted_weights))
    
# compute performance: AUC averaged over the 5 validation folds
print('-' * 18)
print('{:<10}{:>5}'.format('Blend', 'AUC'))
print('-' * 18)
for prefix in ['', 'w']:
    for blend in blend_types:
        score = 0
        for fold_idx in range(5):
            tmp_train_preds = train_preds.loc[train_preds['fold'] == fold_idx]
            score += get_score(tmp_train_preds[CFG['targets']].values, tmp_train_preds[[prefix + blend + ' ' + c for c in CFG['targets']]].values)[0] / 5
        print('{:<10}{:>5.4f}'.format(prefix + blend, score))    
    print('-' * 18)

# IMAGE PROCESSING

In [None]:
####### DATASET

def get_dataset(CFG):
    """Return an image Dataset class bound to one model configuration.

    The returned class closes over `CFG` and reads CFG['normalize'] (and
    CFG['targets'] when labels are requested) at item-access time.
    """

    class ImageData(Dataset):
        """Loads '<path>/<StudyInstanceUID>.jpg' images listed in `df`."""

        # initialization
        def __init__(self, 
                     df, 
                     path, 
                     transform = None, 
                     labeled   = False,
                     indexed   = False):
            self.df        = df
            self.path      = path
            self.transform = transform
            self.labeled   = labeled
            self.indexed   = indexed

        # length
        def __len__(self):
            return len(self.df)

        # get item  
        def __getitem__(self, idx):
            """Return the image, optionally with its label and index.

            Raises:
                FileNotFoundError: if the image file cannot be read.
            """

            # import
            row   = self.df.iloc[idx]
            path  = os.path.join(self.path, row['StudyInstanceUID'] + '.jpg')
            image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                raise FileNotFoundError(path)
        
            # crop black borders
            # Borrowed from https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/224146
            # An all-black image is left uncropped; cropping it would yield
            # an empty array that cannot be resized.
            mask = image > 0
            if mask.any():
                image = image[np.ix_(mask.any(1), mask.any(0))]

            # convert to RGB
            # The image was read as single-channel grayscale, so the
            # gray-to-RGB code is required; BGR2RGB expects 3 or 4 channels.
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            
            # augmentations
            if self.transform is not None:
                image = self.transform(image = image)['image']
                
            # transform 
            # The 'public' pipeline has no ToTensorV2 step, so the
            # HWC -> CHW tensor conversion is done here.
            if CFG['normalize'] == 'public':
                image = image.astype(np.float32)
                image = image.transpose(2, 0, 1)
                image = torch.tensor(image).float()

            # output
            if self.labeled:
                # Convert through a float array explicitly: a row taken from
                # a mixed-type frame is an object-dtype Series.
                label = torch.tensor(row[CFG['targets']].values.astype(np.float32)).float()
                if self.indexed:
                    idx = torch.tensor(idx)
                    return idx, image, label
                else: 
                    return image, label
            else:
                return image
            
    return ImageData

In [None]:
####### AUGMENTATIONS

def get_augs(CFG, image_size = None):
    """Build the test-time albumentations pipeline for one model.

    Images are resized to `image_size` x `image_size` and normalized. Models
    whose CFG['normalize'] is 'public' use the default A.Normalize() and get
    no tensor conversion here; all other models use the statistics stored in
    CFG['pixel_mean'] / CFG['pixels_std'] followed by ToTensorV2.
    """
    uses_public_norm = CFG['normalize'] == 'public'

    # resizing is shared by both pipelines
    steps = [A.Resize(height = image_size, 
                      width  = image_size)]

    if uses_public_norm:
        steps.append(A.Normalize())
    else:
        steps.append(A.Normalize(mean = CFG['pixel_mean'],
                                 std  = CFG['pixels_std']))
        steps.append(ToTensorV2())

    # output
    return A.Compose(steps)

In [None]:
####### TTA HELPER FUNCTION

'''
Based on https://github.com/haqishen/SIIM-ISIC-Melanoma-Classification-1st-Place-Solution
'''
def get_tta_flips(img, i):
    """Return test-time augmentation variant `i` of a batch of images.

    Indices 4 and above first swap dimensions 2 and 3; `i % 4` then selects
    no flip, a flip of dimension 3, a flip of dimension 2, or both flips.
    """
    # dimensions to flip, in order, for each value of i % 4
    flips_by_mode = {0: (), 1: (3,), 2: (2,), 3: (3, 2)}

    if i >= 4:
        img = img.transpose(2, 3)

    flip_dims = flips_by_mode.get(i % 4)
    if flip_dims is None:
        return None

    for dim in flip_dims:
        img = img.flip(dim)
    return img

# MODEL PREP

All models were imported from the `timm` library. I use [this dataset](https://www.kaggle.com/kozodoi/timm-pytorch-image-models) with the most recent version of `timm`.

In [None]:
####### MODEL ARCHITECTURE

def get_model(CFG, device, num_classes):
    """Create an untrained timm network with a `num_classes`-output head.

    Models whose CFG['weights'] is 'public' are wrapped in `CustomModel`
    (timm backbone with pooling and head replaced by identity, followed by
    average pooling and a linear layer). All other models get their own
    classification layer replaced: `classifier` for backbones with
    'efficient' in the name, `fc` otherwise. `device` is not used here; the
    caller moves the model to the device.
    """
    if CFG['weights'] == 'public':

        class CustomModel(nn.Module):

            def __init__(self, model_name = 'resnet200d', out_dim = 11, pretrained = False):
                super().__init__()
                # the backbone is always created without pretrained weights
                self.model             = timm.create_model(model_name, pretrained=False)
                n_features             = self.model.fc.in_features
                self.model.global_pool = nn.Identity()
                self.model.fc          = nn.Identity()
                self.pooling           = nn.AdaptiveAvgPool2d(1)
                self.fc                = nn.Linear(n_features, out_dim)

            def forward(self, x):
                batch_size = x.size(0)
                pooled     = self.pooling(self.model(x)).view(batch_size, -1)
                return self.fc(pooled)

        return CustomModel(CFG['backbone'], num_classes, True)

    model = timm.create_model(model_name = CFG['backbone'], 
                              pretrained = False,
                              in_chans   = CFG['channels'])

    # name of the attribute holding the final classification layer
    head_name = 'classifier' if 'efficient' in CFG['backbone'] else 'fc'
    in_features = getattr(model, head_name).in_features
    setattr(model, head_name, nn.Linear(in_features, num_classes))

    return model

# INFERENCE

The inference loop goes through 5 CNN models and stacks their predictions in a pandas dataframe. For each model, I also loop through the specified number of folds and load the weights of fold indices 0 to `num_folds - 1` (unless `fold_idx` pins a single fold).

In [None]:
########## INFERENCE FOR NORMAL MODELS

# Runs every (model, fold) checkpoint over the test images with TTA and
# collects the averaged sigmoid outputs as columns of `all_cnn_preds`.

# timer
cv_start = time.time()
gc.collect()

# counter
# all_counter counts finished (model, fold) runs; the submission cell compares
# it with the expected total.
# NOTE(review): a fold index of 0 is falsy, so CFG['fold_idx'] = 0 behaves
# like False here and in the counter update below.
all_counter  = 0
fold_counter = 0 if not CFG['fold_idx'] else CFG['fold_idx']

# placeholder
all_cnn_preds = None

# loop through models
for model_idx in range(len(CFG['models'])):
           
    # data prep
    # The dataset class and augmentations are built from this model's own
    # configuration (image size, normalization).
    ImageData    = get_dataset(CFGs[model_idx])
    test_dataset = ImageData(df        = df, 
                             path      = CFG['data_path'] + 'test/',
                             transform = get_augs(CFGs[model_idx], image_size = CFGs[model_idx]['image_size']),
                             labeled   = False,
                             indexed   = False)
    test_loader = DataLoader(test_dataset, 
                             batch_size  = CFG['batch_size'], 
                             shuffle     = False, 
                             num_workers = CFG['cpu_workers'],
                             pin_memory  = True)
    
    # loop through folds
    for fold_idx in tqdm(range(CFG['num_folds'])):
           
        # model prep
        # weights are selected by fold_counter, not by the loop variable fold_idx
        model = get_model(CFGs[model_idx], device = device, num_classes = CFG['num_classes'])
        model = model.to(device)
        model.load_state_dict(torch.load(CFG['models'][model_idx] + 'weights_fold{}.pth'.format(fold_counter),map_location = device))
        model.eval()

        # probs placeholder
        PROBS = []

        # loop through batches 
        with torch.no_grad():
            for batch_idx, inputs in enumerate(test_loader):

                # extract inputs 
                inputs = inputs.to(device)
                
                # preds placeholders
                probs = torch.zeros((inputs.shape[0], CFG['num_classes']), device = device)

                # inference with TTA
                # sigmoid outputs are summed over the TTA variants and
                # divided by num_tta when stored
                for tta_idx in range(CFG['num_tta']): 
                    preds  = model(get_tta_flips(inputs, tta_idx))
                    probs += preds.sigmoid()

                # store predictions
                PROBS.append(probs.detach().cpu() / CFG['num_tta'])
                
        # store predictions
        # column name: '<model dir><fold_idx>/<target>'; the fold-blending
        # cell selects columns by the model directory and target substrings
        cnn_preds     = pd.DataFrame(torch.cat(PROBS).numpy(), columns = [CFG['models'][model_idx] + str(fold_idx) + '/' + c for c in CFG['targets']])
        all_cnn_preds = pd.concat([all_cnn_preds, cnn_preds], axis = 1)

        # update counters
        # without a pinned fold, cycle fold_counter through 0 .. num_folds - 1
        all_counter += 1
        if not CFG['fold_idx']:
            fold_counter += 1
            if fold_counter == CFG['num_folds']:
                fold_counter = 0

        # clear memory
        del model, inputs, preds, probs, PROBS
        gc.collect()
        
    # clear memory
    del test_loader, test_dataset
    gc.collect()
        
# print performance
print('Finished {} preds x {} TTA in {:.2f} hours'.format(all_counter, CFG['num_tta'], (time.time() - cv_start) / 3600))

In [None]:
####### BLEND FOLD PREDICTIONS

print('Blending fold predictions with: ' + CFG['fold_blend'])

# Collapse the per-fold columns of every (model, target) pair into a single
# column named '<model dir><target>', then discard the per-fold columns.
model_target_pairs = [(m, c) for m in CFG['models'] for c in CFG['targets']]
for m, c in model_target_pairs:
    class_preds = all_cnn_preds.filter(like = m).filter(like = c).columns
    blended     = compute_blend(all_cnn_preds, class_preds, CFG['fold_blend'], CFG)
    all_cnn_preds[m + c] = blended
    all_cnn_preds.drop(columns = class_preds, inplace = True)
           
all_cnn_preds.head()

# STACKING

First, I load OOF predictions from CNNs and make sure train / test dataframes have the same format.

In [None]:
####### PREPARE OOF PREDS

# Rebuild the OOF table with prediction columns named '<model dir><target>',
# matching the column names of the blended test predictions.
for m in CFG['models']:

    pred_cols               = [m + c for c in CFG['targets']]
    tmp_train_preds         = pd.read_csv(m + '/oof.csv')
    tmp_train_preds.columns = ['StudyInstanceUID', *CFG['targets'], 'PatientID', 'fold', *pred_cols]

    if m != CFG['models'][0]:
        train_preds = train_preds.merge(tmp_train_preds[['StudyInstanceUID', *pred_cols]],
                                        how = 'left',
                                        on  = 'StudyInstanceUID')
    else:
        train_preds = tmp_train_preds

train_preds.head()

In [None]:
####### PREPARE TEST PREDS

# Put the image ID column in front of a copy of the blended CNN predictions.
test_preds = pd.concat([df['StudyInstanceUID'], all_cnn_preds.copy()], axis = 1)
test_preds.head()

In [None]:
####### TRANSFORM DATA

# Work on copies so the stacking cells can add columns freely.
X, X_test = train_preds.copy(), test_preds.copy()

# Every column that is not an ID, a fold marker or a target is a feature.
drop_features = ['StudyInstanceUID', 'PatientID', 'fold'] + CFG['targets']
features      = []
for column in X.columns:
    if column not in drop_features:
        features.append(column)

print(len(features), 'features')
display(features)

Stacking is performed on OOFs using [group stratified 5-fold partitioning](https://www.kaggle.com/underwearfitting/how-to-properly-split-folds) shared by [@underwearfitting](https://www.kaggle.com/underwearfitting). I am using the same partitioning to train base models as well.

In [None]:
########## PARTITIONING

# Replace the fold column that came with the OOF files by the fold
# assignment stored in the shared partitioning file.
folds = pd.read_csv('/kaggle/input/how-to-properly-split-folds/train_folds.csv')
fold_lookup = folds[['StudyInstanceUID', 'fold']]
X = X.drop(columns = 'fold').merge(fold_lookup, how = 'left', on = 'StudyInstanceUID')

Sorting targets by AUC helps to improve stacking performance. When starting with easier labels, meta-models for more difficult labels have more features (that include OOF meta predictions of previous labels).

In [None]:
####### SORT TARGETS BY AUC

# Order in which the stacking loop processes the labels.
if CFG['sort_targets']:

    # hand-ordered list, described in the note above as sorted by AUC
    sorted_targets = ['Swan Ganz Catheter Present',
                      'ETT - Normal', 
                      'ETT - Abnormal', 
                      'ETT - Borderline', 
                      'NGT - Abnormal', 
                      'NGT - Normal', 
                      'NGT - Incompletely Imaged', 
                      'NGT - Borderline', 
                      'CVC - Abnormal',
                      'CVC - Normal',
                      'CVC - Borderline']

else:

    # Keep the original target order. Without this branch `sorted_targets`
    # would be undefined and the stacking cell would raise a NameError.
    sorted_targets = list(CFG['targets'])

For each label, I do the following:
- training a separate stacking model on 5 folds
- comparing performance of stacking with the best simple blend (power mean)
- saving predictions of the method with the highest OOF AUC
- saving meta predictions as a feature for the stacking model for the next label

In [None]:
########## STACKING

# Trains one LightGBM meta-model per label and fold on the CNN OOF
# predictions, compares it with a simple blend of the CNN columns, and keeps
# whichever has the higher fold-averaged AUC.

# placeholders
# cnn_oof / lgb_oof: one row per training sample, one column per target
# (column order follows CFG['targets']); lgb_tst: test preds per stacking fold
cnn_oof = np.zeros((len(X),      CFG['num_classes']))
lgb_oof = np.zeros((len(X),      CFG['num_classes']))
lgb_tst = np.zeros((len(X_test), CFG['lgb_folds'], CFG['num_classes']))
all_lgb_preds = None

# modeling loop
cv_start = time.time()
print('-' * 45)
print('{:<28}{:<7}{:>5}'.format('Label', 'Model', 'AUC'))
print('-' * 45)
for label in sorted_targets:

    # extract label
    y = X[label]

    # extract features
    # when label_features is off this is the `features` list itself, which
    # grows by one 'LGB <label>' entry after each label (see below)
    label_features = [f for f in features if label in f] if CFG['label_features'] else features
    
    # placeholders
    lgb_auc = 0
    cnn_auc = 0

    # cross-validation
    for fold in range(CFG['lgb_folds']):
        
        # extract index
        # index labels are used positionally with .iloc below
        # NOTE(review): this relies on X having a default 0..n-1 index — confirm
        trn_idx = X.loc[X['fold'] != fold].index
        val_idx = X.loc[X['fold'] == fold].index
        
        # extract samples
        X_train, y_train = X.iloc[trn_idx][label_features], y.iloc[trn_idx]
        X_valid, y_valid = X.iloc[val_idx][label_features], y.iloc[val_idx]
        X_test_label     = X_test[label_features]

        # modeling
        # NOTE(review): the `early_stopping_rounds` and `verbose` fit arguments
        # are not accepted by recent LightGBM releases — confirm the installed version
        clf = lgb.LGBMClassifier(**CFG['lgb_params']) 
        clf = clf.fit(X_train, y_train, 
                      eval_set              = [(X_valid, y_valid)],
                      early_stopping_rounds = CFG['lgb_stop_rounds'],
                      verbose               = False)

        # prediction
        # cnn_oof holds the simple blend of all feature columns whose name
        # contains the label; note it uses CFG['fold_blend'] as the blend type
        cnn_oof[val_idx, CFG['targets'].index(label)] = compute_blend(X_valid, list(X_valid.filter(like = label).columns), CFG['fold_blend'], CFG)
        lgb_oof[val_idx, CFG['targets'].index(label)] = clf.predict_proba(X_valid)[:,      1]
        lgb_tst[:, fold, CFG['targets'].index(label)] = clf.predict_proba(X_test_label)[:, 1]
        
        # performance
        # AUC is averaged over folds rather than computed on the pooled OOF
        cnn_auc += roc_auc_score(y_valid ,cnn_oof[val_idx, CFG['targets'].index(label)]) / CFG['lgb_folds']
        lgb_auc += roc_auc_score(y_valid, lgb_oof[val_idx, CFG['targets'].index(label)]) / CFG['lgb_folds']

    # print label performance
    print('{:<29}{:<7}{:>5.4f}'.format(label, 'CNN', cnn_auc))
    print('{:<29}{:<7}{:>5.4f}'.format(label, 'LGB', lgb_auc))
    print('-' * 45)    
    
    # replace LGB pred if CNN pred is better
    if cnn_auc >= lgb_auc:
        for fold in range(CFG['lgb_folds']):

            # extract data
            val_idx = X.loc[X['fold'] == fold].index
            X_valid, y_valid = X.iloc[val_idx][label_features], y.iloc[val_idx]

            # replace predictions
            # X_test_label is the frame left over from the last CV iteration above
            lgb_oof[val_idx, CFG['targets'].index(label)] = compute_blend(X_valid,      list(X_valid.filter(like      = label).columns), CFG['fold_blend'], CFG)
            lgb_tst[:, fold, CFG['targets'].index(label)] = compute_blend(X_test_label, list(X_test_label.filter(like = label).columns), CFG['fold_blend'], CFG)
            
    # store predictions
    # one column per stacking fold, named '<fold>/<label>'
    for fold in range(CFG['lgb_folds']):
        lgb_preds     = pd.DataFrame(lgb_tst[:, fold, CFG['targets'].index(label)], columns = [str(fold) + '/' + label])
        all_lgb_preds = pd.concat([all_lgb_preds, lgb_preds], axis = 1)
    
    # add prediction as feature to next model
    # train rows get the OOF prediction, test rows the mean over stacking folds
    if CFG['pred_as_feature']:
        X['LGB '      + label] = lgb_oof[:, CFG['targets'].index(label)]
        X_test['LGB ' + label] = np.mean(lgb_tst[:, :, CFG['targets'].index(label)], axis = 1)
        features.append('LGB ' + label)
        
# print overall performance
score_cnn = 0
score_lgb = 0
for fold in range(CFG['lgb_folds']):
    val_idx    = X.loc[X['fold'] == fold].index
    score_cnn += get_score(X.iloc[val_idx][CFG['targets']].values, cnn_oof[val_idx, :])[0] / CFG['lgb_folds']
    score_lgb += get_score(X.iloc[val_idx][CFG['targets']].values, lgb_oof[val_idx, :])[0] / CFG['lgb_folds']
print('{:<29}{:<7}{:>5.4f}'.format('OVERALL', 'CNN', score_cnn))
print('{:<29}{:<7}{:>5.4f}'.format('OVERALL', 'LGB', score_lgb))
print('-' * 45)
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))

# clear memory
del cnn_oof, lgb_tst, lgb_oof, clf, X_train, X_valid, X_test, X_test_label, y_train, y_valid
del features, label_features, trn_idx, val_idx, folds, train_preds, test_preds
gc.collect()

In [None]:
####### BLEND FOLD PREDICTIONS

print('Blending fold predictions with: ' + CFG['fold_blend'])

# Collapse the per-fold stacking columns ('<fold>/<target>') of each target
# into a single column named after the target.
for c in CFG['targets']:
    class_preds      = all_lgb_preds.filter(like = c).columns
    blended          = compute_blend(all_lgb_preds, class_preds, CFG['fold_blend'], CFG)
    all_lgb_preds[c] = blended
    all_lgb_preds.drop(columns = class_preds, inplace = True)
           
all_lgb_preds.head()

# PUBLIC MODELS INFERENCE
In addition to 5 CNN models used in the stacking ensemble, I add 2 public models:
- [SeResNet152D](https://www.kaggle.com/ammarali32/seresnet152d-cv9615) by [@ammarali32](https://www.kaggle.com/ammarali32)
- [ResNet200D](https://www.kaggle.com/ammarali32/resnet200d-public) by [@ammarali32](https://www.kaggle.com/ammarali32)

In [None]:
####### INFERENCE FOR PUBLIC MODELS 

# Runs the two public checkpoints over the test images (original + horizontal
# flip) and stores their per-class probabilities in `df_pub`. Skipped entirely
# when the public models have zero weight in the final blend.
if CFG['w_public'] > 0:

    gc.collect()

    BATCH_SIZE = 96
    IMAGE_SIZE = 640
    TEST_PATH               = '../input/ranzcr-clip-catheter-line-classification/test'
    MODEL_PATH_resnet200d   = '../input/resnet200d-public/resnet200d_320_CV9632.pth'
    MODEL_PATH_seresnet152d = '../input/seresnet152d-cv9615/seresnet152d_320_CV96.15.pth'


    class TestDataset(Dataset):
        """Loads '<TEST_PATH>/<StudyInstanceUID>.jpg' images listed in `df`."""

        def __init__(self, df, transform=None):
            self.df = df
            self.file_names = df['StudyInstanceUID'].values
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            file_name = self.file_names[idx]
            file_path = f'{TEST_PATH}/{file_name}.jpg'   
            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread returns None instead of raising on a missing or
            # unreadable file; fail here with the offending path
            if image is None:
                raise FileNotFoundError(file_path)
            # crop black borders; an all-black image is left uncropped
            # because cropping it would yield an empty array
            mask  = image > 0
            if mask.any():
                image = image[np.ix_(mask.any(1), mask.any(0))]
            # The image is single-channel grayscale, so the gray-to-RGB code
            # is required; BGR2RGB expects 3 or 4 channels.
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            if self.transform:
                augmented = self.transform(image=image)
                image = augmented['image']
            return image


    def get_transforms(CFG):
        """Return the resize / normalize / to-tensor pipeline (CFG is unused)."""
        return A.Compose([A.Resize(IMAGE_SIZE, IMAGE_SIZE),
                          A.Normalize(),
                          ToTensorV2()])


    class _PublicModel(nn.Module):
        """timm backbone with average pooling and an 11-output linear head.

        Attribute names (`model`, `pooling`, `fc`) are kept as in the
        original classes because the checkpoint keys depend on them.
        """

        def __init__(self, model_name):
            super().__init__()
            self.model = timm.create_model(model_name, pretrained=False)
            n_features = self.model.fc.in_features
            self.model.global_pool = nn.Identity()
            self.model.fc = nn.Identity()
            self.pooling = nn.AdaptiveAvgPool2d(1)
            self.fc = nn.Linear(n_features, 11)

        def forward(self, x):
            bs = x.size(0)
            features = self.model(x)
            pooled_features = self.pooling(features).view(bs, -1)
            output = self.fc(pooled_features)
            return output


    class ResNet200D(_PublicModel):
        def __init__(self, model_name = 'resnet200d'):
            super().__init__(model_name)


    class SeResNet152D(_PublicModel):
        def __init__(self, model_name = 'seresnet152d'):
            super().__init__(model_name)


    def inference(models, test_loader, device):
        """Return probabilities averaged over `models` and a horizontal flip.

        Each model predicts every batch twice (as-is and flipped along the
        last dimension); sigmoid outputs are averaged over the two views and
        then over the models. Returns one numpy array for the whole loader.
        """
        tk0 = tqdm(enumerate(test_loader), total = len(test_loader))
        probs = []
        for i, (images) in tk0:
            images = images.to(device)
            avg_preds = []
            for model in models:
                with torch.no_grad():
                    y_preds1 = model(images)
                    y_preds2 = model(images.flip(-1))
                y_preds = (y_preds1.sigmoid().to('cpu').numpy() + y_preds2.sigmoid().to('cpu').numpy()) / 2
                avg_preds.append(y_preds)
            avg_preds = np.mean(avg_preds, axis = 0)
            probs.append(avg_preds)
        probs = np.concatenate(probs)
        return probs


    # map_location lets checkpoints saved on GPU load when CFG['device'] is
    # 'CPU', consistent with the checkpoint loading in the CNN inference cell
    models200D = []
    model = ResNet200D()
    model.load_state_dict(torch.load(MODEL_PATH_resnet200d, map_location = device)['model'])
    model.eval()
    model.to(device)
    models200D.append(model)
    del model

    models152D = []
    model = SeResNet152D()
    model.load_state_dict(torch.load(MODEL_PATH_seresnet152d, map_location = device)['model'])
    model.eval()
    model.to(device)
    models152D.append(model)
    del model
    

    test_dataset    = TestDataset(df, transform = get_transforms(CFG))
    test_loader     = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False, num_workers = CFG['cpu_workers'], pin_memory = True)
    predictions200d = inference(models200D, test_loader, device)
    predictions152d = inference(models152D, test_loader, device)
    
    # the 11 columns after the ID column of the sample submission
    target_cols = df.iloc[:, 1:12].columns.tolist()
    
    # column prefixes keep the two models apart until they are blended
    predictions200d = pd.DataFrame(predictions200d, columns = ['200d/' + c for c in target_cols])
    predictions152d = pd.DataFrame(predictions152d, columns = ['152d/' + c for c in target_cols])
    predictions     = pd.concat([predictions200d, predictions152d], axis = 1)
    
    df_pub = predictions.copy()
    display(df_pub.head())

In [None]:
####### BLEND PUBLIC MODELS

# Blend the two public models per target with fixed weights 2/3 and 1/3
# (in the column order of df_pub). With a zero public weight there are no
# public predictions, so a copy of the stacking output stands in.
if CFG['w_public'] != 0:

    public_weights = np.array([2/3, 1/3])
    for c in CFG['targets']:
        class_preds = df_pub.filter(like = c).columns
        df_pub[c]   = compute_blend(df_pub, class_preds, CFG['model_blend'], CFG, weights = public_weights)
        df_pub.drop(columns = class_preds, inplace = True)

else:

    df_pub = all_lgb_preds.copy()
        
df_pub.head()

# SUBMISSION

In [None]:
####### BLEND MY AND PUBLIC MODELS

# Prefix both prediction sets so their columns stay distinct after the
# concatenation, then blend each target with weights (1 - w_public, w_public).
all_preds = all_lgb_preds.copy()
all_preds.columns = ['my/' + name for name in all_preds.columns]
df_pub.columns    = ['public/' + name for name in df_pub.columns]

preds = pd.concat([all_preds, df_pub], axis = 1)

final_weights = np.array([1 - CFG['w_public'], CFG['w_public']])
for c in CFG['targets']:
    class_preds = preds.filter(like = c).columns
    preds[c]    = compute_blend(preds, class_preds, CFG['model_blend'], CFG, weights = final_weights)
    preds.drop(columns = class_preds, inplace = True)

preds.head()

Before saving `submission.csv`, I check that all model/fold combinations have successfully completed inference. This helps to catch memory errors that sometimes do not stop the notebook from running and can produce poor LB performance because some models have not been run.

In [None]:
####### SUBMISSION FILE

# Write the submission only if every (model, fold) inference run completed.
expected_runs = len(CFG['models']) * CFG['num_folds']

if all_counter != expected_runs:
    # Fail loudly: silently skipping the file hides the incomplete inference
    # that this check is meant to catch.
    raise RuntimeError('Inference incomplete: finished {} of {} model/fold runs; '
                       'submission.csv was not written'.format(all_counter, expected_runs))

# the submission stores percentile ranks of the blended predictions
for c in CFG['targets']:
    df[c] = preds[c].rank(pct = True)

df.to_csv('submission.csv', index = False)
display(df.head())