## This notebook trains:
- PyTorch Resnet200d
- The folds used are based on @abhishek's training kernel [here](https://www.kaggle.com/abhishek/ranzcr-tez-training-efficientnet-5/data)
- This notebook is based on the kernel made by @yasufuminakama ([check it out here](https://www.kaggle.com/yasufuminakama/ranzcr-resnext50-32x4d-starter-training))

### Hope this is useful!

In [None]:
# Make the bundled package checkout(s) importable and set where outputs go.
import sys

OUTPUT_DIR = './'
package_paths = ['../input/pytorch-image-models/pytorch-image-models-master']

# Appended (not prepended), so already-installed packages keep priority.
sys.path.extend(package_paths)

In [None]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from contextlib import contextmanager
from datetime import datetime
import time
import math
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.optim import Adam, SGD
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.cuda.amp import autocast, GradScaler

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, ToGray
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom
#from efficientnet_pytorch import EfficientNet
from scipy.ndimage.interpolation import zoom

# Module-level compute device: CUDA when torch reports it as available, else CPU.
# train_loop() moves the model to this device and passes it to train_fn/valid_fn.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Configuration

In [None]:
# Central experiment configuration, read by the data, model and training code.
CFG = {
    # --- paths ---
    "output_path": "./",  # prefix of the default log file path
    "train_path": "../input/ranzcr-clip-catheter-line-classification/train",
    # --- run setup ---
    "print_freq": 100,  # progress line every N batches
    "fold_num": 4,  # number of GroupKFold splits
    "seed": 2003,
    "model_arch": "resnet200d",  # timm model name
    "img_size": 640,  # images are resized to img_size x img_size
    "epochs": 1,
    # --- LR scheduler ---
    # One of: 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts'
    "scheduler": "ReduceLROnPlateau",
    "factor": 0.2,  # ReduceLROnPlateau
    "patience": 4,  # ReduceLROnPlateau
    "eps": 1e-6,  # ReduceLROnPlateau
    "T_max": 6,  # CosineAnnealingLR
    "T_0": 6,  # CosineAnnealingWarmRestarts
    # --- optimisation ---
    "gradient_accumulation_steps": 1,
    "max_grad_norm": 1000,
    "batch_size": 8,
    "lr": 1e-4,
    # NOTE(review): equals `lr`, so the cosine schedulers (which use this as
    # eta_min) would keep the learning rate flat -- confirm this is intended.
    "min_lr": 1e-4,
    "weight_decay": 1e-6,
    "num_workers": 4,
    # NOTE(review): the next three keys are not referenced by the visible code.
    "accum_iter": 2,
    "verbose_step": 1,
    "device": "cuda:0",
    # --- targets ---
    "target_size": 11,  # width of the model's output layer
    "target_cols": [
        "ETT - Abnormal",
        "ETT - Borderline",
        "ETT - Normal",
        "NGT - Abnormal",
        "NGT - Borderline",
        "NGT - Incompletely Imaged",
        "NGT - Normal",
        "CVC - Abnormal",
        "CVC - Borderline",
        "CVC - Normal",
        "Swan Ganz Catheter Present",
    ],
    # --- what to run ---
    "train": True,
    "trn_fold": [0],  # Train only fold-0
}

# CV

In [None]:
# Competition label table; copied into `folds` below to build the CV split.
train = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')
# Alternative: load a precomputed fold assignment instead of splitting here (disabled).
# folds = pd.read_csv('../input/ranzcr-folds/train_folds.csv')
# Sample submission table. NOTE(review): `test` is not used in the visible code.
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')

In [None]:
# Assign every row a validation fold id, grouped by PatientID so that all rows
# sharing a PatientID land in the same fold.
folds = train.copy()
groups = folds['PatientID'].values
Fold = GroupKFold(n_splits=CFG['fold_num'])

# Each row appears in exactly one validation split, so every slot gets written.
_fold_of_row = np.zeros(len(folds), dtype=int)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds[CFG['target_cols']], groups)):
    _fold_of_row[val_index] = n
folds['fold'] = _fold_of_row

In [None]:
# Disabled snippet kept as a bare string literal (evaluated and discarded, so it
# has no effect): it would rename the columns of `folds` to the listed names.
'''
folds.columns = ['StudyInstanceUID', 'ETT - Abnormal', 'ETT - Borderline',
       'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline',
       'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal',
       'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present',
       'PatientID', 'fold']
'''

# Utils

In [None]:
def get_score(y_true, y_pred):
    """Compute the ROC AUC of every target column and their mean.

    Args:
        y_true: 2-D array of ground-truth labels, one column per target.
        y_pred: 2-D array of predicted scores, indexed the same way.

    Returns:
        Tuple ``(mean_auc, per_column_aucs)`` where the second item is a list
        with one AUC per column of ``y_true``.
    """
    per_column = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(y_true.shape[1])
    ]
    return np.mean(per_column), per_column


@contextmanager
def timer(name):
    """Context manager that logs a start line and the elapsed seconds for a block.

    Args:
        name: Label placed in square brackets in both log lines.

    The closing line is only logged when the wrapped block finishes without
    raising.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')


def init_logger(log_file=CFG['output_path']+'train.log'):
    """Create the module logger that writes plain messages to a stream and a file.

    Safe to call repeatedly (e.g. when a notebook cell is re-run): handlers
    attached by an earlier call are removed and closed first, so each message
    is emitted once per destination and file handles do not accumulate.

    Args:
        log_file: Path of the log file that messages are written to.

    Returns:
        The configured ``logging.Logger`` for this module, at INFO level.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object on every call; without this cleanup each
    # call would stack another pair of handlers and duplicate every log line.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    formatter = Formatter("%(message)s")
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

# Shared logger used by timer(), train_loop() and main(); uses the default log path.
LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with one value.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed all random generators once at import time, using the configured seed.
seed_torch(seed=CFG['seed'])

# Dataset

In [None]:
class TrainDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the rows of a fold table.

    Images are read from ``CFG['train_path']/<StudyInstanceUID>.jpg`` and
    converted from OpenCV's BGR channel order to RGB.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column and the columns listed
            in ``CFG['target_cols']``.
        transform: Optional callable invoked as ``transform(image=image)`` that
            returns a mapping with the result under ``'image'``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.labels = df[CFG['target_cols']].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            Tuple ``(image, label)``; ``label`` is a float tensor holding the
            target columns of row ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{CFG["train_path"]}/{file_name}.jpg'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than on a later slice.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # cvtColor returns a contiguous RGB array. The previous `[:, :, ::-1]`
        # flip produced a negative-stride view, which torch cannot convert
        # when no transform is applied.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label

# Transforms

In [None]:
def get_transforms(mode):
    """Build the albumentations pipeline for a dataset split.

    Both modes resize to ``CFG['img_size']`` square, normalise with the
    mean/std constants below, and convert to a tensor. ``'train'`` additionally
    applies a random horizontal flip (p=0.5) after the resize.

    Args:
        mode: ``'train'`` or ``'valid'``.

    Returns:
        A ``Compose`` pipeline.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'valid'``
            (previously this silently returned ``None``).
    """
    if mode not in ('train', 'valid'):
        raise ValueError(f"Unknown transform mode {mode!r}; expected 'train' or 'valid'")

    steps = [Resize(CFG['img_size'], CFG['img_size'])]
    if mode == 'train':
        steps.append(HorizontalFlip(p=0.5))
    steps.append(
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            p=1.0,
        )
    )
    steps.append(ToTensorV2())
    return Compose(steps)

# Model

In [None]:
class RanzcrModel(nn.Module):
    """timm backbone whose ``fc`` layer is replaced for this task.

    Args:
        model_name: Name passed to ``timm.create_model``. The created model
            must expose its classifier as ``fc``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Replace the classifier so the output width is CFG['target_size'].
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG['target_size'])
        self.model = backbone

    def forward(self, x):
        """Return the backbone output for ``x`` (no activation is applied here)."""
        return self.model(x)

# Helper Functions

In [None]:
class AverageMeter:
    """Track the latest value and the weighted running average of a metric.

    Attributes are ``val`` (last value), ``sum`` (weighted total), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
        
def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds.

    Returns:
        The formatted string; the seconds part is truncated to an integer.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe the elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero.

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'``, both parts
        formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    # Projected total duration minus the time already spent.
    remaining = elapsed / percent - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Run one mixed-precision training epoch and return the mean training loss.

    Changes over the previous version:
      * Gradients are zeroed before the first batch, so a partially filled
        accumulation window from an earlier epoch cannot leak into this one.
      * The gradient norm is actually measured (and clipped to
        ``CFG['max_grad_norm']``) after un-scaling the AMP gradients, instead
        of always being reported as 0.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module being trained; switched to train mode here.
        criterion: Loss callable invoked as ``criterion(y_preds, labels)``.
        optimizer: Optimizer stepped every
            ``CFG['gradient_accumulation_steps']`` batches.
        epoch: Zero-based epoch index, used only in the progress output.
        scheduler: Accepted for interface compatibility; not used here.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted average of the un-scaled loss over the epoch.
    """
    scaler = GradScaler()
    accumulation_steps = CFG['gradient_accumulation_steps']

    # Measurements
    batch_time = AverageMeter()  # elapsed time per batch
    data_time = AverageMeter()  # data loading time per batch
    losses = AverageMeter()

    model.train()
    optimizer.zero_grad()
    start = end = time.time()
    grad_norm = 0.0  # last measured gradient norm; stays 0 until the first step
    num_batches = len(train_loader)

    for step, (images, labels) in enumerate(train_loader):

        data_time.update(time.time() - end)

        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with autocast():
            y_preds = model(images)
            loss = criterion(y_preds, labels)

        # Record the loss before it is divided for accumulation.
        losses.update(loss.item(), batch_size)

        if accumulation_steps > 1:
            loss = loss / accumulation_steps

        scaler.scale(loss).backward()

        if (step + 1) % accumulation_steps == 0:
            # Gradients must be un-scaled before their norm is measured or
            # clipped; scaler.step() then skips the update if inf/NaN was found.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG['max_grad_norm'])
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        batch_time.update(time.time() - end)
        end = time.time()

        if step % CFG['print_freq'] == 0 or step == (num_batches - 1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(
                   epoch+1, step, num_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/num_batches),
                   grad_norm=float(grad_norm),
                   ))
    return losses.avg

def valid_fn(valid_loader, model, criterion, device):
    """Evaluate the model on a validation loader.

    The loss is now computed inside ``torch.no_grad()``. A leftover
    gradient-accumulation division (applied after the loss had already been
    recorded, so it had no effect) and an unused meter were removed.

    Args:
        valid_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module to evaluate; switched to eval mode here.
        criterion: Loss callable invoked as ``criterion(y_preds, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, predictions)``: the sample-weighted mean loss and a
        NumPy array of sigmoid-activated model outputs, batches concatenated
        in loader order.

    Raises:
        ValueError: If the loader yields no batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval()
    preds = []
    start = end = time.time()
    num_batches = len(valid_loader)

    for step, (images, labels) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        preds.append(y_preds.sigmoid().to('cpu').numpy())

        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG['print_freq'] == 0 or step == (num_batches - 1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, num_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/num_batches),
                   ))

    if not preds:
        # np.concatenate([]) raises a ValueError with an unhelpful message.
        raise ValueError('valid_loader yielded no batches; nothing to evaluate')
    predictions = np.concatenate(preds)
    return losses.avg, predictions

# Train Loop

In [None]:
def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation set; all
    other rows are used for training. After every epoch that improves the
    validation loss, the model weights and validation predictions are saved to
    ``OUTPUT_DIR + CFG['model_arch'] + '_fold{fold}_best.pth'``.

    Changes over the previous version:
      * An unsupported ``CFG['scheduler']`` raises ``ValueError`` instead of
        failing with ``UnboundLocalError``.
      * The best epoch's predictions are kept in memory rather than re-read
        with ``torch.load``. That avoids loading the whole checkpoint again,
        cannot pick up a stale file from an earlier run, and does not depend on
        ``torch.load`` being allowed to unpickle NumPy arrays.
      * Rows are selected with boolean masks, so duplicate index labels in
        ``folds`` cannot duplicate rows.

    Args:
        folds: DataFrame with a ``fold`` column, ``StudyInstanceUID`` and the
            columns in ``CFG['target_cols']``.
        fold: Fold id to hold out for validation.

    Returns:
        The validation rows (index reset) with one ``pred_<target>`` column
        per target, filled from the epoch with the lowest validation loss.

    Raises:
        ValueError: If ``CFG['scheduler']`` is not a supported name.
        RuntimeError: If no epoch produced a usable validation loss.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    is_valid_row = folds['fold'] == fold
    train_folds = folds[~is_valid_row].reset_index(drop=True)
    valid_folds = folds[is_valid_row].reset_index(drop=True)
    valid_labels = valid_folds[CFG['target_cols']].values

    train_dataset = TrainDataset(train_folds, transform=get_transforms(mode='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(mode='valid'))

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=True,
                              num_workers=CFG['num_workers'], pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=False,
                              num_workers=CFG['num_workers'], pin_memory=True, drop_last=False)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler named by CFG['scheduler']."""
        name = CFG['scheduler']
        if name == 'ReduceLROnPlateau':
            return ReduceLROnPlateau(optimizer, mode='min', factor=CFG['factor'], patience=CFG['patience'], verbose=True, eps=CFG['eps'])
        if name == 'CosineAnnealingLR':
            return CosineAnnealingLR(optimizer, T_max=CFG['T_max'], eta_min=CFG['min_lr'], last_epoch=-1)
        if name == 'CosineAnnealingWarmRestarts':
            return CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)
        raise ValueError(f'Unsupported scheduler: {name!r}')

    # ====================================================
    # model & optimizer
    # ====================================================
    model = RanzcrModel(model_name=CFG['model_arch'], pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'], amsgrad=False)

    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    checkpoint_path = OUTPUT_DIR+CFG['model_arch']+f'_fold{fold}_best.pth'
    best_loss = np.inf
    best_preds = None

    for epoch in range(CFG['epochs']):

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

        # ReduceLROnPlateau needs the monitored metric; the cosine schedulers do not.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        # scoring
        score, scores = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}')

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            best_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                       checkpoint_path)

    if best_preds is None:
        # Happens with zero epochs or when every validation loss was NaN.
        raise RuntimeError(
            f'fold {fold}: no epoch improved the validation loss; no predictions to return'
        )

    pred_cols = [f'pred_{c}' for c in CFG['target_cols']]
    # Create the columns first so the block assignment below targets existing labels.
    for c in pred_cols:
        valid_folds[c] = np.nan
    valid_folds[pred_cols] = best_preds

    return valid_folds

In [None]:
def main():
    """Train the configured folds, log per-fold and overall scores, save OOF predictions.

    Does nothing unless ``CFG['train']`` is truthy. Every fold id in
    ``range(CFG['fold_num'])`` that is listed in ``CFG['trn_fold']`` is trained
    with ``train_loop``; the returned validation frames are concatenated and
    written to ``OUTPUT_DIR + 'oof_df.csv'``.

    Per-fold frames are collected in a list and concatenated once, rather than
    repeatedly concatenating onto an initially empty DataFrame. If no fold is
    selected, a message is logged and nothing is scored or written (previously
    this failed with a ``KeyError`` while scoring an empty frame).
    """

    def get_result(result_df):
        """Log the mean and per-target AUC of the predictions in ``result_df``."""
        preds = result_df[[f'pred_{c}' for c in CFG['target_cols']]].values
        labels = result_df[CFG['target_cols']].values
        score, scores = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}  Scores: {np.round(scores, decimals=4)}')

    if not CFG['train']:
        return

    # train
    oof_frames = []
    for fold in range(CFG['fold_num']):
        if fold not in CFG['trn_fold']:
            continue
        _oof_df = train_loop(folds, fold)
        oof_frames.append(_oof_df)
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)

    if not oof_frames:
        LOGGER.info('No fold in CFG["trn_fold"] matched; nothing was trained.')
        return

    oof_df = pd.concat(oof_frames)
    # CV result
    LOGGER.info("========== CV ==========")
    get_result(oof_df)
    # save result
    oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

In [None]:
# Entry point: run the training pipeline when executed as a script or notebook cell.
if __name__ == '__main__':
    main()