## Train an EfficientNet baseline model with timm

In [None]:
# Competition: https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import os
import sys
import gc
import math
import pickle
import random
import time
import psutil
import pytz
from datetime import datetime
from collections import defaultdict
from contextlib import contextmanager

import warnings
warnings.filterwarnings('ignore')  # warnings.filterwarnings(action='once')

from tqdm import tqdm, tqdm_notebook

import numpy as np
import pandas as pd
_ = np.seterr(divide='ignore', invalid='ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.style as style
style.use('fivethirtyeight')
import seaborn as sns
from IPython.display import Image  

import lightgbm as lgb
from sklearn.metrics import roc_auc_score

import tensorflow as tf
from tensorflow import keras

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
    
def seed_all(random_seed=42):
    """Seed the Python, NumPy, TensorFlow and PyTorch random generators.

    Args:
        random_seed: Integer seed passed to every generator.
    """
    # Interpreter-level sources of randomness
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    random.seed(random_seed)

    # Numeric / deep-learning libraries, seeded in the same order as before
    for seed_fn in (np.random.seed,
                    tf.random.set_seed,
                    torch.manual_seed,
                    torch.cuda.manual_seed):
        seed_fn(random_seed)

    # Ask cuDNN to pick deterministic algorithms
    torch.backends.cudnn.deterministic = True

sys.path.append('../input/gputil/GPUtil')
from GPUtil import showUtilization as gpu_usage

global_start_t = time.time()

In [None]:
# ==============================================
# CFG
# ==============================================
class CFG:
    """Static configuration read by the data, model and training cells."""
    debug = True  # False
    device = 'GPU'  # ['TPU', 'GPU']
    nprocs = 1  # [1, 8]
    print_freq = 100
    model_name = 'EffnetModel'
    num_workers = 4
    size = 640
    scheduler = 'CosineAnnealingLR'  # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmupRestarts']
    epochs = 4
    # The scheduler factory in train_loop() reads the next four values for the
    # non-default schedulers; without them selecting one raises AttributeError.
    factor = 0.2  # ReduceLROnPlateau
    patience = 4  # ReduceLROnPlateau
    eps = 1e-6  # ReduceLROnPlateau
    T_0 = 4  # CosineAnnealingWarmRestarts
    T_max = 4  # CosineAnnealingLR
    lr = 5e-4 # 1e-4
    min_lr = 1e-6
    batch_size = 16  # 64
    weight_decay = 1e-6
    gradient_accumulation_steps = 1 # 1 
    max_grad_norm = 1000
    seed = 2021
    target_size = 11
    target_cols = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                   'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 
                   'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                   'Swan Ganz Catheter Present']
    n_fold = 5
    trn_fold = [0] # [0, 1, 2, 3, 4]
    train = True
    
# Debug runs train for a single epoch only
if CFG.debug:
    CFG.epochs = 1

In [None]:
# =======================================================
# Library
# =======================================================
import sys
sys.path.append('../input/pytorch-image-models/')

import ast
import copy
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
import cv2
from PIL import Image

from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, HueSaturationValue, CoarseDropout
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm
from torch.cuda.amp import autocast, GradScaler

In [None]:
# Directory that receives the checkpoints written during training
OUTPUT_DIR = './'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Seed everything with a fixed value before the data is sampled and split
RANDOM_SEED = 53113
seed_all(RANDOM_SEED)

TRAIN_PATH = '../input/ranzcr-clip-catheter-line-classification/train'
train = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')

# Shuffle the rows with a fixed random_state; debug mode keeps only 300 of them
train = train.sample(
    n=300 if CFG.debug else len(train),
    random_state=2021,
).reset_index(drop=True)

# Group K-fold on PatientID: every row is tagged with the fold in which it
# belongs to the validation split
folds = train.copy()
Fold = GroupKFold(n_splits=CFG.n_fold)
groups = folds['PatientID'].values
for n, (train_index, val_index) in enumerate(
        Fold.split(folds, folds[CFG.target_cols], groups)):
    folds.loc[val_index, 'fold'] = int(n)
folds['fold'] = folds['fold'].astype(int)
display(folds.groupby('fold').size())

def get_time_random_seed():
    """Derive a 32-bit seed from the current wall-clock time.

    The time in milliseconds is truncated to its low 32 bits and the order of
    those four bytes is reversed, so the fastest-changing byte ends up in the
    most significant position.

    Returns:
        An int in the range [0, 2**32).
    """
    millis = int(time.time() * 1000.0)
    low_bytes = (millis & 0xffffffff).to_bytes(4, 'little')
    return int.from_bytes(low_bytes, 'big')

# Reseed from the clock; the seed is printed so that a run can be repeated
# by passing the same value to seed_all()
time_random_seed = get_time_random_seed()
print(f'time_random_seed: {time_random_seed}')
seed_all(time_random_seed)

# Use CUDA when it is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'device: {device}')

In [None]:
class RANZCR_EfficientNet(nn.Module):
    """timm backbone with its own pooling layer and a linear output head."""

    def __init__(self, model_name='efficientnet_b3', out_dim=11, pretrained=False):
        """Create the backbone and replace its pooling and classifier.

        Args:
            model_name: Name handed to ``timm.create_model``, e.g.
                'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3',
                'efficientnet_b1_pruned', 'efficientnet_b2_pruned',
                'efficientnet_b3_pruned', ...
                For more, see https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py
            out_dim: Number of outputs of the final linear layer.
            pretrained: Forwarded to ``timm.create_model``.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained)
        n_features = backbone.classifier.in_features
        # Disable the backbone's own pooling and classifier; both are
        # replaced by the layers defined below.
        backbone.global_pool = nn.Identity()
        backbone.classifier = nn.Identity()
        self.model = backbone
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, out_dim)

    def forward(self, x):
        """Return the output of the linear head for the batch ``x``."""
        pooled = self.pooling(self.model(x))
        # Flatten everything except the batch dimension before the head
        return self.fc(pooled.view(x.size(0), -1))

# Build the network with pretrained weights and move it to the chosen device
model_name = 'efficientnet_b3'
global_model = RANZCR_EfficientNet(model_name, pretrained=True)
global_model = global_model.to(device)

# Parameter counts: all parameters vs. only those with requires_grad set
model_param_num_sum = sum(p.numel() for p in global_model.parameters())
model_trainable_param_num_sum = sum(
    p.numel() for p in global_model.parameters() if p.requires_grad
)

# Bare expression so that the notebook displays the three values
model_name, model_param_num_sum, model_trainable_param_num_sum

In [None]:
def get_auc_score(y_true, y_pred):
    """Compute the ROC AUC of every column and their mean.

    Args:
        y_true: 2-D array of labels, one column per target.
        y_pred: 2-D array of scores, indexed by the same columns.

    Returns:
        Tuple ``(mean_auc, per_column_aucs)``. A column whose labels contain
        exactly one distinct value is scored 0.5.
    """
    def column_auc(col):
        labels = y_true[:, col]
        if len(np.unique(labels)) == 1:
            # AUC is undefined with a single class; use the default 0.5
            return 0.5
        return roc_auc_score(labels, y_pred[:, col])

    per_column = [column_auc(col) for col in range(y_true.shape[1])]
    return np.mean(per_column), per_column

def get_accuracy_score(y_true, y_pred):
    """Compute the accuracy of every column at a 0.5 threshold and their mean.

    Args:
        y_true: 2-D array of labels, one column per target.
        y_pred: 2-D array of scores; values >= 0.5 count as class 1.

    Returns:
        Tuple ``(mean_accuracy, per_column_accuracies)``.
    """
    hard_pred = (y_pred >= 0.5).astype(int)
    n_rows = y_true.shape[0]
    per_column = [
        (y_true[:, col] == hard_pred[:, col]).sum() / n_rows
        for col in range(y_true.shape[1])
    ]
    return np.mean(per_column), per_column

class TrainDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the rows of a DataFrame.

    Images are loaded from ``{TRAIN_PATH}/{StudyInstanceUID}.jpg``; labels are
    the ``CFG.target_cols`` values of the same row.
    """

    def __init__(self, df, transform=None):
        """Store the file names, the label matrix and the optional transform.

        Args:
            df: DataFrame with a ``StudyInstanceUID`` column and every column
                listed in ``CFG.target_cols``.
            transform: Optional callable invoked as ``transform(image=...)``
                that returns a mapping with an ``'image'`` entry.
        """
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.labels = df[CFG.target_cols].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``label`` is a float tensor.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        file_name = self.file_names[idx]
        file_path = f'{TRAIN_PATH}/{file_name}.jpg'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals a missing or unreadable file by returning
            # None; fail here with the path instead of inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).float()

        return image, label

In [None]:
# ====================================
# Transforms
# ====================================
def get_transforms(*, data):
    """Build the albumentations pipeline for one data split.

    Args:
        data: ``'train'`` for the augmenting pipeline, ``'valid'`` for the
            resize-only pipeline.

    Returns:
        A ``Compose`` ending in ``Normalize`` and ``ToTensorV2``, or ``None``
        when ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    if data not in ('train', 'valid'):
        return None

    if data == 'train':
        # Random crop plus photometric / geometric / dropout augmentations
        head = [
            RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
            RandomBrightnessContrast(p=0.2, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2)),
            HueSaturationValue(p=0.2, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
            ShiftScaleRotate(p=0.2, shift_limit=0.0625, scale_limit=0.2, rotate_limit=20),
            CoarseDropout(p=0.2),
            Cutout(p=0.2, max_h_size=16, max_w_size=16, fill_value=(0., 0., 0.), num_holes=16),
        ]
    else:
        # Validation only resizes to the configured square size
        head = [Resize(CFG.size, CFG.size)]

    # Both splits finish with the same normalisation and tensor conversion
    tail = [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
    return Compose(head + tail)

In [None]:
# =============================================
# Helper functions
# =============================================
class AverageMeter:
    """Track the latest value and the weighted running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the average, the sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
        
def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return f'{minutes}m {seconds}s'

def timeSince(since, percent):
    """Describe the elapsed time and an estimate of the time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work already done; must be non-zero.

    Returns:
        ``'<elapsed> (remain <remaining>)'`` with both parts formatted by
        ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the completed fraction
    remaining = elapsed / percent - elapsed
    return f'{asMinutes(elapsed)} (remain {asMinutes(remaining)})'

def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Run one mixed-precision training epoch and return the mean loss.

    Args:
        train_loader: Iterable of ``(images, labels)`` batches.
        model: Network to train; switched to train mode here.
        criterion: Loss called as ``criterion(predictions, labels)``.
        optimizer: Optimizer stepped every
            ``CFG.gradient_accumulation_steps`` batches.
        epoch: Zero-based epoch index, used only in the progress log.
        scheduler: Accepted for signature compatibility; not used here.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted average of the unscaled batch losses.

    Note:
        The forward/backward pass only runs when ``CFG.device == 'GPU'``;
        for any other value the loader is merely iterated.
    """
    use_gpu = CFG.device == 'GPU'
    if use_gpu:
        scaler = GradScaler()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    # Start from clean gradients so nothing left over from an earlier call
    # leaks into the first optimizer step.
    optimizer.zero_grad()
    start = end = time.time()
    # Reported in the log; stays 0.0 until the first optimizer step
    grad_norm = 0.0
    len_train_loader = len(train_loader)
    print(f'in train_fn() len_train_loader: {len_train_loader}')
    for step, (images, labels) in enumerate(train_loader):
        if step%50==0:
            print(f'in train_fn() step: {step} len_train_loader: {len_train_loader}')
            gpu_usage()
        # measure data loading time
        data_time.update(time.time() - end)
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)
        if use_gpu:
            # Only the forward pass and the loss run under autocast;
            # backward and the optimizer step happen outside of it.
            with autocast():
                y_preds = model(images)
                loss = criterion(y_preds, labels)
            # record loss
            losses.update(loss.item(), batch_size)
            if CFG.gradient_accumulation_steps > 1:
                loss = loss / CFG.gradient_accumulation_steps
            scaler.scale(loss).backward()
            if (step + 1) % CFG.gradient_accumulation_steps == 0:
                # Gradients still carry the loss scale; unscale them first so
                # max_grad_norm applies to (and grad_norm reports) true values.
                scaler.unscale_(optimizer)
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if use_gpu:
            if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
                print('Epoch: [{0}][{1}/{2}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                      'Grad: {grad_norm:.4f}  '
                      .format(
                       epoch+1, step, len(train_loader), batch_time=batch_time,
                       data_time=data_time, loss=losses,
                       remain=timeSince(start, float(step+1)/len(train_loader)),
                       grad_norm=grad_norm,
                       ))
                
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate the model on a loader without tracking gradients.

    Args:
        valid_loader: Iterable of ``(images, labels)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, predictions, trues)``: the sample-weighted mean
        loss, the concatenated sigmoid outputs and the concatenated labels,
        the latter two as NumPy arrays.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluation mode
    model.eval()
    trues, preds = [], []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)
        # compute predictions and loss; neither needs an autograd graph
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        # keep labels and probabilities for scoring by the caller
        trues.append(labels.to('cpu').numpy())
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if CFG.device == 'GPU':
            if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
                print('EVAL: [{0}/{1}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                      .format(
                       step, len(valid_loader), batch_time=batch_time,
                       data_time=data_time, loss=losses,
                       remain=timeSince(start, float(step+1)/len(valid_loader)),
                       ))
                
    trues = np.concatenate(trues)
    predictions = np.concatenate(preds)
    return losses.avg, predictions, trues

In [None]:
def get_optimizer_lr(optimizer):
    """Return the ``'lr'`` of the first parameter group, or None if there is none."""
    first_lr = (group['lr'] for group in optimizer.param_groups)
    return next(first_lr, None)
    
# ==================================================
# Train loop
# ==================================================
def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows with ``folds['fold'] == fold`` form the validation split and all
    other rows the training split. After every epoch the model is validated
    and a checkpoint is written to ``OUTPUT_DIR`` whenever the mean AUC or
    the validation loss improves (only when ``CFG.device == 'GPU'``).

    Args:
        folds: DataFrame with a ``fold`` column, ``StudyInstanceUID`` and
            every column in ``CFG.target_cols``.
        fold: Index of the fold held out for validation.

    Returns:
        The validation rows (index reset). Unless ``CFG.nprocs == 8``, one
        ``pred_<target>`` column per target holds the predictions of the
        epoch with the best mean AUC.

    Raises:
        ValueError: If ``CFG.scheduler`` is not a supported scheduler name.

    Note:
        Training runs on a deep copy of ``global_model`` so that each fold
        starts from the same initial weights and never from weights that
        were already fitted on another fold's data. ``global_model`` itself
        is left untouched; the trained weights live in the checkpoints.
    """
    print('in train_loop()')
    gpu_usage()
    if CFG.device == 'GPU':
        print(f'========== fold: {fold} training ============')
            
    # =============================================
    # loader
    # =============================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    
    valid_labels = valid_folds[CFG.target_cols].values
    
    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='valid'))
    
    if CFG.device == 'GPU':
        train_loader = DataLoader(train_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=True, num_workers=CFG.num_workers, 
                                  pin_memory=True, drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=CFG.batch_size * 2,
                                  shuffle=False, num_workers=CFG.num_workers, 
                                  pin_memory=True, drop_last=False)
        
    # ==============================================
    # scheduler
    # ==============================================
    # Accept the real class name, the spelling listed next to CFG.scheduler
    # and the misspelling this function used to compare against.
    warm_restart_names = ('CosineAnnealingWarmRestarts',
                          'CosineAnnealingWarmupRestarts',
                          'CosineAnnealingWarmpRestarts')

    def get_scheduler(optimizer):
        if CFG.scheduler == 'ReduceLROnPlateau':
            return ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        if CFG.scheduler == 'CosineAnnealingLR':
            return CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        if CFG.scheduler in warm_restart_names:
            return CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        # Fail with a clear message instead of an unbound local further down
        raise ValueError(f'Unsupported CFG.scheduler: {CFG.scheduler!r}')
    
    # ==============================================
    # model & optimizer
    # ==============================================
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device: ', device)
    
    # Fresh copy per fold: sharing one instance would carry weights trained
    # on this fold's validation rows over from a previous fold.
    model = copy.deepcopy(global_model)
    
    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    def save_checkpoint(tag, preds, epoch):
        # Write model/optimizer/scheduler state plus the validation predictions
        torch.save({'model': model.state_dict(), 
                    'preds': preds, 
                    'scheduler': scheduler.state_dict(), 
                    'optimizer': optimizer.state_dict()},
                   OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_{tag}.pth')
        # The learning rates are read from the optimizer so that this also
        # works for schedulers that do not expose get_last_lr().
        print(f'save {tag} model, epoch: ', epoch, 
              'lr_optimizer',  get_optimizer_lr(optimizer), 
              'scheduler.last_lr is ', [group['lr'] for group in optimizer.param_groups])
    
    # =============================================
    # loop
    # =============================================
    criterion = nn.BCEWithLogitsLoss()
    best_score, best_loss = 0., np.inf
    # Predictions of the best-score epoch, kept in memory so that the result
    # does not depend on re-loading a checkpoint file from disk.
    best_preds = None
    
    for epoch in range(CFG.epochs):
        start_time = time.time()
        
        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
            
        # eval
        avg_val_loss, preds, _ = valid_fn(valid_loader, model, criterion, device)
        
        # ReduceLROnPlateau steps on the monitored metric, the others per epoch
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()
            
        # scoring
        score, scores = get_auc_score(valid_labels, preds)
        accuracy_score, accuracy_scores = get_accuracy_score(valid_labels, preds)
        
        print(f'score:{score}, scores: {scores}')
        print(f'accuracy_score:{accuracy_score}, accuracy_scores: {accuracy_scores}')
    
        elapsed = time.time() - start_time

        if CFG.device == 'GPU':
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.3f}s')
            print(f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}')
                
        if score > best_score:
            best_score = score
            best_preds = preds
            if CFG.device == 'GPU':
                print(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                save_checkpoint('best_score', preds, epoch)
                
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            if CFG.device == 'GPU':
                print(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
                save_checkpoint('best_loss', preds, epoch)

    if CFG.nprocs != 8 and best_preds is not None:
        pred_cols = [f'pred_{c}' for c in CFG.target_cols]
        # Create the columns first, then fill them in one assignment
        for col in pred_cols:
            valid_folds[col] = np.nan
        valid_folds[pred_cols] = best_preds
    
    return valid_folds

In [None]:
def main():
    """Run ``train_loop`` for each fold index listed in ``CFG.trn_fold``."""
    # Lazily pick the fold indices, in ascending order, that were selected
    selected = (fold for fold in range(CFG.n_fold) if fold in CFG.trn_fold)
    for fold in selected:
        train_loop(folds, fold)

# Entry point: report GPU utilisation, run the selected folds and print the
# wall-clock time spent from just before the GPU report until main() returns.
if __name__=='__main__':
    print('before main()')
    main_start_t = time.time()
    gpu_usage()
    main()
    print(f'main total cost time: {time.time()-main_start_t: .3f}')

In [None]:
# Dummy submission: take the sample submission, zero the target columns
# (positions 1 to 11) and write the result as submission.csv
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
target_cols = test.columns[1:12].tolist()
test[target_cols] = 0
submission_cols = ['StudyInstanceUID'] + target_cols
test[submission_cols].to_csv('submission.csv', index=False)

# Time elapsed since the first cell of the notebook ran
print(f'total cost time: {time.time()-global_start_t:.5f} sec')