In [None]:
#!pip install '../input/python-packages/timm-0.3.4-py3-none-any (1).whl'

In [None]:
!pip install '../input/effdet-latestvinbigdata-wbf-fused/timm-0.3.4-py3-none-any.whl'

In [None]:
import timm

In [None]:
import random
import os
import pandas as pd
import numpy as np


#from efficientnet_pytorch import EfficientNet




import cv2
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

import torchvision.models as models

import torch
from torch import nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim import Adam, SGD


from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold, StratifiedKFold


from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
from tqdm import tqdm
import time
from sklearn.metrics import log_loss

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Root directory of the competition data; image paths in the dataset classes are built from it.
BASE_DIR = "../input/ranzcr-clip-catheter-line-classification"


# Training table; the dataset classes read 'StudyInstanceUID' and the target columns from it.
train = pd.read_csv(os.path.join(BASE_DIR,'train.csv'))
# NOTE(review): this is not the last expression of the cell, so the preview is presumably never displayed.
train.head()

# Names of the label columns in train.csv; their order defines the order of the model outputs.
target_cols = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal',
               'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal',
               'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']


# Central run configuration; every tunable value used by the notebook is read from here.
CFG = {
    'fold_num': 5,                      # number of cross-validation folds
    'scheduler':'CosineAnnealingLR',    # 'ReduceLROnPlateau' | 'CosineAnnealingLR' | 'CosineAnnealingWarmRestarts'
    'T_max':6,                          # CosineAnnealingLR period
    'seed': 719,
    'model_name': 'resnet200d',         # timm architecture name
    'gradient_accumulation_steps':1,    # not read by the code in this notebook
    'max_grad_norm':1000,               # not read by the code in this notebook
    'img_size': 256,                    # side length of the square network input
    'epochs': 1,
    'train_bs': 16,
    'valid_bs': 32,
    'T_0': 10,                          # CosineAnnealingWarmRestarts restart period
    'lr': 1e-4,
    'min_lr': 1e-6,                     # eta_min of the cosine schedulers
    'weight_decay':1e-6,
    'num_workers': 4,                   # DataLoader worker processes
    'accum_iter': 2,                    # not read by the code in this notebook
    'verbose_step': 1,                  # not read by the code in this notebook
    'device': 'cuda:0',                 # not read by the code in this notebook (the `device` variable is used)
    'use_amp' : True,                   # enable mixed-precision training
    # ReduceLROnPlateau settings. The scheduler factory reads these three keys, so they
    # must exist for CFG['scheduler'] = 'ReduceLROnPlateau' to work instead of raising KeyError.
    'factor': 0.2,                      # multiplicative LR decay applied on a plateau
    'patience': 4,                      # epochs without improvement before decaying
    'eps': 1e-6,                        # minimal LR change that is still applied
}


In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick convolution algorithms by timing them, which can
    # differ between runs; it has to be off for the deterministic flag to be meaningful.
    torch.backends.cudnn.benchmark = False
    
def macro_multilabel_auc(label, pred):
    """Print the per-target ROC AUC values and return their unweighted mean.

    Args:
        label: 2-D array of ground-truth values, indexed as ``label[:, i]``.
        pred: 2-D array of predicted scores with the same column layout.

    Returns:
        Mean of the ROC AUC scores over the first ``len(target_cols)`` columns.
    """
    per_target = [
        roc_auc_score(label[:, col], pred[:, col])
        for col in range(len(target_cols))
    ]
    print(np.round(per_target, 4))
    return np.mean(per_target)
    
def get_score(y_true, y_pred):
    """Compute the ROC AUC of every column and their mean.

    Args:
        y_true: 2-D array of ground-truth values; one column per target.
        y_pred: 2-D array of predicted scores with the same column layout.

    Returns:
        Tuple ``(avg_score, scores)``: the mean AUC and the list of per-column AUCs.
    """
    scores = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(y_true.shape[1])
    ]
    return np.mean(scores), scores
    
    
def get_transforms(*, data):
    """Build the albumentations pipeline for one dataset split.

    Args:
        data: Split name: 'train', 'valid' or 'test'.

    Returns:
        A ``Compose`` pipeline that outputs a normalized tensor of side
        ``CFG['img_size']``. 'valid' and 'test' share the same deterministic pipeline.

    Raises:
        ValueError: If ``data`` is not one of the supported split names.
    """
    if data == 'train':
        return Compose([
            RandomResizedCrop(CFG['img_size'], CFG['img_size']),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
            HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            CoarseDropout(p=0.5),
            Cutout(p=0.5),
            ToTensorV2(p=1.0),
        ], p=1.)

    if data in ('valid', 'test'):
        # Resize the whole image. The previous CenterCrop(img_size) ahead of Resize(img_size)
        # kept only an img_size x img_size window from the middle of the source image
        # (everything outside it was discarded) and left Resize with nothing to do,
        # so evaluation saw a different field of view than the training crops.
        return Compose([
            Resize(CFG['img_size'], CFG['img_size']),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.)

    # Fail loudly instead of returning None, which would silently disable all preprocessing.
    raise ValueError(f"Unknown data split {data!r}; expected 'train', 'valid' or 'test'")
        

    
    
class TrainDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for training and validation.

    Images are read from ``{BASE_DIR}/train/<StudyInstanceUID>.jpg``; labels are the
    ``target_cols`` columns of the given frame.
    """

    def __init__(self, df, transform=None):
        """Store the frame, its file names / labels and an optional transform.

        Args:
            df: DataFrame with a 'StudyInstanceUID' column and the ``target_cols`` columns.
            transform: Optional callable invoked as ``transform(image=...)`` that returns
                a mapping with an 'image' entry.
        """
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.labels = df[target_cols].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at ``idx`` and return it with its label.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{BASE_DIR}/train/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None rather than raising; report the path
        # here instead of letting cvtColor fail with an opaque assertion.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label
    

class TestDataset(Dataset):
    """Dataset yielding unlabeled images for inference.

    Images are read from ``{BASE_DIR}/test/<StudyInstanceUID>.jpg``.
    """

    def __init__(self, df, transform=None):
        """Store the frame, its file names and an optional transform.

        Args:
            df: DataFrame with a 'StudyInstanceUID' column.
            transform: Optional callable invoked as ``transform(image=...)`` that returns
                a mapping with an 'image' entry.
        """
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{BASE_DIR}/test/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None rather than raising; report the path
        # here instead of letting cvtColor fail with an opaque assertion.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image
    


def prepare_dataloader(df, trn_idx, val_idx):
    """Create the training and validation loaders for one fold.

    Args:
        df: Full training DataFrame.
        trn_idx: Positional row indices of the training part of the fold.
        val_idx: Positional row indices of the validation part of the fold.

    Returns:
        Tuple ``(train_loader, val_loader)``. The training loader shuffles and uses the
        'train' transforms; the validation loader keeps row order and uses the 'valid'
        transforms.
    """
    # Fold indices produced by a scikit-learn splitter are positions, not index labels,
    # so select with .iloc. For a default RangeIndex this matches the former .loc lookup.
    train_ = df.iloc[trn_idx].reset_index(drop=True)
    valid_ = df.iloc[val_idx].reset_index(drop=True)

    train_ds = TrainDataset(train_, transform=get_transforms(data='train'))
    valid_ds = TrainDataset(valid_, transform=get_transforms(data='valid'))

    train_loader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=CFG['train_bs'],
        pin_memory=False,
        drop_last=False,
        shuffle=True,
        num_workers=CFG['num_workers'],
    )
    val_loader = torch.utils.data.DataLoader(
        valid_ds,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        pin_memory=False,
    )
    return train_loader, val_loader

class Classifier(nn.Module):
    """timm backbone followed by global average pooling and a linear head.

    The backbone's own ``global_pool`` and ``fc`` modules are replaced with identities,
    so pooling and classification are done by this wrapper's ``pooling`` and ``fc``.
    Assumes the chosen timm model exposes ``fc`` and ``global_pool`` attributes.
    """

    def __init__(self, model_name, n_class, pretrained=False):
        """Create the backbone and a fresh ``n_class``-way linear head.

        Args:
            model_name: Architecture name passed to ``timm.create_model``.
            n_class: Number of output logits.
            pretrained: Forwarded to ``timm.create_model``.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Read the head width before the original head is swapped out.
        head_width = backbone.fc.in_features
        backbone.global_pool = nn.Identity()
        backbone.fc = nn.Identity()
        self.model = backbone
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(head_width, n_class)

    def forward(self, x):
        """Return the logits for the batch ``x`` (one row per sample)."""
        feature_map = self.model(x)
        pooled = self.pooling(feature_map)
        flat = pooled.view(x.size(0), -1)
        return self.fc(flat)
    

In [None]:
   
def train_one_epoch(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: Iterable of ``(image, label)`` batches.
        model: Network to train; switched to train mode here.
        criterion: Loss called as ``criterion(logits, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Unused; kept so existing callers keep working.
        scheduler: Unused here; the caller is responsible for stepping it.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch loss values.
    """
    print('Training')
    model.train()
    # Mixed precision is only meaningful on CUDA. With enabled=False both autocast and
    # GradScaler act as pass-throughs, so a single code path serves both modes.
    use_amp = bool(CFG['use_amp']) and torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    losses = []
    bar = tqdm(train_loader)
    for image, label in bar:
        image, target = image.to(device).float(), label.to(device).float()
        with autocast(enabled=use_amp):
            logits = model(image)
            loss = criterion(logits, target)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()
        losses.append(loss.item())
        # Running mean over the last 30 batches, shown in the progress bar.
        smooth_loss = np.mean(losses[-30:])
        bar.set_description(f'loss: {losses[-1]:.5f}, smth: {smooth_loss:.5f}')

    loss_train = np.mean(losses)
    return loss_train


        
def valid_one_epoch(val_loader, model, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Args:
        val_loader: Iterable of ``(images, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss called as ``criterion(logits, targets)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss_valid, roc_auc)``: the mean per-batch loss and the macro ROC AUC
        computed by ``macro_multilabel_auc`` on the sigmoid outputs.
    """
    print('Validating')
    model.eval()
    bar = tqdm(val_loader)

    TARGETS = []
    losses = []
    PREDS = []

    with torch.no_grad():
        for images, targets in bar:
            images, targets = images.to(device), targets.to(device)
            logits = model(images)
            # Move results to host memory per batch so they do not accumulate on the device.
            PREDS.append(logits.sigmoid().cpu())
            TARGETS.append(targets.cpu())
            loss = criterion(logits, targets)
            losses.append(loss.item())
            # Progress is reported through the bar only; per-batch prints would flood the output.
            smooth_loss = np.mean(losses[-30:])
            bar.set_description(f'loss: {loss.item():.5f}, smth: {smooth_loss:.5f}')

    PREDS = torch.cat(PREDS).numpy()
    TARGETS = torch.cat(TARGETS).numpy()
    roc_auc = macro_multilabel_auc(TARGETS, PREDS)
    loss_valid = np.mean(losses)
    return loss_valid, roc_auc


def _get_scheduler(optimizer):
    """Create the learning-rate scheduler selected by ``CFG['scheduler']`` for ``optimizer``."""
    name = CFG['scheduler']
    if name == 'ReduceLROnPlateau':
        return ReduceLROnPlateau(optimizer, mode='min', factor=CFG['factor'], patience=CFG['patience'], verbose=True, eps=CFG['eps'])
    if name == 'CosineAnnealingLR':
        return CosineAnnealingLR(optimizer, T_max=CFG['T_max'], eta_min=CFG['min_lr'], last_epoch=-1)
    if name == 'CosineAnnealingWarmRestarts':
        return CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)
    # Previously an unknown name fell through and failed with UnboundLocalError.
    raise ValueError(f"Unknown scheduler {name!r}")


def main():
    """Train one model per cross-validation fold and save each fold's best weights.

    For every fold a fresh ``Classifier`` is trained for ``CFG['epochs']`` epochs. After each
    epoch the model is validated, the scheduler is stepped, and the weights are written to
    ``{model_name}_fold{fold}_best.pth`` whenever the validation loss improves.
    """
    seed_everything(CFG['seed'])

    # Split by patient: all rows of one PatientID land in the same fold, so validation never
    # contains a patient seen in training. Stratifying on PatientID (the former
    # StratifiedKFold call) does the opposite and spreads each patient across folds.
    folds = GroupKFold(n_splits=CFG['fold_num']).split(
        np.arange(train.shape[0]), groups=train['PatientID'].values)

    for fold, (trn_idx, val_idx) in enumerate(folds):
        print('Training with {} started'.format(fold))
        print(len(trn_idx), len(val_idx))
        train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx)

        # ====================================================
        # model & optimizer
        # ====================================================
        model = Classifier(CFG['model_name'],len(target_cols), pretrained=False)
        model.to(device)

        optimizer = Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'], amsgrad=False)
        scheduler = _get_scheduler(optimizer)

        # ====================================================
        # loop
        # ====================================================
        criterion = nn.BCEWithLogitsLoss()
        best_loss = np.inf

        for epoch in range(CFG['epochs']):
            start_time = time.time()

            # train
            avg_loss = train_one_epoch(train_loader, model, criterion, optimizer, epoch, scheduler, device)

            # eval
            avg_val_loss, roc_auc = valid_one_epoch(val_loader, model, criterion, device)

            # ReduceLROnPlateau needs the monitored metric; the cosine schedulers do not.
            if isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step(avg_val_loss)
            else:
                scheduler.step()

            elapsed = time.time() - start_time
            print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  '
                  f'roc_auc: {roc_auc:.4f}  time: {elapsed:.0f}s')

            # Keep only the weights with the lowest validation loss for this fold.
            if avg_val_loss < best_loss:
                best_loss = avg_val_loss
                torch.save(model.state_dict(),f"{CFG['model_name']}_fold{fold}_best.pth")
        


''' 

model = EfficientNetB5()
model.load_state_dict(torch.load(MODEL_PATH)['model'])
model.eval()
models = [model.to(device)]


test_dataset = TestDataset(test, transform=get_transforms())
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, 
                         num_workers=4 , pin_memory=True)
predictions = inference(models, test_loader, device)

target_cols = test.iloc[:, 1:12].columns.tolist()
test[target_cols] = predictions
test[['StudyInstanceUID'] + target_cols].to_csv('submission.csv', index=False)
test.head()

'''

# Start the cross-validation training when this code is executed as the main module.
if __name__ == '__main__':
    main()



In [None]:
# The sample submission provides the StudyInstanceUID of every test image.
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
test_dataset = TestDataset(test, transform=get_transforms(data='test'))
# Take the worker count from CFG like the other loaders instead of a hard-coded 24,
# which oversubscribes machines with fewer CPU cores.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=CFG['valid_bs'],
    shuffle=False,
    num_workers=CFG['num_workers'],
)
def inference_func(test_loader, net=None):
    """Predict sigmoid probabilities for every batch of ``test_loader``.

    Args:
        test_loader: Iterable yielding image batches.
        net: Model to evaluate. When omitted, the module-level ``model`` is used, which
            keeps existing ``inference_func(test_loader)`` calls working.

    Returns:
        NumPy array with the sigmoid outputs of all batches concatenated along the first axis.
    """
    if net is None:
        net = model
    net.eval()
    PREDS = []

    with torch.no_grad():
        for images in tqdm(test_loader):
            logits = net(images.to(device))
            PREDS.append(logits.sigmoid().cpu())
    return torch.cat(PREDS).numpy()

# One checkpoint per cross-validation fold.
model_path = ['../input/pretrainedefficient/resnet200d_fold0_best.pth',
              '../input/pretrainedefficient/resnet200d_fold1_best.pth',
              '../input/pretrainedefficient/resnet200d_fold2_best.pth',
              '../input/pretrainedefficient/resnet200d_fold3_best.pth',
              '../input/pretrainedefficient/resnet200d_fold4_best.pth'
             ]
# Per-fold prediction arrays, averaged later into the submission.
test_preds_1 = []
# Iterate over the list itself so adding or removing a checkpoint needs no other change.
for path in model_path:
    # `model` stays a module-level name because inference_func reads it from there.
    model = Classifier(CFG['model_name'], len(target_cols))
    model = model.to(device)
    # Map the weights onto the active device so the cell also runs without a GPU
    # (a fixed 'cuda:0' fails on CPU-only machines).
    model.load_state_dict(torch.load(path, map_location=device))
    print('resnet200d loaded')
    test_preds_1 += [inference_func(test_loader)]


    




In [None]:
'''
def inference(model, test_loader, device):
    model.eval()
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    probs = []
    for i, (image) in tk0:
        images = image.to(device)
        with torch.no_grad():
            y_pred = model(images)
            pred = y_pred.sigmoid().detach().cpu()
            probs.append(pred)
    probs = np.concatenate(probs)
    return probs

model = Classifier(CFG['model_name'],len(target_cols),pretrained=False)
model = model.to(device)
model.load_state_dict(torch.load('../input/pretrainedefficient/resnet200d_fold0_best.pth'))
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
test_loader = DataLoader(test_dataset,
                         batch_size=CFG['valid_bs'],
                         shuffle=False, 
                         num_workers=CFG['num_workers'], pin_memory=True)

predictions = inference(model,test_loader, device)

test[target_cols] = predictions
'''

In [None]:
# Load the sample submission as the frame whose target columns get filled in.
submission = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
# Element-wise mean over the per-fold prediction arrays collected in test_preds_1.
submission[target_cols] = np.stack(test_preds_1).mean(axis=0)

In [None]:
# Write the filled-in frame to 'submission.csv' in the working directory, without the index column.
submission.to_csv('submission.csv', index=False)