In [None]:
from albumentations.pytorch import ToTensorV2
import albumentations as albu

import pandas as pd
import numpy as np
import os
import math
from tqdm import tqdm
import cv2
from PIL import Image
import random

import torch
from torch import optim, nn
from torch.optim import Adam
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.cuda.amp import autocast, GradScaler
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.metrics import roc_auc_score

import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm

# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device("cpu")


def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and the current CUDA
    device), exports the seed as the ``PYTHONHASHSEED`` environment variable
    and switches ``torch.backends.cudnn.deterministic`` on.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True


# Seed once, up front, before any model or data loader is built.
seed_torch(seed=42)

In [None]:
# Label columns read from the training frame; their order is the order of the
# 11 values in every label vector the dataset yields.
target_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# Directory holding the training images, stored as <StudyInstanceUID>.jpg.
TRAIN_PATH = '../input/ranzcr-clip-catheter-line-classification/train'

class CustomResNext(nn.Module):
    """timm backbone whose final ``fc`` layer is replaced by a new linear head.

    Args:
        model_name: Architecture name handed to ``timm.create_model``. The
            created model must expose its classifier as ``.fc``, because that
            attribute is read and replaced here.
        pretrained: Forwarded to ``timm.create_model``.
        num_classes: Number of output logits of the new head. Defaults to 11,
            the number of entries in ``target_cols``.
    """

    def __init__(self, model_name='resnext50_32x4d', pretrained=False, num_classes=11):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Keep the backbone's feature width, swap only the classifier.
        n_features = self.model.fc.in_features
        self.model.fc = nn.Linear(n_features, num_classes)

    def forward(self, x):
        """Return the raw logits (no sigmoid applied) for the batch ``x``."""
        return self.model(x)


class TrainDataset(Dataset):
    """Training images paired with their multi-label target vectors.

    Each item is read from ``{TRAIN_PATH}/{StudyInstanceUID}.jpg`` and its
    labels come from the ``target_cols`` columns of ``df``.

    Args:
        df: Frame with a ``StudyInstanceUID`` column and every column listed
            in ``target_cols``.
        transform: Optional callable invoked as ``transform(image=rgb_array)``
            that returns a mapping with an ``'image'`` entry. When falsy, the
            RGB array is returned as loaded.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.labels = df[target_cols].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_path = f'{TRAIN_PATH}/{self.file_names[idx]}.jpg'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # OpenCV loads BGR; convert to RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']

        label = torch.tensor(self.labels[idx]).float()
        return image, label
    

size = 512  # side length passed as both height and width to the resize/crop ops


def get_transforms(data = ''):
    """Build the albumentations pipeline for one phase.

    Args:
        data: ``'train'`` for the augmenting pipeline (random resized crop and
            horizontal flip), ``'valid'`` for the plain resize pipeline.

    Returns:
        An ``albu.Compose`` ending in Normalize + ToTensorV2, or ``None`` when
        ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    def _with_tensor_tail(leading_ops):
        # Both phases end the same way: normalize per channel, then convert to
        # a tensor (the constants look like the usual ImageNet statistics).
        return albu.Compose(leading_ops + [
            albu.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    if data == 'train':
        return _with_tensor_tail([
            albu.RandomResizedCrop(size, size, scale=(0.85, 1), p=1),
            albu.HorizontalFlip(p=0.5),
            #albu.ShiftScaleRotate(p=0.2),
            #albu.Cutout(max_h_size=int(size * 0.03), max_w_size=int(size * 0.03), num_holes=5, p=0.1),
        ])
    if data == 'valid':
        return _with_tensor_tail([albu.Resize(size, size)])
    return None

In [None]:
def get_score(y_true, y_pred):
    """Score predictions column by column with ``roc_auc_score``.

    Args:
        y_true: 2-D array of ground-truth labels, one column per target.
        y_pred: 2-D array of predicted scores, indexed the same way as ``y_true``.

    Returns:
        ``(mean_score, per_column_scores)``: the ``np.mean`` of the column
        scores, followed by the list of individual column scores.
    """
    n_targets = y_true.shape[1]
    per_target = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(n_targets)
    ]
    return np.mean(per_target), per_target


class AverageMeter:
    """Running weighted average of a stream of values.

    Attributes:
        val: Value passed to the most recent ``update`` call.
        sum: Sum of ``val * n`` over all updates since the last reset.
        count: Sum of ``n`` over all updates since the last reset.
        avg: ``sum / count`` as of the most recent update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. a batch mean and its batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Both parts go through ``%d`` formatting, so fractional seconds are
    truncated rather than rounded.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp, as returned by ``time.time()``.
        percent: Fraction of the work already done, in (0, 1].

    Returns:
        ``'<elapsed> (remain <remaining>)'`` with both parts formatted by
        ``asMinutes``.

    Raises:
        ZeroDivisionError: If ``percent`` is 0.
    """
    # `time` is not among the notebook's top-level imports, so bring it into
    # scope here; without this the function fails with NameError.
    import time

    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction completed so far.
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch using CUDA automatic mixed precision.

    Args:
        train_loader: Iterable of ``(images, labels)`` batches; must support ``len()``.
        model: Network to optimize; switched to train mode here.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Zero-based epoch index, used only in the progress output.
        scheduler: Accepted for call-site compatibility; it is not stepped
            here (``train_loop`` steps it once per epoch).
        device: Device the batches are moved to.

    Returns:
        Mean training loss over the epoch, weighted by batch size.
    """
    # A new scaler is created on every call, so the loss scale restarts each epoch.
    scaler = GradScaler()
    losses = AverageMeter()

    model.train()
    for step, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with autocast():
            y_preds = model(images)
            loss = criterion(y_preds, labels)

        losses.update(loss.item(), batch_size)

        scaler.scale(loss).backward()
        # The gradients are still multiplied by the loss scale at this point.
        # Unscale them first so the max-norm threshold is applied to the true
        # gradient norm instead of the scaled one.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1000)

        # scaler.step skips the optimizer step if the gradients contain inf/NaN.
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        if step % 100 == 0 or step == (len(train_loader) - 1):
            print(f'Epoch: {epoch+1}  [{step} / {len(train_loader)}]')
            print(f'Loss: {losses.val} {losses.avg}')

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on a validation loader.

    Args:
        valid_loader: Iterable of ``(images, labels)`` batches; must support ``len()``.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, predictions)``: the batch-size-weighted mean loss and the
        sigmoid of the logits for every batch, concatenated along the first
        axis in loader order.

    Raises:
        ValueError: From ``np.concatenate`` if the loader yields no batches.
    """
    losses = AverageMeter()
    model.eval()

    preds = []

    # Neither the forward pass nor the loss needs autograd during evaluation.
    with torch.no_grad():
        for step, (images, labels) in enumerate(valid_loader):
            images = images.to(device)
            labels = labels.to(device)
            batch_size = labels.size(0)

            y_preds = model(images)
            loss = criterion(y_preds, labels)
            losses.update(loss.item(), batch_size)

            preds.append(y_preds.sigmoid().to("cpu").numpy())

            if step % 100 == 0 or step == (len(valid_loader)-1):
                print(f'EVAL: {step} / {len(valid_loader)}')
                print(f'Loss: {losses.val} {losses.avg}')

    predictions = np.concatenate(preds)
    return losses.avg, predictions



def train_loop(folds, fold):
    """Train and validate a fresh model on one cross-validation fold.

    Rows whose ``'fold'`` value equals ``fold`` form the validation split and
    all other rows form the training split. After each epoch the model is
    validated, and whenever the validation loss improves the whole model is
    saved to ``My_resnext50_fold{fold}_best{epoch}.pth``.

    Args:
        folds: Frame with ``StudyInstanceUID``, the ``target_cols`` columns
            and an integer ``'fold'`` column.
        fold: Id of the fold held out for validation.

    Returns:
        The model as it stands after the final epoch; this is not necessarily
        the best checkpoint written to disk.
    """
    n_epochs = 6  # also the length of the cosine learning-rate schedule

    train_folds = folds[folds['fold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['fold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds[target_cols].values

    model = CustomResNext("resnext50_32x4d", pretrained=True)
    model.to(device)
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-6, amsgrad=False)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=n_epochs, eta_min=1e-6, last_epoch=-1
    )

    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='valid'))

    train_loader = DataLoader(train_dataset,
                              batch_size=34,
                              num_workers=4,
                              shuffle=True,
                              drop_last=False,
                              pin_memory=True)

    # No shuffling, so predictions stay aligned with the rows of valid_labels.
    valid_loader = DataLoader(valid_dataset,
                              batch_size=32,
                              num_workers=4,
                              shuffle=False,
                              drop_last=False,
                              pin_memory=True)

    best_loss = np.inf

    for epoch in range(n_epochs):
        print()
        print(f'Start {epoch+1} EPOCH!!!')
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            # The fold id is part of the filename so that later folds do not
            # overwrite the checkpoints written by earlier ones.
            # NOTE: this pickles the whole module, so loading it requires the
            # CustomResNext class to be importable.
            torch.save(model, f'My_resnext50_fold{fold}_best{epoch+1}.pth')

        scheduler.step()

        score, scores = get_score(valid_labels, preds)
        print(f"Epoch {epoch+1} - avg_train_loss: {avg_loss} avg_val_loss: {avg_val_loss}")
        print(f"Score: {score} Scores: {np.round(scores, decimals=4)}")

    return model

# Re-declares the label columns with the same names, in the same order, as the
# list defined next to TRAIN_PATH.
target_cols = [
    'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
    'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
    'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
    'Swan Ganz Catheter Present',
]


def main():
    """Assign patient-grouped folds to the training table and train each fold.

    Reads ``train.csv``, splits it into 4 folds with ``GroupKFold`` grouped on
    ``PatientID``, displays the fold sizes and runs ``train_loop`` for every
    selected fold.

    Returns:
        The model returned by ``train_loop`` for the last fold trained, or
        ``None`` if no fold was selected.
    """
    n_folds = 4
    train_fold_ids = [0, 1, 2, 3]  # trim this list to train only some folds

    folds = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')

    splitter = GroupKFold(n_splits=n_folds)
    groups = folds["PatientID"].values
    for n, (_, val_index) in enumerate(splitter.split(folds, folds[target_cols], groups)):
        folds.loc[val_index, 'fold'] = n
    # The column is created by partial assignment, so cast it to int afterwards.
    folds['fold'] = folds['fold'].astype(int)
    # `display` is provided by IPython/Jupyter.
    display(folds.groupby('fold').size())

    model = None
    for fold in range(n_folds):
        if fold in train_fold_ids:
            model = train_loop(folds, fold)

    # Hand the result back so that `model = main()` binds a model, not None.
    return model

In [None]:
# Notebook entry point: run the cross-validation training defined in main().
model = main()