In [18]:
import random

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset

from libauc.datasets import CheXpert
from libauc.losses import AUCMLoss, CrossEntropyLoss, AUCM_MultiLabel
from libauc.models import DenseNet121, DenseNet169
from libauc.optimizers import PESG, Adam

In [2]:
def set_all_seeds(SEED):
    """Seed every random number generator this notebook may draw from.

    Seeds Python's ``random`` module, NumPy and PyTorch with the same value
    and switches cuDNN to deterministic mode with autotuning disabled.

    Args:
        SEED: integer seed shared by all generators.
    """
    # REPRODUCIBILITY
    # Python's built-in RNG is independent of the torch/NumPy generators, so it
    # has to be seeded explicitly or code that draws from it stays unseeded.
    random.seed(SEED)
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    # Prefer repeatable cuDNN kernels over auto-tuned (potentially varying) ones.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
# Datasets and loaders for the CheXpert train/valid splits.
root = '../CheXpert-v1.0-small/'

# class_index=-1 selects multi-label mode (5 diseases).
traindSet = CheXpert(
    csv_path=root+'train.csv',
    image_root_path=root,
    mode='train',
    class_index=-1,
    image_size=224,
    use_frontal=True,
    use_upsampling=False,
)
testSet = CheXpert(
    csv_path=root+'valid.csv',
    image_root_path=root,
    mode='valid',
    class_index=-1,
    image_size=224,
    use_frontal=True,
    use_upsampling=False,
)

# Only the training loader shuffles; the validation order stays fixed.
trainloader = torch.utils.data.DataLoader(
    traindSet, shuffle=True, batch_size=32, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testSet, shuffle=False, batch_size=32, num_workers=2
)

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 191027 images in total, 23385 positive images, 167642 negative images
Cardiomegaly(C0): imbalance ratio is 0.1224

Found 191027 images in total, 61493 positive images, 129534 negative images
Edema(C1): imbalance ratio is 0.3219

Found 191027 images in total, 12983 positive images, 178044 negative images
Consolidation(C2): imbalance ratio is 0.0680

Found 191027 images in total, 59583 positive images, 131444 negative images
Atelectasis(C3): imbalance ratio is 0.3119

Found 191027 images in total, 76899 positive images, 114128 negative images
Pleural Effusion(C4): imbalance ratio is 0.4026

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 202 images in total, 66 positive images, 136 negative images
Cardiomegaly(C0): imbalance ratio is 0.3267

Found 202 images in total, 42 positive images, 160 negative images
Edema(C1): imbalance ratio is 0.2079

Found 202 images in total

In [5]:
# parameters for the cross-entropy run
SEED, BATCH_SIZE = 123, 32
lr, weight_decay = 1e-4, 1e-5

# model: seed before construction, then move the network to the GPU
set_all_seeds(SEED)
model = DenseNet121(
    pretrained=True,
    last_activation=None,
    activations='relu',
    num_classes=5,
)
model = model.cuda()

# loss & optimizer
CELoss = CrossEntropyLoss()
optimizer = Adam(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

In [6]:
# Cross-entropy training: one optimisation step per batch, with a full
# validation pass (and checkpoint on improvement) every 400th batch.
best_val_auc = 0
for epoch in range(1):
    for idx, data in enumerate(trainloader):
        # ---- optimisation step ----
        train_data, train_labels = data
        train_data = train_data.cuda()
        train_labels = train_labels.cuda()
        y_pred = model(train_data)
        loss = CELoss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ---- validation (batches 0, 400, 800, ...) ----
        if idx % 400 != 0:
            continue

        model.eval()
        with torch.no_grad():
            test_pred, test_true = [], []
            for jdx, data in enumerate(testloader):
                test_data, test_labels = data
                test_data = test_data.cuda()
                y_pred = model(test_data)
                test_pred.append(y_pred.cpu().detach().numpy())
                test_true.append(test_labels.numpy())

            # Stack the per-batch arrays and score the whole validation set at once.
            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            val_auc_mean = roc_auc_score(test_true, test_pred)
        model.train()

        # Keep the weights of the best-scoring checkpoint seen so far.
        if val_auc_mean > best_val_auc:
            best_val_auc = val_auc_mean
            torch.save(model.state_dict(), 'ce_pretrained_model.pth')

        print('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc ))

Epoch=0, BatchID=0, Val_AUC=0.5061, Best_Val_AUC=0.5061
Epoch=0, BatchID=400, Val_AUC=0.8558, Best_Val_AUC=0.8558
Epoch=0, BatchID=800, Val_AUC=0.8749, Best_Val_AUC=0.8749
Epoch=0, BatchID=1200, Val_AUC=0.8654, Best_Val_AUC=0.8749
Epoch=0, BatchID=1600, Val_AUC=0.8767, Best_Val_AUC=0.8767
Epoch=0, BatchID=2000, Val_AUC=0.8755, Best_Val_AUC=0.8767
Epoch=0, BatchID=2400, Val_AUC=0.8808, Best_Val_AUC=0.8808
Epoch=0, BatchID=2800, Val_AUC=0.8805, Best_Val_AUC=0.8808
Epoch=0, BatchID=3200, Val_AUC=0.8769, Best_Val_AUC=0.8808
Epoch=0, BatchID=3600, Val_AUC=0.8828, Best_Val_AUC=0.8828
Epoch=0, BatchID=4000, Val_AUC=0.8854, Best_Val_AUC=0.8854
Epoch=0, BatchID=4400, Val_AUC=0.8623, Best_Val_AUC=0.8854
Epoch=0, BatchID=4800, Val_AUC=0.8761, Best_Val_AUC=0.8854
Epoch=0, BatchID=5200, Val_AUC=0.8709, Best_Val_AUC=0.8854
Epoch=0, BatchID=5600, Val_AUC=0.8707, Best_Val_AUC=0.8854


In [16]:
# Rebuild the CheXpert datasets/loaders, this time with verbose=False.
root = '../CheXpert-v1.0-small/'

# class_index=-1 selects multi-label mode (5 diseases).
traindSet = CheXpert(
    csv_path=root+'train.csv',
    image_root_path=root,
    mode='train',
    class_index=-1,
    image_size=224,
    use_frontal=True,
    use_upsampling=False,
    verbose=False,
)
testSet = CheXpert(
    csv_path=root+'valid.csv',
    image_root_path=root,
    mode='valid',
    class_index=-1,
    image_size=224,
    use_frontal=True,
    use_upsampling=False,
    verbose=False,
)

# Only the training loader shuffles; the validation order stays fixed.
trainloader = torch.utils.data.DataLoader(
    traindSet, shuffle=True, batch_size=32, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testSet, shuffle=False, batch_size=32, num_workers=2
)

In [19]:
# parameters for the AUCM multi-label run
SEED, BATCH_SIZE = 123, 32

lr, gamma, margin = 0.1, 500, 1.0
weight_decay = 1e-5
# Taken from the training set; presumably the per-class imbalance ratios
# reported when the dataset is built -- confirm against the CheXpert class.
imratio = traindSet.imratio_list

# model: seed before construction, then move the network to the GPU
set_all_seeds(SEED)
model = DenseNet121(
    pretrained=True,
    last_activation=None,
    activations='relu',
    num_classes=5,
)
model = model.cuda()

# loss & optimizer: PESG is handed the loss object's a, b and alpha attributes
Loss = AUCM_MultiLabel(imratio=imratio, num_classes=5)
optimizer = PESG(
    model,
    a=Loss.a, b=Loss.b, alpha=Loss.alpha,
    lr=lr, gamma=gamma, margin=margin,
    weight_decay=weight_decay,
    device='cuda',
)

In [20]:
# AUCM training: five epochs, validating (and checkpointing on improvement)
# every 400th batch.
best_val_auc = 0
for epoch in range(5):
    # From the second epoch on, call the optimizer's regularizer update before
    # training (the logged output shows the learning rate dropping 0.1 -> 0.01).
    if epoch > 0:
        optimizer.update_regularizer(decay_factor=10)

    for idx, data in enumerate(trainloader):
        # ---- optimisation step ----
        train_data, train_labels = data
        train_data = train_data.cuda()
        train_labels = train_labels.cuda()
        y_pred = model(train_data)
        loss = Loss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ---- validation (batches 0, 400, 800, ...) ----
        if idx % 400 != 0:
            continue

        model.eval()
        with torch.no_grad():
            test_pred, test_true = [], []
            for jdx, data in enumerate(testloader):
                test_data, test_label = data
                test_data = test_data.cuda()
                y_pred = model(test_data)
                test_pred.append(y_pred.cpu().detach().numpy())
                test_true.append(test_label.numpy())

            # Stack the per-batch arrays and score the whole validation set at once.
            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            val_auc = roc_auc_score(test_true, test_pred)
        model.train()

        # Persist the weights whenever the validation AUC improves.
        if val_auc > best_val_auc:
            torch.save(model.state_dict(), 'full_model.pth')
            best_val_auc = val_auc

        print('Epoch=%s, BatchID=%s, Val_AUC=%.4f, lr=%.4f'%(epoch, idx, val_auc,  optimizer.lr))

print ('Best Val_AUC is %.4f'%best_val_auc)

Epoch=0, BatchID=0, Val_AUC=0.5042, lr=0.1000
Epoch=0, BatchID=400, Val_AUC=0.5416, lr=0.1000
Epoch=0, BatchID=800, Val_AUC=0.5527, lr=0.1000
Epoch=0, BatchID=1200, Val_AUC=0.5929, lr=0.1000
Epoch=0, BatchID=1600, Val_AUC=0.7006, lr=0.1000
Epoch=0, BatchID=2000, Val_AUC=0.6790, lr=0.1000
Epoch=0, BatchID=2400, Val_AUC=0.7236, lr=0.1000
Epoch=0, BatchID=2800, Val_AUC=0.5861, lr=0.1000
Epoch=0, BatchID=3200, Val_AUC=0.7258, lr=0.1000
Epoch=0, BatchID=3600, Val_AUC=0.7654, lr=0.1000
Epoch=0, BatchID=4000, Val_AUC=0.7181, lr=0.1000
Epoch=0, BatchID=5200, Val_AUC=0.7849, lr=0.1000
Epoch=0, BatchID=5600, Val_AUC=0.7505, lr=0.1000
Reducing learning rate to 0.01000 @ T=5970!
Updating regularizer @ T=5970!
Epoch=1, BatchID=0, Val_AUC=0.7997, lr=0.0100
Epoch=1, BatchID=400, Val_AUC=0.8164, lr=0.0100
Epoch=1, BatchID=800, Val_AUC=0.8202, lr=0.0100
Epoch=1, BatchID=1200, Val_AUC=0.8165, lr=0.0100
Epoch=1, BatchID=1600, Val_AUC=0.8045, lr=0.0100
Epoch=1, BatchID=2000, Val_AUC=0.8199, lr=0.0100
Epoc

In [None]:
# Test the model on extra_valid_images:
# dataloader
# The trailing slash matters: the CSV paths below are built by plain string
# concatenation (root+'train.csv'), so without it they would resolve to
# '../data/extraValidtrain.csv' / '../data/extraValidvalid.csv'.
root = '../data/extraValid/'
# Index: -1 denotes multi-label mode including 5 diseases
traindSet = CheXpert(csv_path=root+'train.csv', image_root_path=root, use_upsampling=False, use_frontal=True, image_size=224, mode='train', class_index=-1, verbose=False)
testSet =  CheXpert(csv_path=root+'valid.csv',  image_root_path=root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=-1, verbose=False)
# Only the training loader shuffles; the evaluation order stays fixed.
trainloader =  torch.utils.data.DataLoader(traindSet, batch_size=32, num_workers=2, shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet, batch_size=32, num_workers=2, shuffle=False)