# Import

In [1]:
from LibAUC.libauc.losses import MultiLabelAUCMLoss,CrossEntropyLoss
from LibAUC.libauc.optimizers import PESG,Adam
from LibAUC.libauc.models import densenet121 as DenseNet121
from LibAUC.libauc.datasets import CheXpert
from LibAUC.libauc.metrics import auc_roc_score # for multi-task

from PIL import Image
import numpy as np
import pandas as pd
import torch 
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn.functional as F
import os
import cv2
import shutil
import warnings
from datetime import datetime
from datetime import timedelta

In [2]:
def set_all_seeds(SEED):
    """Seed every random number generator this notebook relies on.

    Args:
        SEED: Integer seed applied to Python's ``random`` module, NumPy's
            global generator and PyTorch.

    Also switches cuDNN to deterministic mode with benchmarking disabled.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    # REPRODUCIBILITY
    random.seed(SEED)  # was previously left unseeded
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # Explicit CUDA seeding; a no-op on machines without CUDA.
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


warnings.simplefilter(action='ignore',category=FutureWarning) # Suppress FutureWarning messages

In [3]:
# parameters
# -- run control --
SEED = 123          # handed to set_all_seeds before each model is built
BATCH_SIZE = 32     # batch size for both the train and validation loaders
total_epochs = 6    # number of passes over the training loader

# -- values forwarded to the PESG optimizer --
lr = 0.1
margin = 1.0
epoch_decay = 2e-3
weight_decay = 1e-5

# Checkpoints are written under ./pth_files; create the folder up front.
os.makedirs(
    os.path.join(os.getcwd(), 'pth_files'),
    exist_ok=True,
)

In [4]:
# Record the installed PyTorch version next to the results.
print(torch.__version__)

2.1.1+cu118


# Origin

In [5]:
# Dataset folder for this run. Joining with a trailing '' appends the
# platform's own path separator instead of a hard-coded Windows backslash,
# so root+'train.csv' resolves on every OS (the result is unchanged on Windows).
root = os.path.join(os.getcwd(),'CheXpert_origin','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
# Only the training loader shuffles; validation order stays fixed.
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
# Seed right before construction so every subset starts from the same state.
set_all_seeds(SEED)
# last_activation=None: the training cell applies the sigmoid itself.
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
# The optimizer receives the loss object in addition to the model parameters.
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 133781 images in total, 16297 positive images, 117484 negative images
Cardiomegaly(C0): imbalance ratio is 0.1218

Found 133781 images in total, 42794 positive images, 90987 negative images
Edema(C1): imbalance ratio is 0.3199

Found 133781 images in total, 9055 positive images, 124726 negative images
Consolidation(C2): imbalance ratio is 0.0677

Found 133781 images in total, 41919 positive images, 91862 negative images
Atelectasis(C3): imbalance ratio is 0.3133

Found 133781 images in total, 53675 positive images, 80106 negative images
Pleural Effusion(C4): imbalance ratio is 0.4012

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19090 images in total, 2242 positive images, 16848 negative images
Cardiomegaly(C0): imbalance ratio is 0.1174

Found 19090 images in total, 6161 positive images, 12929 negative images
Edema(C1): imbalance ratio is 0.3227

Found 19090 image

In [6]:
# training
# Runs `total_epochs` passes over `trainloader`. Every 400 batches, and on the
# last batch of each epoch, the model is scored on `testloader`; the weights
# are checkpointed whenever the mean validation AUC improves.
print ('Start Training')
print ('-'*30)
begin_time = datetime.now()
checkpoint_path = os.path.join(os.getcwd(),'pth_files','origin_model_densenet121.pth')

best_val_auc = 0
for epoch in range(total_epochs):
    # Force training mode at the start of every epoch. If an earlier run of
    # this cell was interrupted during validation, the model is still in eval
    # mode and would otherwise be trained that way on re-run.
    model.train()
    if epoch > 0:
        # NOTE(review): presumably lowers the learning rate by `decay_factor`
        # and refreshes PESG's regularizer reference - confirm against the
        # installed LibAUC version.
        optimizer.update_regularizer(decay_factor=10)

    last_idx = len(trainloader)-1
    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # The model was built with last_activation=None, so squash logits here.
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % 400 == 0 or idx == last_idx:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # No .detach() needed: nothing tracks gradients under no_grad.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Average whatever auc_roc_score returns into a single number.
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Keep only the best-scoring weights on disk.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),checkpoint_path)

            # Wall-clock time since the cell started, shown as HH:MM:SS.
            elapsed = int((datetime.now()-begin_time).total_seconds())
            hours,remainders = divmod(elapsed,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Start Training
------------------------------


KeyboardInterrupt: 

# Male

In [None]:
# Dataset folder for this run. Joining with a trailing '' appends the
# platform's own path separator instead of a hard-coded Windows backslash,
# so root+'train.csv' resolves on every OS (the result is unchanged on Windows).
root = os.path.join(os.getcwd(),'CheXpert_male','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
# Only the training loader shuffles; validation order stays fixed.
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
# Seed right before construction so every subset starts from the same state.
set_all_seeds(SEED)
# last_activation=None: the training cell applies the sigmoid itself.
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
# The optimizer receives the loss object in addition to the model parameters.
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

In [None]:
# training
# Runs `total_epochs` passes over `trainloader`. Every 400 batches, and on the
# last batch of each epoch, the model is scored on `testloader`; the weights
# are checkpointed whenever the mean validation AUC improves.
print ('Start Training')
print ('-'*30)
begin_time = datetime.now()
checkpoint_path = os.path.join(os.getcwd(),'pth_files','male_model_densenet121.pth')

best_val_auc = 0
for epoch in range(total_epochs):
    # Force training mode at the start of every epoch. If an earlier run of
    # this cell was interrupted during validation, the model is still in eval
    # mode and would otherwise be trained that way on re-run.
    model.train()
    if epoch > 0:
        # NOTE(review): presumably lowers the learning rate by `decay_factor`
        # and refreshes PESG's regularizer reference - confirm against the
        # installed LibAUC version.
        optimizer.update_regularizer(decay_factor=10)

    last_idx = len(trainloader)-1
    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # The model was built with last_activation=None, so squash logits here.
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % 400 == 0 or idx == last_idx:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # No .detach() needed: nothing tracks gradients under no_grad.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Average whatever auc_roc_score returns into a single number.
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Keep only the best-scoring weights on disk.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),checkpoint_path)

            # Wall-clock time since the cell started, shown as HH:MM:SS.
            elapsed = int((datetime.now()-begin_time).total_seconds())
            hours,remainders = divmod(elapsed,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

# Female

In [None]:
# Dataset folder for this run. Joining with a trailing '' appends the
# platform's own path separator instead of a hard-coded Windows backslash,
# so root+'train.csv' resolves on every OS (the result is unchanged on Windows).
root = os.path.join(os.getcwd(),'CheXpert_female','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
# Only the training loader shuffles; validation order stays fixed.
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
# Seed right before construction so every subset starts from the same state.
set_all_seeds(SEED)
# last_activation=None: the training cell applies the sigmoid itself.
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
# The optimizer receives the loss object in addition to the model parameters.
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

In [None]:
# training
# Runs `total_epochs` passes over `trainloader`. Every 400 batches, and on the
# last batch of each epoch, the model is scored on `testloader`; the weights
# are checkpointed whenever the mean validation AUC improves.
print ('Start Training')
print ('-'*30)
begin_time = datetime.now()
checkpoint_path = os.path.join(os.getcwd(),'pth_files','female_model_densenet121.pth')

best_val_auc = 0
for epoch in range(total_epochs):
    # Force training mode at the start of every epoch. If an earlier run of
    # this cell was interrupted during validation, the model is still in eval
    # mode and would otherwise be trained that way on re-run.
    model.train()
    if epoch > 0:
        # NOTE(review): presumably lowers the learning rate by `decay_factor`
        # and refreshes PESG's regularizer reference - confirm against the
        # installed LibAUC version.
        optimizer.update_regularizer(decay_factor=10)

    last_idx = len(trainloader)-1
    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # The model was built with last_activation=None, so squash logits here.
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % 400 == 0 or idx == last_idx:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # No .detach() needed: nothing tracks gradients under no_grad.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Average whatever auc_roc_score returns into a single number.
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Keep only the best-scoring weights on disk.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),checkpoint_path)

            # Wall-clock time since the cell started, shown as HH:MM:SS.
            elapsed = int((datetime.now()-begin_time).total_seconds())
            hours,remainders = divmod(elapsed,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

# before40

In [None]:
# Dataset folder for this run. Joining with a trailing '' appends the
# platform's own path separator instead of a hard-coded Windows backslash,
# so root+'train.csv' resolves on every OS (the result is unchanged on Windows).
root = os.path.join(os.getcwd(),'CheXpert_before40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
# Only the training loader shuffles; validation order stays fixed.
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
# Seed right before construction so every subset starts from the same state.
set_all_seeds(SEED)
# last_activation=None: the training cell applies the sigmoid itself.
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
# The optimizer receives the loss object in addition to the model parameters.
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

In [None]:
# training
# Runs `total_epochs` passes over `trainloader`. Every 400 batches, and on the
# last batch of each epoch, the model is scored on `testloader`; the weights
# are checkpointed whenever the mean validation AUC improves.
print ('Start Training')
print ('-'*30)
begin_time = datetime.now()
checkpoint_path = os.path.join(os.getcwd(),'pth_files','before40_model_densenet121.pth')

best_val_auc = 0
for epoch in range(total_epochs):
    # Force training mode at the start of every epoch. If an earlier run of
    # this cell was interrupted during validation, the model is still in eval
    # mode and would otherwise be trained that way on re-run.
    model.train()
    if epoch > 0:
        # NOTE(review): presumably lowers the learning rate by `decay_factor`
        # and refreshes PESG's regularizer reference - confirm against the
        # installed LibAUC version.
        optimizer.update_regularizer(decay_factor=10)

    last_idx = len(trainloader)-1
    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # The model was built with last_activation=None, so squash logits here.
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % 400 == 0 or idx == last_idx:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # No .detach() needed: nothing tracks gradients under no_grad.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Average whatever auc_roc_score returns into a single number.
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Keep only the best-scoring weights on disk.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),checkpoint_path)

            # Wall-clock time since the cell started, shown as HH:MM:SS.
            elapsed = int((datetime.now()-begin_time).total_seconds())
            hours,remainders = divmod(elapsed,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

# after40

In [None]:
# Dataset folder for this run. Joining with a trailing '' appends the
# platform's own path separator instead of a hard-coded Windows backslash,
# so root+'train.csv' resolves on every OS (the result is unchanged on Windows).
root = os.path.join(os.getcwd(),'CheXpert_after40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
# Only the training loader shuffles; validation order stays fixed.
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
# Seed right before construction so every subset starts from the same state.
set_all_seeds(SEED)
# last_activation=None: the training cell applies the sigmoid itself.
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
# The optimizer receives the loss object in addition to the model parameters.
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)


In [None]:
# training
# Runs `total_epochs` passes over `trainloader`. Every 400 batches, and on the
# last batch of each epoch, the model is scored on `testloader`; the weights
# are checkpointed whenever the mean validation AUC improves.
print ('Start Training')
print ('-'*30)
begin_time = datetime.now()
checkpoint_path = os.path.join(os.getcwd(),'pth_files','after40_model_densenet121.pth')

best_val_auc = 0
for epoch in range(total_epochs):
    # Force training mode at the start of every epoch. If an earlier run of
    # this cell was interrupted during validation, the model is still in eval
    # mode and would otherwise be trained that way on re-run.
    model.train()
    if epoch > 0:
        # NOTE(review): presumably lowers the learning rate by `decay_factor`
        # and refreshes PESG's regularizer reference - confirm against the
        # installed LibAUC version.
        optimizer.update_regularizer(decay_factor=10)

    last_idx = len(trainloader)-1
    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # The model was built with last_activation=None, so squash logits here.
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % 400 == 0 or idx == last_idx:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # No .detach() needed: nothing tracks gradients under no_grad.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Average whatever auc_roc_score returns into a single number.
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Keep only the best-scoring weights on disk.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),checkpoint_path)

            # Wall-clock time since the cell started, shown as HH:MM:SS.
            elapsed = int((datetime.now()-begin_time).total_seconds())
            hours,remainders = divmod(elapsed,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')