# Import

In [1]:
from LibAUC.libauc.losses import MultiLabelAUCMLoss,CrossEntropyLoss
from LibAUC.libauc.optimizers import PESG,Adam
from LibAUC.libauc.models import densenet121 as DenseNet121
from LibAUC.libauc.models import resnet34 as Resnet34
from LibAUC.libauc.models import resnet50 as Resnet50
from LibAUC.libauc.datasets import CheXpert
from LibAUC.libauc.metrics import auc_roc_score # for multi-task

from PIL import Image
import numpy as np
import pandas as pd
import torch 
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn.functional as F
import os
import cv2
import shutil
import warnings
from datetime import datetime
from datetime import timedelta

In [2]:
def set_all_seeds(SEED):
    """Seed every random number generator this notebook can touch.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU generator plus, explicitly, every CUDA device), then
    switches cuDNN to deterministic mode with auto-tuning disabled.

    Args:
        SEED: Integer seed shared by all generators.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    # REPRODUCIBILITY
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # Explicit CUDA seeding; per the PyTorch docs this call is silently
    # ignored when CUDA is not available.
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
warnings.simplefilter(action='ignore',category=FutureWarning) # Suppress FutureWarning messages so they do not clutter the cell outputs

In [3]:
# Parameters shared by every training cell in this notebook
SEED = 123             # passed to set_all_seeds() right before each model is built
BATCH_SIZE = 32        # batch size of both the training and the validation DataLoader
lr = 0.1               # `lr` argument of the PESG optimizer
epoch_decay = 2e-3     # `epoch_decay` argument of the PESG optimizer
weight_decay = 1e-5    # `weight_decay` argument of the PESG optimizer
margin = 1.0           # `margin` argument of the PESG optimizer
total_epochs = 6       # number of passes over the training loader

# Best checkpoints are saved under ./pth_files, so create it up front.
os.makedirs(
    os.path.join(os.getcwd(), 'pth_files'),
    exist_ok=True,
)
print(torch.__version__)

2.1.1+cu118


# Densenet121

## Origin

In [4]:
# Train DenseNet121 on the 'CheXpert_origin' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_origin','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','origin_model_densenet121.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 133781 images in total, 16297 positive images, 117484 negative images
Cardiomegaly(C0): imbalance ratio is 0.1218

Found 133781 images in total, 42794 positive images, 90987 negative images
Edema(C1): imbalance ratio is 0.3199

Found 133781 images in total, 9055 positive images, 124726 negative images
Consolidation(C2): imbalance ratio is 0.0677

Found 133781 images in total, 41919 positive images, 91862 negative images
Atelectasis(C3): imbalance ratio is 0.3133

Found 133781 images in total, 53675 positive images, 80106 negative images
Pleural Effusion(C4): imbalance ratio is 0.4012

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19090 images in total, 2242 positive images, 16848 negative images
Cardiomegaly(C0): imbalance ratio is 0.1174

Found 19090 images in total, 6161 positive images, 12929 negative images
Edema(C1): imbalance ratio is 0.3227

Found 19090 image

## Male

In [5]:
# Train DenseNet121 on the 'CheXpert_male' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_male','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','male_model_densenet121.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 78644 images in total, 9943 positive images, 68701 negative images
Cardiomegaly(C0): imbalance ratio is 0.1264

Found 78644 images in total, 24591 positive images, 54053 negative images
Edema(C1): imbalance ratio is 0.3127

Found 78644 images in total, 5314 positive images, 73330 negative images
Consolidation(C2): imbalance ratio is 0.0676

Found 78644 images in total, 24764 positive images, 53880 negative images
Atelectasis(C3): imbalance ratio is 0.3149

Found 78644 images in total, 31305 positive images, 47339 negative images
Pleural Effusion(C4): imbalance ratio is 0.3981

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 11159 images in total, 1392 positive images, 9767 negative images
Cardiomegaly(C0): imbalance ratio is 0.1247

Found 11159 images in total, 3541 positive images, 7618 negative images
Edema(C1): imbalance ratio is 0.3173

Found 11159 images in total

## Female

In [6]:
# Train DenseNet121 on the 'CheXpert_female' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_female','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','female_model_densenet121.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 55137 images in total, 6354 positive images, 48783 negative images
Cardiomegaly(C0): imbalance ratio is 0.1152

Found 55137 images in total, 18203 positive images, 36934 negative images
Edema(C1): imbalance ratio is 0.3301

Found 55137 images in total, 3741 positive images, 51396 negative images
Consolidation(C2): imbalance ratio is 0.0678

Found 55137 images in total, 17155 positive images, 37982 negative images
Atelectasis(C3): imbalance ratio is 0.3111

Found 55137 images in total, 22370 positive images, 32767 negative images
Pleural Effusion(C4): imbalance ratio is 0.4057

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 7931 images in total, 850 positive images, 7081 negative images
Cardiomegaly(C0): imbalance ratio is 0.1072

Found 7931 images in total, 2620 positive images, 5311 negative images
Edema(C1): imbalance ratio is 0.3303

Found 7931 images in total, 58

## before40

In [7]:
# Train DenseNet121 on the 'CheXpert_before40' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_before40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','before40_model_densenet121.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19562 images in total, 1817 positive images, 17745 negative images
Cardiomegaly(C0): imbalance ratio is 0.0929

Found 19562 images in total, 4292 positive images, 15270 negative images
Edema(C1): imbalance ratio is 0.2194

Found 19562 images in total, 1250 positive images, 18312 negative images
Consolidation(C2): imbalance ratio is 0.0639

Found 19562 images in total, 4759 positive images, 14803 negative images
Atelectasis(C3): imbalance ratio is 0.2433

Found 19562 images in total, 5784 positive images, 13778 negative images
Pleural Effusion(C4): imbalance ratio is 0.2957

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 2729 images in total, 228 positive images, 2501 negative images
Cardiomegaly(C0): imbalance ratio is 0.0835

Found 2729 images in total, 562 positive images, 2167 negative images
Edema(C1): imbalance ratio is 0.2059

Found 2729 images in total, 181 po

## after40

In [8]:
# Train DenseNet121 on the 'CheXpert_after40' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_after40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = DenseNet121(pretrained=True,last_activation=None,activations='relu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','after40_model_densenet121.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 114219 images in total, 14480 positive images, 99739 negative images
Cardiomegaly(C0): imbalance ratio is 0.1268

Found 114219 images in total, 38502 positive images, 75717 negative images
Edema(C1): imbalance ratio is 0.3371

Found 114219 images in total, 7805 positive images, 106414 negative images
Consolidation(C2): imbalance ratio is 0.0683

Found 114219 images in total, 37160 positive images, 77059 negative images
Atelectasis(C3): imbalance ratio is 0.3253

Found 114219 images in total, 47891 positive images, 66328 negative images
Pleural Effusion(C4): imbalance ratio is 0.4193

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 16361 images in total, 2014 positive images, 14347 negative images
Cardiomegaly(C0): imbalance ratio is 0.1231

Found 16361 images in total, 5599 positive images, 10762 negative images
Edema(C1): imbalance ratio is 0.3422

Found 16361 images

# Resnet34

## Origin

In [9]:
# Train Resnet34 on the 'CheXpert_origin' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_origin','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = Resnet34(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured for this cell shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','origin_model_resnet34.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 133781 images in total, 16297 positive images, 117484 negative images
Cardiomegaly(C0): imbalance ratio is 0.1218

Found 133781 images in total, 42794 positive images, 90987 negative images
Edema(C1): imbalance ratio is 0.3199

Found 133781 images in total, 9055 positive images, 124726 negative images
Consolidation(C2): imbalance ratio is 0.0677

Found 133781 images in total, 41919 positive images, 91862 negative images
Atelectasis(C3): imbalance ratio is 0.3133

Found 133781 images in total, 53675 positive images, 80106 negative images
Pleural Effusion(C4): imbalance ratio is 0.4012

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19090 images in total, 2242 positive images, 16848 negative images
Cardiomegaly(C0): imbalance ratio is 0.1174

Found 19090 images in total, 6161 positive images, 12929 negative images
Edema(C1): imbalance ratio is 0.3227

Found 19090 image

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to C:\Users\moc90/.cache\torch\hub\checkpoints\resnet34-333f7ec4.pth
100%|█████████████████████████████████████████████████████████████████████████████| 83.3M/83.3M [00:07<00:00, 11.6MB/s]


Start Training
------------------------------
(00:01:49)Epoch=0,BatchID=0,Val_AUC=0.4747,Best_Val_AUC=0.4747
(00:03:59)Epoch=0,BatchID=400,Val_AUC=0.7122,Best_Val_AUC=0.7122
(00:06:07)Epoch=0,BatchID=800,Val_AUC=0.7286,Best_Val_AUC=0.7286
(00:08:15)Epoch=0,BatchID=1200,Val_AUC=0.7269,Best_Val_AUC=0.7286
(00:10:25)Epoch=0,BatchID=1600,Val_AUC=0.7446,Best_Val_AUC=0.7446
(00:12:33)Epoch=0,BatchID=2000,Val_AUC=0.7470,Best_Val_AUC=0.7470
(00:14:43)Epoch=0,BatchID=2400,Val_AUC=0.7474,Best_Val_AUC=0.7474
(00:16:51)Epoch=0,BatchID=2800,Val_AUC=0.7417,Best_Val_AUC=0.7474
(00:19:01)Epoch=0,BatchID=3200,Val_AUC=0.7440,Best_Val_AUC=0.7474
(00:21:11)Epoch=0,BatchID=3600,Val_AUC=0.7463,Best_Val_AUC=0.7474
(00:23:21)Epoch=0,BatchID=4000,Val_AUC=0.7457,Best_Val_AUC=0.7474
(00:24:58)Epoch=0,BatchID=4180,Val_AUC=0.7427,Best_Val_AUC=0.7474
Reducing learning rate to 0.01000 @ T=4181!
Updating regularizer @ T=4181!
(00:26:39)Epoch=1,BatchID=0,Val_AUC=0.7435,Best_Val_AUC=0.7474
(00:28:47)Epoch=1,BatchID=400

## Male

In [10]:
# Train Resnet34 on the 'CheXpert_male' split and keep the checkpoint
# with the best mean validation AUC.
# The trailing '' makes os.path.join end the path with the platform's own
# separator (previously a hard-coded Windows backslash), so the
# `root+'train.csv'` concatenations below work on any OS.
root = os.path.join(os.getcwd(),'CheXpert_male','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model (seeds are reset immediately before construction)
set_all_seeds(SEED)
model = Resnet34(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # Stage update at the start of every epoch after the first. The run log
        # captured in this notebook shows it reducing the learning rate tenfold
        # and updating the regularizer.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        # last_activation=None above, so presumably the model returns raw
        # logits; sigmoid is applied here before the loss.
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data,test_labels in testloader:
                    test_data = test_data.cuda()
                    val_pred = torch.sigmoid(model(test_data))
                    test_pred.append(val_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # average of the scores returned by auc_roc_score
                val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))
                model.train()

                # keep only the weights of the best validation score so far
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','male_model_resnet34.pth'))
                # whole seconds elapsed since training started, shown as HH:MM:SS
                time_gap = int((datetime.now()-begin_time).total_seconds())
                hours,remainders = divmod(time_gap,3600)
                minutes,seconds = divmod(remainders,60)
                print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 78644 images in total, 9943 positive images, 68701 negative images
Cardiomegaly(C0): imbalance ratio is 0.1264

Found 78644 images in total, 24591 positive images, 54053 negative images
Edema(C1): imbalance ratio is 0.3127

Found 78644 images in total, 5314 positive images, 73330 negative images
Consolidation(C2): imbalance ratio is 0.0676

Found 78644 images in total, 24764 positive images, 53880 negative images
Atelectasis(C3): imbalance ratio is 0.3149

Found 78644 images in total, 31305 positive images, 47339 negative images
Pleural Effusion(C4): imbalance ratio is 0.3981

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 11159 images in total, 1392 positive images, 9767 negative images
Cardiomegaly(C0): imbalance ratio is 0.1247

Found 11159 images in total, 3541 positive images, 7618 negative images
Edema(C1): imbalance ratio is 0.3173

Found 11159 images in total

## Female

In [11]:
# Female split, ResNet34 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_female','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet34(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','female_model_resnet34.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 55137 images in total, 6354 positive images, 48783 negative images
Cardiomegaly(C0): imbalance ratio is 0.1152

Found 55137 images in total, 18203 positive images, 36934 negative images
Edema(C1): imbalance ratio is 0.3301

Found 55137 images in total, 3741 positive images, 51396 negative images
Consolidation(C2): imbalance ratio is 0.0678

Found 55137 images in total, 17155 positive images, 37982 negative images
Atelectasis(C3): imbalance ratio is 0.3111

Found 55137 images in total, 22370 positive images, 32767 negative images
Pleural Effusion(C4): imbalance ratio is 0.4057

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 7931 images in total, 850 positive images, 7081 negative images
Cardiomegaly(C0): imbalance ratio is 0.1072

Found 7931 images in total, 2620 positive images, 5311 negative images
Edema(C1): imbalance ratio is 0.3303

Found 7931 images in total, 58

## before40

In [12]:
# before40 split, ResNet34 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_before40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet34(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','before40_model_resnet34.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19562 images in total, 1817 positive images, 17745 negative images
Cardiomegaly(C0): imbalance ratio is 0.0929

Found 19562 images in total, 4292 positive images, 15270 negative images
Edema(C1): imbalance ratio is 0.2194

Found 19562 images in total, 1250 positive images, 18312 negative images
Consolidation(C2): imbalance ratio is 0.0639

Found 19562 images in total, 4759 positive images, 14803 negative images
Atelectasis(C3): imbalance ratio is 0.2433

Found 19562 images in total, 5784 positive images, 13778 negative images
Pleural Effusion(C4): imbalance ratio is 0.2957

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 2729 images in total, 228 positive images, 2501 negative images
Cardiomegaly(C0): imbalance ratio is 0.0835

Found 2729 images in total, 562 positive images, 2167 negative images
Edema(C1): imbalance ratio is 0.2059

Found 2729 images in total, 181 po

## after40

In [13]:
# after40 split, ResNet34 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_after40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet34(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','after40_model_resnet34.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 114219 images in total, 14480 positive images, 99739 negative images
Cardiomegaly(C0): imbalance ratio is 0.1268

Found 114219 images in total, 38502 positive images, 75717 negative images
Edema(C1): imbalance ratio is 0.3371

Found 114219 images in total, 7805 positive images, 106414 negative images
Consolidation(C2): imbalance ratio is 0.0683

Found 114219 images in total, 37160 positive images, 77059 negative images
Atelectasis(C3): imbalance ratio is 0.3253

Found 114219 images in total, 47891 positive images, 66328 negative images
Pleural Effusion(C4): imbalance ratio is 0.4193

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 16361 images in total, 2014 positive images, 14347 negative images
Cardiomegaly(C0): imbalance ratio is 0.1231

Found 16361 images in total, 5599 positive images, 10762 negative images
Edema(C1): imbalance ratio is 0.3422

Found 16361 images

# Resnet50

## Origin

In [14]:
# Origin (unsplit) data, ResNet50 backbone: train with the multi-label AUC-margin loss
# and keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_origin','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet50(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','origin_model_resnet50.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 133781 images in total, 16297 positive images, 117484 negative images
Cardiomegaly(C0): imbalance ratio is 0.1218

Found 133781 images in total, 42794 positive images, 90987 negative images
Edema(C1): imbalance ratio is 0.3199

Found 133781 images in total, 9055 positive images, 124726 negative images
Consolidation(C2): imbalance ratio is 0.0677

Found 133781 images in total, 41919 positive images, 91862 negative images
Atelectasis(C3): imbalance ratio is 0.3133

Found 133781 images in total, 53675 positive images, 80106 negative images
Pleural Effusion(C4): imbalance ratio is 0.4012

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19090 images in total, 2242 positive images, 16848 negative images
Cardiomegaly(C0): imbalance ratio is 0.1174

Found 19090 images in total, 6161 positive images, 12929 negative images
Edema(C1): imbalance ratio is 0.3227

Found 19090 image

## Male

In [15]:
# Male split, ResNet50 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_male','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet50(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','male_model_resnet50.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 78644 images in total, 9943 positive images, 68701 negative images
Cardiomegaly(C0): imbalance ratio is 0.1264

Found 78644 images in total, 24591 positive images, 54053 negative images
Edema(C1): imbalance ratio is 0.3127

Found 78644 images in total, 5314 positive images, 73330 negative images
Consolidation(C2): imbalance ratio is 0.0676

Found 78644 images in total, 24764 positive images, 53880 negative images
Atelectasis(C3): imbalance ratio is 0.3149

Found 78644 images in total, 31305 positive images, 47339 negative images
Pleural Effusion(C4): imbalance ratio is 0.3981

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 11159 images in total, 1392 positive images, 9767 negative images
Cardiomegaly(C0): imbalance ratio is 0.1247

Found 11159 images in total, 3541 positive images, 7618 negative images
Edema(C1): imbalance ratio is 0.3173

Found 11159 images in total

## Female

In [16]:
# Female split, ResNet50 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_female','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet50(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','female_model_resnet50.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 55137 images in total, 6354 positive images, 48783 negative images
Cardiomegaly(C0): imbalance ratio is 0.1152

Found 55137 images in total, 18203 positive images, 36934 negative images
Edema(C1): imbalance ratio is 0.3301

Found 55137 images in total, 3741 positive images, 51396 negative images
Consolidation(C2): imbalance ratio is 0.0678

Found 55137 images in total, 17155 positive images, 37982 negative images
Atelectasis(C3): imbalance ratio is 0.3111

Found 55137 images in total, 22370 positive images, 32767 negative images
Pleural Effusion(C4): imbalance ratio is 0.4057

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 7931 images in total, 850 positive images, 7081 negative images
Cardiomegaly(C0): imbalance ratio is 0.1072

Found 7931 images in total, 2620 positive images, 5311 negative images
Edema(C1): imbalance ratio is 0.3303

Found 7931 images in total, 58

## before40

In [17]:
# before40 split, ResNet50 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_before40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet50(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','before40_model_resnet50.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 19562 images in total, 1817 positive images, 17745 negative images
Cardiomegaly(C0): imbalance ratio is 0.0929

Found 19562 images in total, 4292 positive images, 15270 negative images
Edema(C1): imbalance ratio is 0.2194

Found 19562 images in total, 1250 positive images, 18312 negative images
Consolidation(C2): imbalance ratio is 0.0639

Found 19562 images in total, 4759 positive images, 14803 negative images
Atelectasis(C3): imbalance ratio is 0.2433

Found 19562 images in total, 5784 positive images, 13778 negative images
Pleural Effusion(C4): imbalance ratio is 0.2957

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 2729 images in total, 228 positive images, 2501 negative images
Cardiomegaly(C0): imbalance ratio is 0.0835

Found 2729 images in total, 562 positive images, 2167 negative images
Edema(C1): imbalance ratio is 0.2059

Found 2729 images in total, 181 po

## after40

In [18]:
# after40 split, ResNet50 backbone: train with the multi-label AUC-margin loss and
# keep the weights that reach the best mean validation AUC.
# Joining with a trailing '' appends the platform's path separator, so the
# `root+'train.csv'` concatenations below resolve on any OS (a hard-coded '\\'
# suffix only forms a directory path on Windows).
root = os.path.join(os.getcwd(),'CheXpert_after40','')
# Index=-1 denotes multi-label with 5 diseases
trainSet = CheXpert(csv_path=root+'train.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='train',class_index=-1,verbose=True)
testSet =  CheXpert(csv_path=root+'valid.csv',image_root_path=root,use_upsampling=False,use_frontal=True,image_size=224,mode='valid',class_index=-1,verbose=True)
trainloader =  torch.utils.data.DataLoader(trainSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet,batch_size=BATCH_SIZE,num_workers=2,shuffle=False)

# model
set_all_seeds(SEED)
# last_activation=None: the sigmoid is applied explicitly in the loops below.
model = Resnet50(pretrained=True,last_activation=None,activations='elu',num_classes=5)
model = model.cuda()

# define loss & optimizer
loss_fn = MultiLabelAUCMLoss(num_labels=5)
optimizer = PESG(model.parameters(),
                 loss_fn=loss_fn,
                 lr=lr,
                 margin=margin,
                 epoch_decay=epoch_decay,
                 weight_decay=weight_decay)

print ('Start Training')
print ('-'*30)
begin_time = datetime.now()

best_val_auc = 0
for epoch in range(total_epochs):
    if epoch > 0:
        # NOTE(review): runs at the start of every epoch after the first; in LibAUC's
        # PESG this presumably divides the learning rate by decay_factor each time --
        # confirm that a 10x cut per epoch is intended.
        optimizer.update_regularizer(decay_factor=10)

    for idx,(train_data,train_labels) in enumerate(trainloader):
        train_data,train_labels = train_data.cuda(),train_labels.cuda()
        y_pred = torch.sigmoid(model(train_data))
        loss = loss_fn(y_pred,train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: every 400 batches and on the last batch of the epoch
        if idx % 400 == 0 or idx == len(trainloader)-1:
            model.eval()
            test_pred = []
            test_true = []
            with torch.no_grad():
                for test_data,test_labels in testloader:
                    y_pred = torch.sigmoid(model(test_data.cuda()))
                    # Tensors produced under no_grad carry no autograd graph, so no detach() is needed.
                    test_pred.append(y_pred.cpu().numpy())
                    test_true.append(test_labels.numpy())
            model.train()

            test_true = np.concatenate(test_true)
            test_pred = np.concatenate(test_pred)
            # Mean of whatever auc_roc_score returns (presumably one AUC per label -- confirm).
            val_auc_mean = np.mean(auc_roc_score(test_true,test_pred))

            # Checkpoint only when the mean validation AUC improves.
            if best_val_auc < val_auc_mean:
                best_val_auc = val_auc_mean
                torch.save(model.state_dict(),os.path.join(os.getcwd(),'pth_files','after40_model_resnet50.pth'))

            # Elapsed wall-clock time since training started, split into whole h/m/s.
            after_time = datetime.now()
            time_gap = int((after_time-begin_time).total_seconds())
            hours,remainders = divmod(time_gap,3600)
            minutes,seconds = divmod(remainders,60)
            print(f'({hours:02d}:{minutes:02d}:{seconds:02d})Epoch={epoch},BatchID={idx},Val_AUC={val_auc_mean:.4f},Best_Val_AUC={best_val_auc:.4f}')

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 114219 images in total, 14480 positive images, 99739 negative images
Cardiomegaly(C0): imbalance ratio is 0.1268

Found 114219 images in total, 38502 positive images, 75717 negative images
Edema(C1): imbalance ratio is 0.3371

Found 114219 images in total, 7805 positive images, 106414 negative images
Consolidation(C2): imbalance ratio is 0.0683

Found 114219 images in total, 37160 positive images, 77059 negative images
Atelectasis(C3): imbalance ratio is 0.3253

Found 114219 images in total, 47891 positive images, 66328 negative images
Pleural Effusion(C4): imbalance ratio is 0.4193

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 16361 images in total, 2014 positive images, 14347 negative images
Cardiomegaly(C0): imbalance ratio is 0.1231

Found 16361 images in total, 5599 positive images, 10762 negative images
Edema(C1): imbalance ratio is 0.3422

Found 16361 images