

* Author: Zhuoning Yuan
* Project: https://github.com/yzhuoning/LibAUC



# **Installing LibAUC**

In [None]:
import torch

# Report whether a CUDA device is visible and which CUDA version this torch build targets.
cuda_available = torch.cuda.is_available()
print(f"Is CUDA supported by this system? {cuda_available}")
print(f"CUDA version: {torch.version.cuda}")

# Storing ID of current CUDA device.
# torch.cuda.current_device() raises on a runtime without CUDA, so the device is
# only queried when one is available; cuda_id stays None otherwise.
cuda_id = torch.cuda.current_device() if cuda_available else None

if cuda_id is not None:
    print(f"ID of current CUDA device:{cuda_id}")
    print(f"Name of current CUDA device:{torch.cuda.get_device_name(cuda_id)}")
else:
    print("No CUDA device available; running on CPU.")

Is CUDA supported by this system? True
CUDA version: 11.3
ID of current CUDA device:0
Name of current CUDA device:Tesla T4


In [None]:
from google.colab import drive

# Location inside the Colab VM where the Google Drive content is exposed.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install libauc

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **Downloading CheXpert**
 
*   To request access to the dataset, apply through the CheXpert website: https://stanfordmlgroup.github.io/competitions/chexpert/
*   In this tutorial, we use the smaller version of the dataset with lower image resolution, i.e., *CheXpert-v1.0-small.zip*



In [None]:
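# Optional: copy the archive from the mounted Drive (mounted at /content/drive above) and unpack it locally.
# !cp /content/drive/MyDrive/chexpert-dataset/CheXpert-v1.0-small.zip /content/
# !mkdir CheXpert
# !unzip CheXpert-v1.0-small.zip -d /content/CheXpert/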


# **Importing LibAUC**

In [None]:
# Work from the dataset folder so that relative paths used in later cells resolve against it.
%cd /content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small

/content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small


In [None]:
from libauc.losses import AUCMLoss, CrossEntropyLoss
from libauc.optimizers import PESG, Adam
from libauc.models import DenseNet121, DenseNet169, ResNet18,ResNet50, ResNet34, ResNet56
from libauc.datasets import CheXpert

import torch 
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score

In [None]:
!pip install GPUtil

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from GPUtil import showUtilization as gpu_usage

# **Reproducibility**

In [None]:
def set_all_seeds(SEED):
    """Seed the torch and NumPy random generators and make cuDNN deterministic.

    Args:
        SEED: integer seed passed to ``torch.manual_seed`` and ``np.random.seed``.

    Note:
        Python's built-in ``random`` module is not seeded here.
    """
    # REPRODUCIBILITY
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    # Ask cuDNN for deterministic kernels and disable its auto-tuner, which
    # could otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# **Pretraining**
* Multi-label classification (5 tasks)   
* Adam + CrossEntropy Loss 
* This step is optional




In [None]:
from sklearn.metrics import accuracy_score

In [None]:
import pandas as pd


In [None]:
# Folder on the mounted Drive that holds the CheXpert manifests.
data_root = '/content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small/'

# Read the training manifest and substitute 0 for every missing entry.
df = pd.read_csv(data_root + 'train.csv').fillna(0)

# Only the first 3000 manifest rows are used for training.
train_df = df[:3000]

# Disabled validation/test splits kept for reference.
# NOTE(review): these row ranges fall inside the training slice above, so they
# would overlap with train_df if re-enabled as written.
# valid_df = df[1000:1500]
# test_df = df[1500:2000]
# print(train_df.shape)
# print(valid_df.shape)
# print(test_df.shape)

In [None]:
# Write the training subset to the exact location the training cells read it from
# (data_root + 'Chexpert_train.csv') instead of relying on the current working directory.
train_df.to_csv(data_root + 'Chexpert_train.csv', index=False)
# valid_df.to_csv(r'Chexpert_valid.csv', index=False)
# test_df.to_csv(r'Chexpert_test.csv', index=False)

In [None]:
# Pre-train a 5-output DenseNet121 with Adam + CrossEntropyLoss and keep the
# checkpoint with the best validation AUC.

# parameters (defined before the loaders so BATCH_SIZE is actually used)
SEED = 123
BATCH_SIZE = 10
lr = 1e-4
weight_decay = 1e-5

# dataloader
data_root = '/content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small/'
# Index: -1 denotes multi-label mode including 5 diseases
traindSet = CheXpert(csv_path=data_root+'Chexpert_train.csv', image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='train', class_index=-1)
testSet =  CheXpert(csv_path=data_root+'Chexpert_valid.csv',  image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=-1)
trainloader =  torch.utils.data.DataLoader(traindSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=False)

# model
# set_all_seeds(SEED)   # seeding is currently disabled; SEED is kept for when it is re-enabled
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = DenseNet121(pretrained=True, last_activation=None, activations='relu', num_classes=5)
model = model.to(device)

# define loss & optimizer
CELoss = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# training
best_val_auc = 0
for epoch in range(5):
    for idx, (train_data, train_labels) in enumerate(trainloader):
        #print(gpu_usage())
        # Release cached GPU blocks before every step (kept from the original cell).
        torch.cuda.empty_cache()
        train_data, train_labels = train_data.to(device), train_labels.to(device)
        y_pred = model(train_data)
        loss = CELoss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: run over the whole validation loader every 20 training batches
        if idx % 20 == 0:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data, test_labels in testloader:
                    test_data = test_data.to(device)
                    y_pred = model(test_data)
                    test_pred.append(y_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # Scores are the raw model outputs (last_activation=None).
                # NOTE(review): with one label column per disease, sklearn's default
                # averaging gives the mean of the per-column AUCs -- confirm label layout.
                val_auc_mean =  roc_auc_score(test_true, test_pred)
                model.train()
                #accuracy = accuracy_score(test_true,test_pred)
                # Keep only the weights of the best-scoring validation pass.
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(), '/content/drive/MyDrive/Gao Independent Study/Densenet121_pretrained_model.pth')

                print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc))



  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0-small/', '')
  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0/', '')


Multi-label mode: True, Number of classes: [5]
------------------------------
Found 787 images in total, 91 positive images, 696 negative images
Cardiomegaly(C0): imbalance ratio is 0.1156

Found 787 images in total, 226 positive images, 561 negative images
Edema(C1): imbalance ratio is 0.2872

Found 787 images in total, 55 positive images, 732 negative images
Consolidation(C2): imbalance ratio is 0.0699

Found 787 images in total, 226 positive images, 561 negative images
Atelectasis(C3): imbalance ratio is 0.2872

Found 787 images in total, 306 positive images, 481 negative images
Pleural Effusion(C4): imbalance ratio is 0.3888

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 398 images in total, 60 positive images, 338 negative images
Cardiomegaly(C0): imbalance ratio is 0.1508

Found 398 images in total, 107 positive images, 291 negative images
Edema(C1): imbalance ratio is 0.2688

Found 398 images in total, 25 positive images, 373 negative images

In [None]:
# Show the last computed validation AUC next to the best one seen so far.
auc_report = 'Val_AUC=%.4f, Best_Val_AUC=%.4f' % (val_auc_mean, best_val_auc)
print(auc_report)

Val_AUC=0.6633, Best_Val_AUC=0.6859


In [None]:
# Pre-train a 5-output DenseNet169 with Adam + CrossEntropyLoss and keep the
# checkpoint with the best validation AUC.

# parameters (defined before the loaders so BATCH_SIZE is actually used)
SEED = 123
BATCH_SIZE = 10
lr = 1e-4
weight_decay = 1e-5

# dataloader
# root = '/content/drive/MyDrive/Chexpert_Dataset/'
data_root = '/content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small/'
traindSet = CheXpert(csv_path=data_root+'Chexpert_train.csv', image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='train', class_index=-1)
testSet =  CheXpert(csv_path=data_root+'Chexpert_valid.csv',  image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=-1)
trainloader =  torch.utils.data.DataLoader(traindSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=False)

# model
# set_all_seeds(SEED)   # seeding is currently disabled; SEED is kept for when it is re-enabled
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = DenseNet169(pretrained=True, last_activation=None, activations='relu', num_classes=5)
model = model.to(device)

# define loss & optimizer
CELoss = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# training
best_val_auc = 0
for epoch in range(5):
    for idx, (train_data, train_labels) in enumerate(trainloader):
        #print(gpu_usage())
        # Release cached GPU blocks before every step (kept from the original cell).
        torch.cuda.empty_cache()
        train_data, train_labels = train_data.to(device), train_labels.to(device)
        y_pred = model(train_data)
        loss = CELoss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: run over the whole validation loader every 20 training batches
        if idx % 20 == 0:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data, test_labels in testloader:
                    test_data = test_data.to(device)
                    y_pred = model(test_data)
                    test_pred.append(y_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # Scores are the raw model outputs (last_activation=None).
                # NOTE(review): with one label column per disease, sklearn's default
                # averaging gives the mean of the per-column AUCs -- confirm label layout.
                val_auc_mean =  roc_auc_score(test_true, test_pred)
                model.train()

                # Keep only the weights of the best-scoring validation pass. The file is
                # named after the architecture trained in this cell (DenseNet169); it
                # was previously written as 'Resnet34_pretrained_model.pth'.
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(), 'Densenet169_pretrained_model.pth')

                print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc ))

  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0-small/', '')
  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0/', '')


Multi-label mode: True, Number of classes: [5]
------------------------------
Found 787 images in total, 91 positive images, 696 negative images
Cardiomegaly(C0): imbalance ratio is 0.1156

Found 787 images in total, 226 positive images, 561 negative images
Edema(C1): imbalance ratio is 0.2872

Found 787 images in total, 55 positive images, 732 negative images
Consolidation(C2): imbalance ratio is 0.0699

Found 787 images in total, 226 positive images, 561 negative images
Atelectasis(C3): imbalance ratio is 0.2872

Found 787 images in total, 306 positive images, 481 negative images
Pleural Effusion(C4): imbalance ratio is 0.3888

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 398 images in total, 60 positive images, 338 negative images
Cardiomegaly(C0): imbalance ratio is 0.1508

Found 398 images in total, 107 positive images, 291 negative images
Edema(C1): imbalance ratio is 0.2688

Found 398 images in total, 25 positive images, 373 negative images

In [None]:
# Show the last computed validation AUC next to the best one seen so far.
auc_report = 'Val_AUC=%.4f, Best_Val_AUC=%.4f' % (val_auc_mean, best_val_auc)
print(auc_report)

Val_AUC=0.6524, Best_Val_AUC=0.7027


In [None]:
from libauc.models.resnet import resnet34

# Pre-train a 5-output ResNet-34 with Adam + CrossEntropyLoss and keep the
# checkpoint with the best validation AUC.

# parameters (defined before the loaders so BATCH_SIZE is actually used)
SEED = 123
BATCH_SIZE = 10
lr = 1e-4
weight_decay = 1e-5

# dataloader
data_root = '/content/drive/MyDrive/Deep Learning Project/CheXpert-v1.0-small/'

# Index: -1 denotes multi-label mode including 5 diseases
traindSet = CheXpert(csv_path=data_root+'Chexpert_train.csv', image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='train', class_index=-1)
testSet =  CheXpert(csv_path=data_root+'Chexpert_valid.csv',  image_root_path=data_root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=-1)
trainloader =  torch.utils.data.DataLoader(traindSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=True)
testloader =  torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, num_workers=2, shuffle=False)

# model
# set_all_seeds(SEED)   # seeding is currently disabled; SEED is kept for when it is re-enabled
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = resnet34(pretrained=True, last_activation=None, activations='relu', num_classes=5)
model = model.to(device)

# define loss & optimizer
CELoss = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# training
best_val_auc = 0
for epoch in range(5):
    for idx, (train_data, train_labels) in enumerate(trainloader):
        #print(gpu_usage())
        # Release cached GPU blocks before every step (kept from the original cell).
        torch.cuda.empty_cache()
        train_data, train_labels = train_data.to(device), train_labels.to(device)
        y_pred = model(train_data)
        loss = CELoss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation: run over the whole validation loader every 20 training batches
        if idx % 20 == 0:
            model.eval()
            with torch.no_grad():
                test_pred = []
                test_true = []
                for test_data, test_labels in testloader:
                    test_data = test_data.to(device)
                    y_pred = model(test_data)
                    test_pred.append(y_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                # Scores are the raw model outputs (last_activation=None).
                # NOTE(review): with one label column per disease, sklearn's default
                # averaging gives the mean of the per-column AUCs -- confirm label layout.
                val_auc_mean =  roc_auc_score(test_true, test_pred)
                model.train()

                # Keep only the weights of the best-scoring validation pass.
                if best_val_auc < val_auc_mean:
                    best_val_auc = val_auc_mean
                    torch.save(model.state_dict(), 'ce_pretrained_model.pth')

                print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc ))

  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0-small/', '')
  self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0/', '')


Multi-label mode: True, Number of classes: [5]
------------------------------
Found 787 images in total, 91 positive images, 696 negative images
Cardiomegaly(C0): imbalance ratio is 0.1156

Found 787 images in total, 226 positive images, 561 negative images
Edema(C1): imbalance ratio is 0.2872

Found 787 images in total, 55 positive images, 732 negative images
Consolidation(C2): imbalance ratio is 0.0699

Found 787 images in total, 226 positive images, 561 negative images
Atelectasis(C3): imbalance ratio is 0.2872

Found 787 images in total, 306 positive images, 481 negative images
Pleural Effusion(C4): imbalance ratio is 0.3888

Multi-label mode: True, Number of classes: [5]
------------------------------
Found 398 images in total, 60 positive images, 338 negative images
Cardiomegaly(C0): imbalance ratio is 0.1508

Found 398 images in total, 107 positive images, 291 negative images
Edema(C1): imbalance ratio is 0.2688

Found 398 images in total, 25 positive images, 373 negative images

# **Optimizing AUCM Loss**


*   Binary Classification
*   PESG + AUCM Loss


In [None]:
# # parameters
# class_id = 1 # 0:Cardiomegaly, 1:Edema, 2:Consolidation, 3:Atelectasis, 4:Pleural Effusion 
# root = '/content/drive/MyDrive/Chexpert_Dataset/'

# # You can set use_upsampling=True and pass the class name by upsampling_cols=['Cardiomegaly'] to do upsampling. This may improve the performance
# traindSet = CheXpert(csv_path=root+'valid.csv', image_root_path=root, use_upsampling=True, use_frontal=True, image_size=224, mode='train', class_index=class_id)
# testSet =  CheXpert(csv_path=root+'valid.csv',  image_root_path=root, use_upsampling=False, use_frontal=True, image_size=224, mode='valid', class_index=class_id)
# trainloader =  torch.utils.data.DataLoader(traindSet, batch_size=10, num_workers=2, shuffle=True)
# testloader =  torch.utils.data.DataLoader(testSet, batch_size=10, num_workers=2, shuffle=False)

# # parameters
# SEED = 123
# BATCH_SIZE = 10
# imratio = traindSet.imratio
# lr = 0.05 # using smaller learning rate is better
# gamma = 500
# weight_decay = 1e-5
# margin = 1.0

# # model
# set_all_seeds(SEED)
# model = DenseNet121(pretrained=False, last_activation='sigmoid', activations='relu', num_classes=1)
# model = model.cuda()


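# # load pretrained model
# # NOTE(review): 'ce_pretrained_model.pth' is written by the ResNet-34 pre-training cell above, while the
# # model built here is a DenseNet121 loaded with strict=False -- confirm the checkpoint matches the architecture.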
# if True:
#   PATH = 'ce_pretrained_model.pth' 
#   state_dict = torch.load(PATH)
#   state_dict.pop('classifier.weight', None)
#   state_dict.pop('classifier.bias', None) 
#   model.load_state_dict(state_dict, strict=False)


# # define loss & optimizer
# Loss = AUCMLoss(imratio=imratio)
# optimizer = PESG(model, 
#                  a=Loss.a, 
#                  b=Loss.b, 
#                  alpha=Loss.alpha, 
#                  imratio=imratio, 
#                  lr=lr, 
#                  gamma=gamma, 
#                  margin=margin, 
#                  weight_decay=weight_decay)

# best_val_auc = 0
# for epoch in range(5):
#   if epoch > 0:
#      optimizer.update_regularizer(decay_factor=10)
#   for idx, data in enumerate(trainloader):
#       train_data, train_labels = data
#       train_data, train_labels = train_data.cuda(), train_labels.cuda()
#       y_pred = model(train_data)
#       loss = Loss(y_pred, train_labels)
#       optimizer.zero_grad()
#       loss.backward()
#       optimizer.step()

#       # validation
#       if idx % 20 == 0:
#         model.eval()
#         with torch.no_grad():    
#               test_pred = []
#               test_true = [] 
#               for jdx, data in enumerate(testloader):
#                   test_data, test_label = data
#                   test_data = test_data.cuda()
#                   y_pred = model(test_data)
#                   test_pred.append(y_pred.cpu().detach().numpy())
#                   test_true.append(test_label.numpy())
              
#               test_true = np.concatenate(test_true)
#               test_pred = np.concatenate(test_pred)
#               val_auc =  roc_auc_score(test_true, test_pred) 
#               model.train()

#               if best_val_auc < val_auc:
#                  best_val_auc = val_auc
              
#         print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, lr=%.4f'%(epoch, idx, val_auc,  optimizer.lr))

# print ('Best Val_AUC is %.4f'%best_val_auc)