In [None]:
#!pip install libauc

In [None]:
from libauc.losses import AUCMLoss, CrossEntropyLoss
from libauc.optimizers import PESG, Adam
from libauc.models import DenseNet121, DenseNet169
from libauc.datasets import CheXpert

import torch
import os
import pandas as pd
from glob import glob
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score
import random
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch import nn
import dataset

In [None]:
# Data lives in ../data relative to the directory the notebook is run from.
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')


def _data_path(*parts):
    """Return the path of an entry located under DATA_DIR."""
    return os.path.join(DATA_DIR, *parts)


# CheXpert train / validation arrays stored as .npy files.
CheXpert_train_hidden_features_all = np.load(_data_path('CheXpert_train_hidden_features_all.npy'))
CheXpert_train_labels_all = np.load(_data_path('CheXpert_train_labels_all.npy'))
CheXpert_valid_hidden_features_all = np.load(_data_path('CheXpert_valid_hidden_features_all.npy'))
CheXpert_valid_labels_all = np.load(_data_path('CheXpert_valid_labels_all.npy'))

# Extra validation set: age/sex table, feature and label arrays, and the
# list of entries found in the extraValid directory.
extra_valid_age_sex_df = pd.read_csv(_data_path('extra_valid_age_sex.csv'))
extra_valid_hidden_features = np.load(_data_path('extra_valid_hidden_features.npy'))
extra_valid_labels = np.load(_data_path('extra_valid_labels.npy'))
extra_valid_images = glob(_data_path('extraValid', '*'))

In [None]:
# Handling of uncertain (-1) labels.
#
# Alternative kept for reference -- U-Ones: map every -1 to 1.
#   CheXpert_train_labels_all[CheXpert_train_labels_all == -1] = 1
#   CheXpert_valid_labels_all[CheXpert_valid_labels_all == -1] = 1

# Fixed seed so that a fresh "Restart & Run All" reproduces the same soft labels.
LABEL_SMOOTHING_SEED = 123


def smooth_uncertain_labels(labels, low=0.55, high=0.85, rng=None):
    """U-Ones-LSR: replace every -1 entry with a draw from U(low, high).

    Args:
        labels: array-like of labels in which -1 marks an uncertain entry.
        low: lower bound of the uniform distribution.
        high: upper bound of the uniform distribution.
        rng: optional ``numpy.random.Generator``; a fresh unseeded one is
            created when omitted.

    Returns:
        A new floating-point array; the input array is not modified.
    """
    if rng is None:
        rng = np.random.default_rng()
    labels = np.asarray(labels)
    # Promote to a float dtype first: writing a value in (0.55, 0.85) into an
    # integer array would silently truncate it to 0.
    float_dtype = np.result_type(labels.dtype, np.float32)
    smoothed = labels.astype(float_dtype, copy=True)
    uncertain = smoothed == -1
    # One vectorized draw replaces the per-element Python double loop.
    smoothed[uncertain] = rng.uniform(low, high, size=int(uncertain.sum()))
    return smoothed


# U-Ones-LSR
_label_rng = np.random.default_rng(LABEL_SMOOTHING_SEED)
CheXpert_train_labels_all = smooth_uncertain_labels(CheXpert_train_labels_all, rng=_label_rng)
# NOTE(review): if the validation labels contain any -1, smoothing turns them
# into non-binary targets, which an AUC metric such as sklearn's roc_auc_score
# presumably rejects -- confirm the validation split has no uncertain labels.
CheXpert_valid_labels_all = smooth_uncertain_labels(CheXpert_valid_labels_all, rng=_label_rng)

In [None]:
class PrepareData(Dataset):
    """Map-style dataset that pairs a feature matrix with its labels.

    Args:
        X: features, either a ``torch.Tensor`` or a NumPy array.
        y: labels, either a ``torch.Tensor`` or a NumPy array.
        scale_X: when True and ``X`` is not a tensor, ``X`` is standardized
            with a ``StandardScaler`` fitted on ``X`` itself. Tensor input is
            used as given.

    Raises:
        ValueError: if ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y, scale_X=True):
        if torch.is_tensor(X):
            # Previously self.X was never assigned on this path, so any later
            # len()/indexing raised AttributeError.
            self.X = X
        else:
            if scale_X:
                X = StandardScaler().fit_transform(X)
            self.X = torch.from_numpy(X)

        # Same fix for the labels: keep tensors, convert arrays.
        self.y = y if torch.is_tensor(y) else torch.from_numpy(y)

        if len(self.X) != len(self.y):
            raise ValueError(
                'X and y must have the same number of samples, got %d and %d'
                % (len(self.X), len(self.y))
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [None]:
class MultiLabelClassification(nn.Module):
    """Classifier head mapping ``num_feature`` inputs to ``num_class`` sigmoid outputs.

    ``forward`` applies only the single linear layer ``classifier`` followed
    by a sigmoid. The two MLP heads ``classpred`` and ``classpred2`` are
    constructed and registered as submodules, but ``forward`` does not call
    them.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()

        # Head 1: MLP with batch norm, and dropout after the 2nd and 3rd blocks.
        deep_layers = [
            nn.Linear(num_feature, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, num_class),
        ]
        self.classpred = nn.Sequential(*deep_layers)

        # Head 2: plain Linear/ReLU stack, num_feature -> 256 -> 192 -> 64 -> num_class.
        widths = [num_feature, 256, 192, 64]
        plain_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            plain_layers.append(nn.Linear(fan_in, fan_out))
            plain_layers.append(nn.ReLU())
        plain_layers.append(nn.Linear(widths[-1], num_class))
        self.classpred2 = nn.Sequential(*plain_layers)

        # Head actually used by forward(): one linear layer plus sigmoid.
        self.classifier = nn.Linear(num_feature, num_class)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(classifier(x))``."""
        return self.sigmoid(self.classifier(x))

In [None]:
# Fit the standardization on the training features only and reuse it for the
# validation features. Letting each PrepareData fit its own StandardScaler
# (scale_X=True on both) scaled the two splits with different statistics, so
# the model was evaluated on inputs in a different space than it was trained on.
feature_scaler = StandardScaler().fit(CheXpert_train_hidden_features_all)

trainset = PrepareData(feature_scaler.transform(CheXpert_train_hidden_features_all),
                       y=CheXpert_train_labels_all,
                       scale_X=False)
testset = PrepareData(feature_scaler.transform(CheXpert_valid_hidden_features_all),
                      y=CheXpert_valid_labels_all,
                      scale_X=False)

# Training batches are reshuffled every epoch; validation order is kept fixed.
trainloader = DataLoader(trainset,
                         batch_size=32,
                         shuffle=True,
                         num_workers=2)
testloader = DataLoader(testset,
                        batch_size=32,
                        shuffle=False,
                        num_workers=2)

In [None]:
# parameters
SEED = 123
BATCH_SIZE = 32  # NOTE(review): not read in this cell; the DataLoaders set their own batch_size
NUM_EPOCHS = 4
EVAL_EVERY = 400  # run a validation pass every EVAL_EVERY training batches
imratio = 0.3424
lr = 0.05 # using smaller learning rate is better
gamma = 500
weight_decay = 1e-5
margin = 1.0

# SEED used to be defined but never applied. Seed every RNG in play before the
# model is created so weight init and batch shuffling are repeatable.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


def evaluate_auc(model, loader):
    """Return the ROC AUC of ``model`` over all batches of ``loader``.

    The model is put in eval mode for the pass and switched back to train
    mode afterwards (also if the pass raises). Inputs are moved to the GPU
    with ``.cuda()``; predictions are gathered on the CPU.
    """
    model.eval()
    try:
        preds, targets = [], []
        with torch.no_grad():
            for features, labels in loader:
                preds.append(model(features.cuda()).cpu().numpy())
                targets.append(labels.numpy())
    finally:
        model.train()
    return roc_auc_score(np.concatenate(targets), np.concatenate(preds))


# model
model = MultiLabelClassification(num_feature=1024, num_class=5)
model = model.cuda()

# define loss & optimizer
Loss = AUCMLoss(imratio=imratio)
optimizer = PESG(model,
                 a=Loss.a,
                 b=Loss.b,
                 alpha=Loss.alpha,
                 imratio=imratio,
                 lr=lr,
                 gamma=gamma,
                 margin=margin,
                 weight_decay=weight_decay)

best_val_auc = 0
for epoch in range(NUM_EPOCHS):
    # From the second epoch on, update the PESG regularizer before training.
    if epoch > 0:
        optimizer.update_regularizer(decay_factor=10)

    for idx, (train_data, train_labels) in enumerate(trainloader):
        train_data, train_labels = train_data.cuda(), train_labels.cuda()
        y_pred = model(train_data)
        loss = Loss(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation
        if idx % EVAL_EVERY == 0:
            val_auc = evaluate_auc(model, testloader)
            best_val_auc = max(best_val_auc, val_auc)
            print('Epoch=%s, BatchID=%s, Val_AUC=%.4f, lr=%.4f'%(epoch, idx, val_auc,  optimizer.lr))

print ('Best Val_AUC is %.4f'%best_val_auc)