# ADJUSTING AND TESTING OF THE DENSENET121 CONVOLUTIONAL NEURAL NETWORK WITH CheXpert DATA

- Importing packages
- Importing Data
- Adjusting DenseNet121 CNN
- Creation of the training algorithm
- Testing (plotting of the loss and ROC curves)

# Importing Packages

In [135]:
import csv
import os
import time

import numpy as np
import sklearn.metrics as metrics
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from PIL import Image
# NOTE(review): `sklearn.metrics.ranking` is a private module path; newer
# scikit-learn releases expose `roc_auc_score` from `sklearn.metrics` only.
from sklearn.metrics.ranking import roc_auc_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torchvision import transforms

# Importing Data

In [136]:
# Patient count constant; not referenced by any other cell visible here.
N_PATIENTS = 64540

# Names of the 14 findings, in the order the label vectors are indexed.
CLASSES = [
    'No Finding',
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
    'Support Devices',
]

# Label tables for the training and validation splits.
csv_train_path = '../CheXpert-v1.0-small/train.csv'
csv_valid_path = '../CheXpert-v1.0-small/valid.csv'

# Adjusting Densenet121

In [137]:
def densenet121(n_classes=14):
    """Build a DenseNet-121 with a multi-label sigmoid classification head.

    The stock torchvision classifier layer is replaced by a linear layer with
    ``n_classes`` outputs followed by a sigmoid, so every output is an
    independent value in [0, 1].

    Args:
        n_classes: Number of output units. Defaults to 14, the value that was
            previously hard-coded.

    Returns:
        The modified ``torchvision`` DenseNet-121 module.
    """
    # No weights argument is passed, so the network starts from whatever
    # initialisation torchvision applies by default.
    net = models.densenet121()
    in_features = net.classifier.in_features
    net.classifier = nn.Sequential(
        nn.Linear(in_features, n_classes),
        nn.Sigmoid(),
    )
    return net

In [138]:
class DatasetGenerator(Dataset):
    """Image/label dataset read from a CSV table, using the "U-Ones" policy.

    Each data row contributes one sample: field 0 is the image path (prefixed
    with ``'../'``) and the ``N_LABELS`` fields starting at ``LABEL_OFFSET``
    are the raw labels. Under U-Ones a raw label of 1 or -1 becomes 1; any
    other value, or an empty field, becomes 0.
    """

    # Index of the first label field and number of label fields per row.
    LABEL_OFFSET = 5
    N_LABELS = 14

    def __init__ (self, path_table):
        """Read every row of ``path_table`` and build the preprocessing pipeline.

        Args:
            path_table: Path of the CSV file; its first row is treated as a
                header and skipped.

        Raises:
            IndexError: If a non-blank row has fewer than
                ``LABEL_OFFSET + N_LABELS`` fields.
            ValueError: If a non-empty label field is not a number.
        """
        self.imgs_path = []
        self.labels = []

        # newline='' lets the csv module do its own newline handling.
        with open(path_table, "r", newline="") as f:
            csv_reader = csv.reader(f)
            next(csv_reader, None)  # skip the header; tolerate an empty file
            for row_number, line in enumerate(csv_reader, start=2):
                if not line:
                    # csv.reader yields [] for blank lines (e.g. a trailing one).
                    continue
                raw = line[self.LABEL_OFFSET:self.LABEL_OFFSET + self.N_LABELS]
                if len(raw) < self.N_LABELS:
                    raise IndexError(
                        'row %d of %s has %d label fields, expected %d'
                        % (row_number, path_table, len(raw), self.N_LABELS))

                self.imgs_path.append('../' + line[0])
                self.labels.append(self._u_ones(raw))

        self.preprocess = transforms.Compose([
                            transforms.Resize([256,256]),
                            transforms.ToTensor(),
                            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                            ])

    @staticmethod
    def _u_ones(raw_fields):
        """Encode raw label strings as 0/1 ints: 1 and -1 map to 1, the rest to 0."""
        encoded = []
        for field in raw_fields:
            if field and float(field) in (1.0, -1.0):
                encoded.append(1)
            else:
                encoded.append(0)
        return encoded

    def __getitem__(self, index):
        """Return ``(image_tensor, label_tensor)`` for the sample at ``index``."""
        img_path = self.imgs_path[index]

        # The context manager closes the underlying file once the RGB copy exists.
        with Image.open(img_path) as img:
            image_data = img.convert('RGB')
        image_label = torch.FloatTensor(self.labels[index])

        image_data = self.preprocess(image_data)

        return image_data, image_label

    def __len__(self):
        """Return the number of samples read from the table."""
        return len(self.imgs_path)

# TRAINING ALGORITHM

In [141]:
def train(model, n_epochs):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Batches come from the module-level ``train_dataloader`` and
    ``val_dataloader``. Each epoch runs one optimisation pass over the training
    loader, then computes the mean binary cross-entropy over the validation
    batches. That mean drives the learning-rate scheduler and decides whether a
    checkpoint is written to ``./checkpoints/``.

    Args:
        model: Network whose outputs are probabilities in [0, 1], as required
            by ``BCELoss``.
        n_epochs: Number of passes over the training loader.

    Raises:
        ZeroDivisionError: If ``val_dataloader`` yields no batches.
    """
    # `reduction='mean'` is the current spelling of the deprecated
    # `size_average=True`.
    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5, mode='min')

    # Follow wherever the model lives so batches and weights share a device.
    device = next(model.parameters()).device

    # torch.save does not create missing directories.
    checkpoint_dir = './checkpoints'
    os.makedirs(checkpoint_dir, exist_ok=True)

    best_val_loss = float('inf')

    for epoch in range(n_epochs):
        print("Epoch: "+str(epoch+1))

        # training
        model.train()
        for images, targets in train_dataloader:
            outputs = model(images.to(device))

            batch_loss = criterion(outputs, targets.to(device))
            print(batch_loss)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # validation
        model.eval()
        n_batches = 0
        loss_sum = 0
        # no_grad replaces the removed `Variable(..., volatile=True)` idiom.
        with torch.no_grad():
            for images, targets in val_dataloader:
                outputs = model(images.to(device))
                loss_sum += criterion(outputs, targets.to(device)).item()
                n_batches += 1

        loss_mean = loss_sum / n_batches
        time_end = time.strftime('%m%d_%H%M%S')

        scheduler.step(loss_mean)

        if loss_mean < best_val_loss:
            best_val_loss = loss_mean
            torch.save({'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()},
                       checkpoint_dir + '/m_' + time_end + '.pth.tar')
            print ('Epoch [' + str(epoch+1) + '] [save] [' + time_end + '] loss= ' + str(loss_mean))
        else:
            print ('Epoch [' + str(epoch+1) + '] [----] [' + time_end + '] loss= ' + str(loss_mean))
        print('--------------------------------------------------------------------------\n')


# TESTING

In [142]:
# Build the network and the three data loaders, then run training.
DN121 = densenet121()

BATCH_SIZE = 16
N_EPOCHS = 3

# The training table is split in two: 500 randomly chosen samples are held out
# as the test set and the remainder is used for training.
train_test_data = DatasetGenerator(csv_train_path)
test_data, train_data = random_split(train_test_data, [500, len(train_test_data) - 500])
val_data = DatasetGenerator(csv_valid_path)

train_dataloader = DataLoader(train_data, BATCH_SIZE, shuffle=True, num_workers=24, pin_memory=True)
val_dataloader = DataLoader(val_data, BATCH_SIZE, shuffle=False, num_workers=24, pin_memory=True)
test_dataloader = DataLoader(test_data, BATCH_SIZE, shuffle=False, num_workers=24, pin_memory=True)

# train() takes (model, n_epochs); the batch size is already fixed by the loaders.
train(DN121, N_EPOCHS)

In [144]:
def compute_AUROC(gt, pred):
    """Compute one ROC-AUC score per entry of ``CLASSES``.

    Args:
        gt: Tensor of ground-truth labels, indexed as ``[sample, class]``.
        pred: Tensor of predicted scores with the same layout as ``gt``.

    Returns:
        A list with exactly ``len(CLASSES)`` floats, so position ``i`` always
        belongs to ``CLASSES[i]``. A class whose score cannot be computed is
        reported as ``nan`` rather than dropped; dropping it would shift every
        later score onto the wrong class name.
    """
    # detach/cpu make the conversion work for tensors on any device.
    npgt = gt.detach().cpu().numpy()
    nppred = pred.detach().cpu().numpy()

    AUROCS = []
    for i in range(len(CLASSES)):
        try:
            AUROCS.append(roc_auc_score(npgt[:, i], nppred[:, i]))
        except ValueError:
            # roc_auc_score rejected this column (presumably because it holds
            # a single label value -- confirm); keep the slot, mark it undefined.
            AUROCS.append(float('nan'))
    return AUROCS

In [145]:
def test(model):
    """Evaluate ``model`` on the module-level ``test_dataloader`` and print AUROCs.

    Args:
        model: Network to evaluate; it is switched to eval mode.

    Returns:
        ``(gt, pred)``: CPU tensors holding all ground-truth labels and all
        model outputs, concatenated along the batch dimension.
    """
    # Run batches on whatever device the model's weights are on.
    device = next(model.parameters()).device

    gt_batches = []
    pred_batches = []

    model.eval()

    with torch.no_grad():
        for images, targets in test_dataloader:
            gt_batches.append(targets)
            # Results are brought back to the CPU so gt and pred can be
            # concatenated and converted to numpy together.
            pred_batches.append(model(images.to(device)).cpu())

    # One concatenation at the end instead of growing the tensors per batch.
    gt = torch.cat(gt_batches, 0) if gt_batches else torch.FloatTensor()
    pred = torch.cat(pred_batches, 0) if pred_batches else torch.FloatTensor()

    AUROCS = compute_AUROC(gt, pred)
    # nanmean ignores entries reported as nan (undefined score for a class).
    AUROC_mean = np.nanmean(np.array(AUROCS))

    print ('AUROC mean: '+str(AUROC_mean))

    for class_name, auroc in zip(CLASSES, AUROCS):
        print (class_name, ' ', auroc)

    return gt, pred

In [147]:
# Evaluate the trained model and draw one ROC curve per class on a 2 x 7 grid.
# NOTE(review): `plt` is not imported anywhere in the visible file; this cell
# needs `import matplotlib.pyplot as plt` to have been executed beforehand.
target, pred = test(DN121)

# The figure size must be set before the first subplot creates the figure;
# changing rcParams afterwards only affects figures created later.
plt.rcParams["figure.figsize"] = [30, 10]

for i, class_name in enumerate(CLASSES):
    fpr, tpr, _ = metrics.roc_curve(target[:, i].cpu().numpy(), pred[:, i].cpu().numpy())
    roc_auc = metrics.auc(fpr, tpr)
    plt.subplot(2, 7, i + 1)

    plt.title('ROC for: ' + class_name)
    plt.plot(fpr, tpr, label = 'U-ones: AUC = %0.2f' % roc_auc)

    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')  # chance-level diagonal
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')

plt.show()

# Notebook export residue: the source of the previous cell as a bare string
# literal. Evaluating it has no effect.
'target, pred = test(DN121)\n\nfor i in range(len(CLASSES)):\n    fpr, tpr, threshold = metrics.roc_curve(target[:,i], pred[:,i])\n    roc_auc = metrics.auc(fpr, tpr)\n    f = plt.subplot(2, 7, i+1)\n\n    plt.title(\'ROC for: \' + class_names[i])\n    plt.plot(fpr, tpr, label = \'U-ones: AUC = %0.2f\' % roc_auc)\n\n    plt.legend(loc = \'lower right\')\n    plt.plot([0, 1], [0, 1],\'r--\')\n    plt.xlim([0, 1])\n    plt.ylim([0, 1])\n    plt.ylabel(\'True Positive Rate\')\n    plt.xlabel(\'False Positive Rate\')\n\nfig_size = plt.rcParams["figure.figsize"]\nfig_size[0] = 30\nfig_size[1] = 10\nplt.rcParams["figure.figsize"] = fig_size\nplt.show()'