In [1]:
import torch
import os
import numpy as np
from sklearn.metrics import roc_auc_score
import torch.nn as nn
from dataloader import evaluate_singlemodel, ChestImages, device
from config import P, paths
from torch.utils.data import DataLoader
from models import ResNet18

import matplotlib.pyplot as plt
import math
import sys

In [2]:
def plot_auc(axes, probs, labels, class_names, model_name=None):
    """Draw one ROC curve per class, plus a pooled "Overall" curve, on a grid of axes.

    Panel ``i`` (in flattened order) shows the curve for ``class_names[i]``;
    the panel right after the last class shows the curve computed over all
    classes flattened together.

    Args:
        axes: Array of matplotlib axes exposing ``.flatten()``. It must hold at
            least ``len(class_names) + 1`` axes.
        probs: 2-D array of predicted scores indexed ``[sample, class]``.
        labels: 2-D array of ground-truth labels indexed like ``probs``.
        class_names: Sequence of class names, one per column of ``probs``.
        model_name: Text used in the legend entry. When omitted, the name is
            taken from a notebook-level ``model`` variable if one exists (the
            original behaviour); otherwise ``"Model"`` is used.
    """
    # `metrics` is not imported at notebook level (only `roc_auc_score` is),
    # so import it here to keep the function usable on a fresh kernel.
    from sklearn import metrics

    if model_name is None:
        # Backward compatibility: earlier versions read a global `model`.
        fallback_model = globals().get("model")
        model_name = fallback_model._get_name() if fallback_model is not None else "Model"

    flat_axes = axes.flatten()
    n_classes = len(class_names)

    for i in range(n_classes + 1):
        ax = flat_axes[i]
        if i == n_classes:
            # Extra panel: pool every (sample, class) pair into one curve.
            fpr, tpr, _ = metrics.roc_curve(labels.flatten(), probs.flatten())
            title = "ROC for {}".format("Overall")
        else:
            fpr, tpr, _ = metrics.roc_curve(labels[:, i], probs[:, i])
            title = "ROC for {}".format(class_names[i])

        roc_auc = metrics.auc(fpr, tpr)

        ax.set_title(title)
        ax.plot(fpr, tpr, label="{}: AUC = {:.3f}".format(model_name, roc_auc))
        ax.legend(loc='lower right')
        # Diagonal reference line from (0, 0) to (1, 1).
        ax.plot([0, 1], [0, 1], 'r--')
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])
        # NOTE(review): the label placement below looks written for a grid that
        # is three columns wide (e.g. 2x3) - confirm before using other layouts.
        if (i == 0) or (i == 3):
            ax.set_ylabel('True Positive Rate')
        if i >= 3:
            ax.set_xlabel('False Positive Rate')

In [3]:
def get_prob_labels(path, dataloader=None):
    """Average the predictions of every checkpoint stored in ``path``.

    Each checkpoint is loaded into a fresh 5-output ``ResNet18``, evaluated
    with ``evaluate_singlemodel``, and the per-checkpoint probability arrays
    are averaged element-wise.

    Args:
        path: Directory containing the checkpoint files. Entries whose name
            starts with "." are ignored.
        dataloader: Loader to evaluate on. Defaults to the notebook-level
            ``dataloaders["valid"]`` (the original behaviour).

    Returns:
        Tuple ``(labels, probs_mean, class_names)``: the labels and the
        ``"classes"`` entry come from the last checkpoint evaluated, and
        ``probs_mean`` is the mean over all checkpoints.

    Raises:
        ValueError: If ``path`` contains no checkpoint files.
    """
    # Skip hidden entries (e.g. ".DS_Store") the same way the folder listing
    # does, and sort so the evaluation order does not depend on the platform.
    checkpoint_files = sorted(
        name for name in os.listdir(path) if not name.startswith(".")
    )
    if not checkpoint_files:
        raise ValueError("No checkpoint files found in {!r}".format(path))

    if dataloader is None:
        dataloader = dataloaders["valid"]

    criterion = nn.BCEWithLogitsLoss()

    all_probs = []
    for file in checkpoint_files:
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(os.path.join(path, file), map_location=device)
        model = ResNet18(out_size=5)
        model.load_state_dict(checkpoint["model_state_dict"])
        model.to(device)
        probs, labels, loss = evaluate_singlemodel(model, criterion, dataloader)
        all_probs.append(torch.cat(probs).numpy())

    probs_mean = np.mean(np.stack(all_probs), axis=0)

    # NOTE(review): labels and class names are taken from the last checkpoint;
    # this presumes every checkpoint shares them - confirm.
    class_names = checkpoint["classes"]
    labels = torch.cat(labels).numpy()

    return labels, probs_mean, class_names

In [7]:
# Root directory holding one sub-folder of checkpoints per training run.
# Set the CHEXPERT_MODELS_DIR environment variable to point somewhere else;
# without it the original per-OS default locations are used.
if os.name == "nt":
    default_dir_path = "C://Users/Ashok/Documents/MS/models/"
else:
    default_dir_path = "/Users/ashok/Downloads/Chexpert/models/"
dir_path = os.environ.get("CHEXPERT_MODELS_DIR", default_dir_path)

# Ignore hidden entries (names starting with ".") and sort so the listing,
# and every loop over it, runs in the same order on every platform.
folders = sorted(
    folder for folder in os.listdir(dir_path) if not folder.startswith(".")
)

for folder in folders:
    print(folder)

ResNet18_ignore_all_2019.12.07.16.49.53
[Completed] ResNet18_ignore_2019.12.06.19.24.38
[Completed] ResNet18_one_2019.12.06.20.31.30
[Completed] ResNet18_one_all_2019.12.06.19.46.36
[Completed] ResNet18_zero_2019.12.06.20.42.26
[Completed] ResNet18_zero_all_2019.12.06.22.31.14


In [8]:
# Look up the path entry for the current OS / dataset once.
_dataset_paths = paths[os.name][P.dataset]

# Training and validation datasets, both built with frac=1.0.
train_data = ChestImages(
    _dataset_paths["train_location"],
    _dataset_paths["dirpath"],
    P,
    frac=1.0,
)
valid_data = ChestImages(
    _dataset_paths["valid_location"],
    _dataset_paths["dirpath"],
    P,
    frac=1.0,
)

# One non-shuffled loader per split, keyed by split name.
dataloaders = {
    split: DataLoader(
        dataset,
        batch_size=P.batch_size,
        shuffle=False,
        num_workers=P.num_workers,
    )
    for split, dataset in (("train", train_data), ("valid", valid_data))
}

In [9]:
def overall_auc(probs, labels, classes_type, keys=(2, 5, 6, 8, 10)):
    """Compute a single ROC AUC over all (sample, class) pairs pooled together.

    Args:
        probs: Sequence of per-batch score tensors; concatenated along dim 0.
        labels: Sequence of per-batch label tensors, aligned with ``probs``.
        classes_type: When equal to ``"all"``, only the columns listed in
            ``keys`` are kept before scoring; any other value keeps every
            column.
        keys: Column indices retained when ``classes_type == "all"``.
            NOTE(review): the default presumably picks the five "subset"
            classes out of the 14-output models - confirm against the dataset.

    Returns:
        The value returned by ``roc_auc_score`` on the flattened labels and
        scores.
    """
    probs = torch.cat(probs).numpy()
    labels = torch.cat(labels).numpy()

    if classes_type == "all":
        # A list makes the column selection explicit fancy indexing.
        columns = list(keys)
        probs = probs[:, columns]
        labels = labels[:, columns]

    return roc_auc_score(labels.flatten(), probs.flatten())

In [None]:
# Evaluate every checkpoint of every run folder on the validation split and
# print one "<folder>, <file>, <overall AUC>" line per checkpoint.
criterion = nn.BCEWithLogitsLoss()

for folder in folders:
    # Skip hidden entries (e.g. ".DS_Store"), as is done for the folder list,
    # and sort so the report order is reproducible.
    files = sorted(
        name
        for name in os.listdir(os.path.join(dir_path, folder))
        if not name.startswith(".")
    )
    # Folders with "all" in their name hold 14-output models; the rest have 5.
    uses_all_classes = "all" in folder
    outsize = 14 if uses_all_classes else 5
    classes_type = "all" if uses_all_classes else "subset"

    # Datasets are rebuilt per folder because the label set depends on
    # classes_type.
    train_data = ChestImages(paths[os.name][P.dataset]["train_location"],
        paths[os.name][P.dataset]["dirpath"],
        P,
        frac=1.0,
        classes_type=classes_type)

    valid_data = ChestImages(paths[os.name][P.dataset]["valid_location"],
        paths[os.name][P.dataset]["dirpath"],
        P,
        frac=1.0,
        classes_type=classes_type)

    dataloaders = {
        "train": DataLoader(train_data,
            batch_size=P.batch_size,
            shuffle=False,
            num_workers=P.num_workers),

        "valid": DataLoader(valid_data,
            batch_size=P.batch_size,
            shuffle=False,
            num_workers=P.num_workers)
    }

    for file in files:
        path = os.path.join(dir_path, folder, file)
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(path, map_location=device)

        model = ResNet18(out_size=outsize)
        model.load_state_dict(checkpoint["model_state_dict"])
        _ = model.to(device)
        probs, labels, loss = evaluate_singlemodel(model, criterion, dataloaders["valid"])
        auc = overall_auc(probs, labels, classes_type)
        print("{:50}, {:20}, {:.5f}".format(folder, file, auc))

# Ensemble building 

In [6]:
# Checkpoints used to build the ensemble: one "<run folder>/<checkpoint file>"
# entry per run, each relative to `dir_path`.
# NOTE(review): presumably hand-picked from the per-checkpoint AUC report -
# confirm the selection criterion.
bestmodels = [
    "[Completed] ResNet18_ignore_2019.12.06.19.24.38/epoch1_itr1900.pt",
    "[Completed] ResNet18_one_2019.12.06.20.31.30/epoch2_itr1300.pt",
    "[Completed] ResNet18_one_all_2019.12.06.19.46.36/epoch2_itr3000.pt",
    "[Completed] ResNet18_zero_2019.12.06.20.42.26/epoch1_itr2600.pt",
    "[Completed] ResNet18_zero_all_2019.12.06.22.31.14/epoch2_itr1500.pt",
    "ResNet18_ignore_all_2019.12.07.16.49.53/epoch1_itr2700.pt"
]

In [27]:
# Run each selected checkpoint over one data split and save its outputs
# (restricted to five columns) together with the labels, one file per model.
criterion = nn.BCEWithLogitsLoss()
# Column indices kept from the 14-output ("all") models so that every saved
# tensor has five columns.
# NOTE(review): presumably these are the positions of the five "subset"
# classes - confirm against the dataset definition.
keys = (2, 5, 6, 8, 10)

# Split to export ("train" or "valid") and the `frac` passed to the training
# dataset.
datatype = "train"
frac = 0.5

# Not used anywhere in this cell.
inputs = []

for i, bestmodel in enumerate(bestmodels):
    # Reseed at the start of every iteration.
    # NOTE(review): presumably so each model sees the same `frac` subsample of
    # the training data - confirm how ChestImages samples.
    torch.manual_seed(0)
    np.random.seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
    model_num = "model_{}".format(i+1)
    # Runs with "all" in their path use the "all" label set; the rest use "subset".
    classes_type = "all" if "all" in bestmodel else "subset"
    
    train_data = ChestImages(paths[os.name][P.dataset]["train_location"], 
        paths[os.name][P.dataset]["dirpath"], 
        P,
        frac=frac,
        classes_type=classes_type)

    valid_data = ChestImages(paths[os.name][P.dataset]["valid_location"], 
        paths[os.name][P.dataset]["dirpath"], 
        P,
        frac=1.0,
        classes_type=classes_type)

    # Loaders are not shuffled, so rows come out in dataset order.
    # `dataloaders` is a notebook-level name and stays bound to the last
    # iteration's loaders after the loop ends.
    dataloaders = {
        "train": DataLoader(train_data, 
            batch_size=64, 
            shuffle=False, 
            num_workers=P.num_workers),

        "valid": DataLoader(valid_data, 
            batch_size=32, 
            shuffle=False, 
            num_workers=P.num_workers)
    }
    
    bestmodel_path = os.path.join(dir_path, bestmodel)
    # "all" models have 14 outputs, the others 5.
    outsize = 14 if "all" in bestmodel else 5
    model = ResNet18(out_size=outsize)
    _ = model.to(device)
    checkpoint = torch.load(bestmodel_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    probs, labels, loss = evaluate_singlemodel(model, criterion, dataloaders[datatype])
    # Join the per-batch results into single tensors.
    probs, labels = torch.cat(probs), torch.cat(labels)
    if classes_type == "all":
        # Keep only the `keys` columns so the shape matches the 5-output models.
        probs = probs[:,keys]
        labels = labels[:,keys]
    print(probs.shape, labels.shape)
    # Note: the output file name carries no extension.
    filename = "ensemble_{}_inputs_{}".format(datatype, model_num)
    torch.save({"inputs": (probs, labels)}, filename)

torch.Size([111707, 5]) torch.Size([111707, 5])
torch.Size([111707, 5]) torch.Size([111707, 5])
torch.Size([111707, 5]) torch.Size([111707, 5])
torch.Size([111707, 5]) torch.Size([111707, 5])
torch.Size([111707, 5]) torch.Size([111707, 5])
torch.Size([111707, 5]) torch.Size([111707, 5])


In [28]:
# Collect the non-image fields of one split and save them as a single tensor.
datatype = "train"

# Fix the RNG state and force deterministic cuDNN before iterating the loader.
torch.manual_seed(0)
np.random.seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Per-batch values of each field, appended in loader order.
sex, frontal_lateral, ap, pa, age = [], [], [], [], []
for data in dataloaders[datatype]:
    sex.append(data["sex"])
    frontal_lateral.append(data["frontal_lateral"])
    ap.append(data["ap"])
    pa.append(data["pa"])
    age.append(data["age"])

# Concatenate every field across batches, stack the five fields and transpose.
_field_batches = (sex, frontal_lateral, ap, pa, age)
inputs = torch.stack([torch.cat(batches) for batches in _field_batches]).T

torch.save({"inputs": inputs}, "ensemble_{}_other.pt".format(datatype))