In [1]:
import torch
import os
import numpy as np
from sklearn.linear_model import RidgeClassifierCV, RidgeClassifier, LogisticRegression, Lasso
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Directory that holds the saved ensemble tensors loaded by the cells below.
# Set the ENSEMBLE_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
dir_path = os.environ.get("ENSEMBLE_DATA_DIR", "C://Users/Ashok/Documents/MS/ensemble/")

In [3]:
def _load_inputs(file_name):
    """Load `file_name` from `dir_path` and return the value stored under "inputs"."""
    # NOTE(review): torch.load unpickles the file, so only load trusted files.
    return torch.load(os.path.join(dir_path, file_name))["inputs"]


# Additional features that are stacked next to the per-model columns later on,
# one object for the training split and one for the validation split.
train_other = _load_inputs("ensemble_train_other.pt")
valid_other = _load_inputs("ensemble_valid_other.pt")

In [4]:
# File names of the saved per-model outputs, in model order (1..6).
_train_files = [
    "ensemble_train_inputs_model_1",
    "ensemble_train_inputs_model_2",
    "ensemble_train_inputs_model_3",
    "ensemble_train_inputs_model_4",
    "ensemble_train_inputs_model_5",
    "ensemble_train_inputs_model_6",
]
_valid_files = [
    "ensemble_valid_inputs_model_1",
    "ensemble_valid_inputs_model_2",
    "ensemble_valid_inputs_model_3",
    "ensemble_valid_inputs_model_4",
    "ensemble_valid_inputs_model_5",
    "ensemble_valid_inputs_model_6",
]

# Load every file from dir_path; training files first, then validation files.
# NOTE(review): torch.load unpickles the file, so only load trusted files.
data1, data2, data3, data4, data5, data6 = [
    torch.load(os.path.join(dir_path, _name)) for _name in _train_files
]

vdata1, vdata2, vdata3, vdata4, vdata5, vdata6 = [
    torch.load(os.path.join(dir_path, _name)) for _name in _valid_files
]

In [5]:
# Sanity check: element 1 of each model's "inputs" entry (used as the labels
# further down) must be identical across all six models, for both splits.
# Comparing each model with its successor is enough to cover the whole chain.
_train_group = (data1, data2, data3, data4, data5, data6)
_valid_group = (vdata1, vdata2, vdata3, vdata4, vdata5, vdata6)

for _group in (_train_group, _valid_group):
    for _left, _right in zip(_group, _group[1:]):
        assert torch.all(_left["inputs"][1] == _right["inputs"][1])

In [6]:
np.random.seed(0)

# Per-model dicts in a fixed order: column k of X / X_valid is taken from
# model k+1, followed by the columns of train_other / valid_other.
train_models = (data1, data2, data3, data4, data5, data6)
valid_models = (vdata1, vdata2, vdata3, vdata4, vdata5, vdata6)
n_models = len(train_models)
n_classes = 5  # number of label columns that get their own ensemble

# Regularisation strengths tried for the Lasso stacker.
param_grid = {"alpha": [0.00001, 0.0001, 0.001, 0.1, 1, 10]}

all_scores = []  # one (best single-model AUC, ensemble AUC) pair per class
probs, labels = [], []  # per-class ensemble outputs and validation labels

for class_idx in range(n_classes):
    # One (n_samples, 1) column per model for the current class.
    train_cols = [
        m["inputs"][0][:, class_idx].unsqueeze(dim=1).numpy() for m in train_models
    ]
    valid_cols = [
        m["inputs"][0][:, class_idx].unsqueeze(dim=1).numpy() for m in valid_models
    ]

    X = np.hstack((*train_cols, train_other))
    X_valid = np.hstack((*valid_cols, valid_other))

    # The label tensors were asserted identical across models, so model 1's
    # copy is used for every class.
    y = data1["inputs"][1][:, class_idx].numpy()
    y_valid = vdata1["inputs"][1][:, class_idx].numpy()

    # Lasso is a regressor: its predict() output is a continuous score, which
    # is what roc_auc_score needs.
    # NOTE(review): no `scoring` is passed, so alpha is chosen with the
    # estimator's default score rather than AUC -- confirm this is intended.
    lasso = Lasso()
    grid = GridSearchCV(lasso, param_grid=param_grid, cv=5)
    grid.fit(X, y)

    # Predict once and reuse the result for both the per-class and pooled AUC.
    ensemble_pred = grid.predict(X_valid)
    score_from_ensemble = roc_auc_score(y_valid, ensemble_pred)

    # Baseline: best AUC achieved by any single model's own column.
    score_from_single = max(
        roc_auc_score(y_valid, X_valid[:, model_idx]) for model_idx in range(n_models)
    )

    probs.append(ensemble_pred)
    labels.append(y_valid)
    all_scores.append((score_from_single, score_from_ensemble))

In [7]:
# Display names for the five classes; entry k labels the k-th score pair
# when the per-class AUCs are printed.
training_classes = [
    'Cardiomegaly',
    'Edema',
    'Consolidation',
    'Atelectasis',
    'Pleural Effusion',
]

In [8]:
# Report, per class, the best single-model AUC next to the ensemble AUC.
report_line = "AUC for {:20}, Single: {:.3f}, Ensemble: {:.3f}"
for class_pos in range(5):
    single_auc, ensemble_auc = all_scores[class_pos][0], all_scores[class_pos][1]
    print(report_line.format(training_classes[class_pos], single_auc, ensemble_auc))

AUC for Cardiomegaly        , Single: 0.862, Ensemble: 0.818
AUC for Edema               , Single: 0.918, Ensemble: 0.918
AUC for Consolidation       , Single: 0.937, Ensemble: 0.886
AUC for Atelectasis         , Single: 0.821, Ensemble: 0.817
AUC for Pleural Effusion    , Single: 0.933, Ensemble: 0.928


In [9]:
# Pool the validation labels and ensemble outputs of every class into one flat
# vector each and compute a single AUC over all of them together.
overall_auc = roc_auc_score(np.array(labels).flatten(), np.array(probs).flatten())
print("Overall AUC: {:.3f}".format(overall_auc))

Ovrall AUC: 0.847
