# Confusion matrices and ROC Curves

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from openpyxl import load_workbook
from glob import glob
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score, roc_curve, auc, average_precision_score
from sklearn.preprocessing import OneHotEncoder
import matplotlib
from tqdm.notebook import tqdm
from sklearn.preprocessing import label_binarize
# Data sources handled by this notebook. The cells below combine these names
# to build the result-file names and the 'Has <source>' /
# '<source> Prob <class>' column names read from the Excel sheets.
data_types = ['WSI', 'RNA', 'miRNA', 'CNV', 'DNA']

## Two sources without missing info

### ROC CURVES

In [None]:
import os

# Create the output folder for the two-source plots. makedirs with
# exist_ok=True also creates missing parent folders and does not raise when
# the notebook is re-run and the folder is already there.
os.makedirs('../result_files/two-sources-integration/plots', exist_ok=True)

In [None]:
# Load, for every pair of data sources, the per-fold test probabilities of the
# integration model and of each single-source model, restricted to the samples
# for which both sources are available.
from itertools import combinations

# Suffixes of the probability columns, in the order LUAD, HLT, LUSC.
prob_suffixes = (' Prob LUAD', ' Prob HLT', ' Prob LUSC')

# auc_late, late_real and late_preds are only initialised here; this cell
# does not fill them.
auc_late = {}
all_real = {}
late_probs = {}
late_real = {}
late_preds = {}

for d_type1, d_type2 in combinations(data_types, 2):
    name = d_type1 + '-' + d_type2
    # One sheet per fold (sheets 0..9).
    data_model = pd.read_excel('../result_files/two-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    models = (d_type1, d_type2, 'Integration')
    all_real[name] = {model: [] for model in models}
    late_probs[name] = {model: [] for model in models}
    for df in data_model.values():
        # Keep the rows where both sources have data (rows flagged -1 are dropped).
        df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1)]
        real = df_only['Real'].values
        for model in models:
            prob_cols = [model + suffix for suffix in prob_suffixes]
            all_real[name][model].append(real)
            # to_numpy keeps the (n_samples, 3) shape even when a fold has no
            # matching rows, so the per-fold arrays can always be concatenated.
            late_probs[name][model].append(df_only[prob_cols].to_numpy())

In [None]:
# Sanity check of the stacked probability matrix for one pair/source.
# The loader stores a list with one array per fold, and a list has no .shape;
# vstack works on that list and also on an already stacked 2-D array.
np.vstack(late_probs['WSI-RNA']['RNA']).shape

In [None]:
# Plot, for every pair of sources and every class, the ROC curves of the
# integration model and of the two single-source models, and save each
# figure as EPS. The legend reports the average precision (AUPRC) of each
# model next to its ROC curve.
from itertools import combinations

font = {
        'weight' : 'bold',
        'size'   : 12}

matplotlib.rc('font', **font)

lw = 2
path_save = '../result_files/two-sources-integration/plots/'
class_names = ['LUAD', 'HLT', 'LUSC']  # position == class index / probability column
colors = ['darkorange', 'darkgreen', 'darkblue']

for d_type1, d_type2 in combinations(data_types, 2):
    name = d_type1 + '-' + d_type2
    models = ['Integration', d_type1, d_type2]

    real_ = np.concatenate(all_real[name]['Integration'])
    # Binarising against the fixed class list always yields three columns,
    # even when the highest class does not occur among these samples.
    real_binarized = label_binarize(real_, classes=[*range(3)])

    fpr_types = {model: dict() for model in models}
    tpr_types = {model: dict() for model in models}
    roc_auc_types = {model: dict() for model in models}
    # Compute the ROC curve and the average precision for each class.
    for d_name in models:
        # Stack the folds into a local array; late_probs itself is left
        # untouched so this cell can be run again.
        probs = np.concatenate(late_probs[name][d_name])
        for cl in range(3):
            fpr_types[d_name][cl], tpr_types[d_name][cl], _ = roc_curve(real_binarized[:, cl], probs[:, cl])
            roc_auc_types[d_name][cl] = average_precision_score(real_binarized[:, cl], probs[:, cl])

    # One figure per class, same model order and colours in every figure.
    for cl, class_name in enumerate(class_names):
        plt.figure()
        for d_name, c in zip(models, colors):
            plt.plot(fpr_types[d_name][cl], tpr_types[d_name][cl], color=c, lw=lw,
                     label='%s ROC curve (AUPRC = %0.3f)' % (d_name, roc_auc_types[d_name][cl]))
        plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC for %s class (%s)' % (class_name, name))
        plt.legend(loc="lower right")
        plt.savefig(path_save+name+'_test_auprc_'+class_name+'.eps', dpi=300, format='eps')
        plt.show()
        plt.close()


### CMs

In [None]:
# Collect, for every pair of sources, the true labels and the integration
# model's predicted labels of each fold, restricted to the samples for which
# both sources are available.
from itertools import combinations

all_real = {}
late_preds = {}

for d_type1, d_type2 in combinations(data_types, 2):
    name = d_type1 + '-' + d_type2
    # One sheet per fold (sheets 0..9).
    data_model = pd.read_excel('../result_files/two-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    all_real[name] = []
    late_preds[name] = []
    for df in data_model.values():
        # Keep the rows where both sources have data (rows flagged -1 are dropped).
        df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1)]
        all_real[name].append(df_only['Real'].values)
        late_preds[name].append(df_only['Integration Pred'].values)

In [None]:
import itertools

font = {
        'weight' : 'bold',
        'size'   : 12}

matplotlib.rc('font', **font)


def plot_cm(real_labels, preds, title, save_name):
    """Plot the confusion matrix of ``preds`` against ``real_labels`` and save it.

    Each cell is annotated with its raw count. The figure is shown and then
    closed.

    Args:
        real_labels: true class labels, one per sample.
        preds: predicted class labels, aligned with ``real_labels``.
        title: title drawn above the matrix.
        save_name: output path; the image format follows its extension.
    """
    cm = confusion_matrix(real_labels, preds)
    target_names = ['LUAD', 'Control', 'LUSC']

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)

    # Write the count in every cell; switch to white text on dark cells so
    # the number stays readable.
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, "{:,}".format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    # confusion_matrix(y_true, y_pred) puts the true classes on the rows
    # (y axis) and the predicted classes on the columns (x axis).
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # No explicit format: matplotlib derives it from the extension of
    # save_name, so a '.eps' name is no longer filled with PNG data.
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Draw and save one confusion matrix per pair of sources, pooling the folds.
# File names use the raw source names; figure titles show 'DNA' as 'metDNA'.
path = '../result_files/two-sources-integration/plots/'
shown_as = {'DNA': 'metDNA'}

for first_idx, d_type1 in enumerate(data_types):
    for d_type2 in data_types[first_idx + 1:]:
        name = d_type1 + '-' + d_type2
        title_name = '-'.join(shown_as.get(src, src) for src in (d_type1, d_type2))
        pooled_real = np.concatenate(all_real[name])
        pooled_preds = np.concatenate(late_preds[name])
        plot_cm(pooled_real, pooled_preds,
                title=title_name, save_name=path+name+'_cm.png')


## Three sources without missing info

In [None]:
# Load, for every triple of data sources, the per-fold test probabilities of
# the integration model and of each single-source model, restricted to the
# samples for which all three sources are available.
from itertools import combinations

# Suffixes of the probability columns, in the order LUAD, HLT, LUSC.
prob_suffixes = (' Prob LUAD', ' Prob HLT', ' Prob LUSC')

# auc_late and late_real are only initialised here; this cell does not fill them.
auc_late = {}
all_real = {}
late_probs = {}
late_real = {}

for d_type1, d_type2, d_type3 in combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    # One sheet per fold (sheets 0..9).
    data_model = pd.read_excel('../result_files/three-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    models = (d_type1, d_type2, d_type3, 'Integration')
    all_real[name] = {model: [] for model in models}
    late_probs[name] = {model: [] for model in models}
    for df in data_model.values():
        # Keep the rows where the three sources have data (rows flagged -1 are dropped).
        df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1) & (df['Has ' + d_type3] != -1)]
        real = df_only['Real'].values
        for model in models:
            prob_cols = [model + suffix for suffix in prob_suffixes]
            all_real[name][model].append(real)
            # to_numpy keeps the (n_samples, 3) shape even when a fold has no
            # matching rows, so the per-fold arrays can always be concatenated.
            late_probs[name][model].append(df_only[prob_cols].to_numpy())

In [None]:
# Plot, for every triple of sources and every class, the ROC curves of the
# integration model and of the three single-source models, and save each
# figure as EPS. The legend reports the average precision (AUPRC) of each
# model next to its ROC curve.
import os
from itertools import combinations

font = {
        'weight' : 'bold',
        'size'   : 12}

matplotlib.rc('font', **font)

lw = 2
path_save = '../result_files/three-sources-integration/plots/'
# savefig does not create folders; make sure the target exists.
os.makedirs(path_save, exist_ok=True)
class_names = ['LUAD', 'HLT', 'LUSC']  # position == class index / probability column
colors = ['darkorange', 'darkgreen', 'darkblue', 'darkred']

for d_type1, d_type2, d_type3 in combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    models = ['Integration', d_type1, d_type2, d_type3]

    real_ = np.concatenate(all_real[name]['Integration'])
    # Binarising against the fixed class list always yields three columns,
    # even when the highest class does not occur among these samples.
    real_binarized = label_binarize(real_, classes=[*range(3)])

    fpr_types = {model: dict() for model in models}
    tpr_types = {model: dict() for model in models}
    roc_auc_types = {model: dict() for model in models}
    # Compute the ROC curve and the average precision for each class.
    for d_name in models:
        # Stack the folds into a local array; late_probs itself is left
        # untouched so this cell can be run again.
        probs = np.concatenate(late_probs[name][d_name])
        for cl in range(3):
            fpr_types[d_name][cl], tpr_types[d_name][cl], _ = roc_curve(real_binarized[:, cl], probs[:, cl])
            roc_auc_types[d_name][cl] = average_precision_score(real_binarized[:, cl], probs[:, cl])

    # One figure per class, same model order and colours in every figure.
    for cl, class_name in enumerate(class_names):
        plt.figure()
        for d_name, c in zip(models, colors):
            plt.plot(fpr_types[d_name][cl], tpr_types[d_name][cl], color=c, lw=lw,
                     label='%s ROC curve (AUPRC = %0.3f)' % (d_name, roc_auc_types[d_name][cl]))
        plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC for %s class (%s)' % (class_name, name))
        plt.legend(loc="lower right")
        plt.savefig(path_save+name+'_test_auprc_'+class_name+'.eps', dpi=300, format='eps')
        plt.show()
        plt.close()


In [None]:


# Collect, for every triple of sources, the true labels and the integration
# model's predicted labels of each fold, restricted to the samples for which
# all three sources are available.
from itertools import combinations

all_real = {}
late_preds = {}

for d_type1, d_type2, d_type3 in combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    # One sheet per fold (sheets 0..9).
    data_model = pd.read_excel('../result_files/three-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    all_real[name] = []
    late_preds[name] = []
    for df in data_model.values():
        # Keep the rows where the three sources have data (rows flagged -1 are dropped).
        df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1) & (df['Has ' + d_type3] != -1)]
        all_real[name].append(df_only['Real'].values)
        late_preds[name].append(df_only['Integration Pred'].values)


In [None]:
# Draw and save one confusion matrix per triple of sources, pooling the folds.
# File names use the raw source names; figure titles show 'DNA' as 'metDNA'.
import os
from itertools import combinations

path = '../result_files/three-sources-integration/plots/'
# savefig does not create folders; make sure the target exists.
os.makedirs(path, exist_ok=True)
display_names = {'DNA': 'metDNA'}

for d_type1, d_type2, d_type3 in combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    # Build the title from display names without overwriting the loop
    # variables, which are also the keys of all_real / late_preds.
    title_name = '-'.join(display_names.get(src, src) for src in (d_type1, d_type2, d_type3))
    plot_cm(np.concatenate(all_real[name]), np.concatenate(late_preds[name]),
            title=title_name, save_name=path+name+'_cm.png')

## Four sources without missing info

In [None]:
# Load, for the four-source combinations, the per-fold test probabilities of
# the integration model and of each single-source model, restricted to the
# samples for which all four sources are available.
k = 0
i = 1
j = 2
z = 3

# Suffixes of the probability columns, in the order LUAD, HLT, LUSC.
prob_suffixes = (' Prob LUAD', ' Prob HLT', ' Prob LUSC')

# auc_late and late_real are only initialised here; this cell does not fill them.
auc_late = {}
all_real = {}
late_probs = {}
late_real = {}

# NOTE(review): the start offsets are advanced by hand and `z` is only reset
# when d_type1 changes. With the five entries of data_types this visits
# WSI-RNA-miRNA-CNV, WSI-RNA-miRNA-DNA, WSI-RNA-CNV-DNA and RNA-miRNA-CNV-DNA
# but never WSI-miRNA-CNV-DNA. The traversal is kept unchanged because the
# other four-source cells walk the same sequence - confirm whether the fifth
# combination should be included.
for d_type1 in data_types[k:]:
    for d_type2 in data_types[i:]:
        for d_type3 in data_types[j:]:
            for d_type4 in data_types[z:]:
                name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
                # One sheet per fold (sheets 0..9).
                data_model = pd.read_excel('../result_files/four-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                                           sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
                models = (d_type1, d_type2, d_type3, d_type4, 'Integration')
                all_real[name] = {model: [] for model in models}
                late_probs[name] = {model: [] for model in models}
                for df in data_model.values():
                    # Keep the rows where the four sources have data (rows flagged -1 are dropped).
                    df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1)
                                     & (df['Has ' + d_type3] != -1) & (df['Has ' + d_type4] != -1)]
                    real = df_only['Real'].values
                    for model in models:
                        prob_cols = [model + suffix for suffix in prob_suffixes]
                        all_real[name][model].append(real)
                        # to_numpy keeps the (n_samples, 3) shape even when a
                        # fold has no matching rows, so the per-fold arrays
                        # can always be concatenated.
                        late_probs[name][model].append(df_only[prob_cols].to_numpy())
            z += 1
        j += 1
    k += 1
    i = k + 1
    j = i + 1
    z = j + 1

In [None]:
# Plot, for the four-source combinations and every class, the ROC curves of
# the integration model and of the four single-source models, and save each
# figure as EPS. The legend reports the average precision (AUPRC) of each
# model next to its ROC curve.
import os

k = 0
i = 1
j = 2
z = 3

font = {
        'weight' : 'bold',
        'size'   : 10}

matplotlib.rc('font', **font)

lw = 2
path_save = '../result_files/four-sources-integration/plots/'
# savefig does not create folders; make sure the target exists.
os.makedirs(path_save, exist_ok=True)
class_names = ['LUAD', 'HLT', 'LUSC']  # position == class index / probability column
colors = ['darkorange', 'darkgreen', 'darkblue', 'darkred', 'purple']

# NOTE(review): hand-advanced offsets; `z` is only reset when d_type1 changes,
# so WSI-miRNA-CNV-DNA is never visited. Kept unchanged so the names match the
# ones the four-source loading cell fills in - confirm the intended set.
for d_type1 in data_types[k:]:
    for d_type2 in data_types[i:]:
        for d_type3 in data_types[j:]:
            for d_type4 in data_types[z:]:
                name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
                models = ['Integration', d_type1, d_type2, d_type3, d_type4]

                real_ = np.concatenate(all_real[name]['Integration'])
                # Binarising against the fixed class list always yields three
                # columns, even when the highest class does not occur.
                real_binarized = label_binarize(real_, classes=[*range(3)])

                fpr_types = {model: dict() for model in models}
                tpr_types = {model: dict() for model in models}
                roc_auc_types = {model: dict() for model in models}
                # Compute the ROC curve and the average precision for each class.
                for d_name in models:
                    # Stack the folds into a local array; late_probs itself is
                    # left untouched so this cell can be run again.
                    probs = np.concatenate(late_probs[name][d_name])
                    for cl in range(3):
                        fpr_types[d_name][cl], tpr_types[d_name][cl], _ = roc_curve(real_binarized[:, cl], probs[:, cl])
                        roc_auc_types[d_name][cl] = average_precision_score(real_binarized[:, cl], probs[:, cl])

                # One figure per class. Every figure uses the same model
                # order, so each source keeps one colour across the plots.
                for cl, class_name in enumerate(class_names):
                    plt.figure()
                    for d_name, c in zip(models, colors):
                        plt.plot(fpr_types[d_name][cl], tpr_types[d_name][cl], color=c, lw=lw,
                                 label='%s ROC curve (AUPRC = %0.3f)' % (d_name, roc_auc_types[d_name][cl]))
                    plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
                    plt.xlim([0.0, 1.0])
                    plt.ylim([0.0, 1.05])
                    plt.xlabel('False Positive Rate')
                    plt.ylabel('True Positive Rate')
                    plt.title('ROC for %s class (%s)' % (class_name, name))
                    plt.legend(loc="lower right")
                    plt.savefig(path_save+name+'_test_auprc_'+class_name+'.eps', dpi=300, format='eps')
                    plt.show()
                    plt.close()
            z += 1
        j += 1
    k += 1
    i = k + 1
    j = i + 1
    z = j + 1

In [None]:
# Collect, for the four-source combinations, the true labels and the
# integration model's predicted labels of each fold, restricted to the
# samples for which all four sources are available.
k = 0
i = 1
j = 2
z = 3

all_real = {}
late_preds = {}

# NOTE(review): hand-advanced offsets; `z` is only reset when d_type1 changes,
# so WSI-miRNA-CNV-DNA is never visited. Kept unchanged so this cell stays in
# step with the other four-source cells - confirm the intended set.
for d_type1 in data_types[k:]:
    for d_type2 in data_types[i:]:
        for d_type3 in data_types[j:]:
            for d_type4 in data_types[z:]:
                name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
                # One sheet per fold (sheets 0..9).
                data_model = pd.read_excel('../result_files/four-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                                           sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
                all_real[name] = []
                late_preds[name] = []
                for df in data_model.values():
                    # Keep the rows where the four sources have data (rows flagged -1 are dropped).
                    df_only = df.loc[(df['Has ' + d_type1] != -1) & (df['Has ' + d_type2] != -1)
                                     & (df['Has ' + d_type3] != -1) & (df['Has ' + d_type4] != -1)]
                    all_real[name].append(df_only['Real'].values)
                    late_preds[name].append(df_only['Integration Pred'].values)
            z += 1
        j += 1
    k += 1
    i = k + 1
    j = i + 1
    z = j + 1

In [None]:
# Draw and save one confusion matrix per four-source combination, pooling the
# folds. File names use the raw source names; figure titles show 'DNA' as
# 'metDNA'.
import os

k = 0
i = 1
j = 2
z = 3

font = {
        'weight' : 'bold',
        'size'   : 12}

matplotlib.rc('font', **font)

path = '../result_files/four-sources-integration/plots/'
# savefig does not create folders; make sure the target exists.
os.makedirs(path, exist_ok=True)
display_names = {'DNA': 'metDNA'}

# NOTE(review): hand-advanced offsets; `z` is only reset when d_type1 changes,
# so WSI-miRNA-CNV-DNA is never visited. Kept unchanged so the names match the
# ones the four-source loading cell fills in - confirm the intended set.
for d_type1 in data_types[k:]:
    for d_type2 in data_types[i:]:
        for d_type3 in data_types[j:]:
            for d_type4 in data_types[z:]:
                name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
                # Build the title for this combination here; it was previously
                # never assigned in this cell, so a value left over from an
                # earlier cell was used for every figure.
                title_name = '-'.join(display_names.get(src, src)
                                      for src in (d_type1, d_type2, d_type3, d_type4))
                # NOTE(review): the other confusion-matrix cells save '.png';
                # confirm that plot_cm honours the '.eps' extension used here.
                plot_cm(np.concatenate(all_real[name]), np.concatenate(late_preds[name]),
                        title=title_name, save_name=path+name+'_cm.eps')
            z += 1
        j += 1
    k += 1
    i = k + 1
    j = i + 1
    z = j + 1

## All sources without missing info

In [None]:
# Collect the true labels and the integration model's predictions of each
# fold for the model that uses all five sources, keeping only the samples
# for which every source is available.
d_type1, d_type2, d_type3, d_type4, d_type5 = 'WSI', 'RNA', 'miRNA', 'CNV', 'DNA'
sources = [d_type1, d_type2, d_type3, d_type4, d_type5]
name = '-'.join(sources)

# One sheet per fold (sheets 0..9).
data_model = pd.read_excel('../result_files/data_integration_model_test_probs_SGD-all_sources.xlsx',
                           sheet_name=list(range(10)), engine='openpyxl')

all_real = []
late_preds = []
for fold_df in data_model.values():
    # A row is kept only when none of its 'Has <source>' flags equals -1.
    has_every_source = (fold_df[['Has ' + src for src in sources]] != -1).all(axis=1)
    complete = fold_df.loc[has_every_source]
    all_real.append(complete['Real'].values)
    late_preds.append(complete['Integration Pred'].values)


In [None]:
# Confusion matrix of the all-sources model over the pooled folds. The title
# shows the last source as 'metDNA'; the file name keeps the raw `name`.
path = '../result_files/'
title_name = '-'.join([d_type1, d_type2, d_type3, d_type4, 'metDNA'])
pooled_real = np.concatenate(all_real)
pooled_preds = np.concatenate(late_preds)
plot_cm(pooled_real, pooled_preds, title=title_name, save_name=path+name+'_cm.png')

## Two sources with missing info

In [None]:
# For every pair of sources, pool over the folds the integration model's
# predictions and the true labels of the samples that have at least one of
# the two sources.
preds_dtype = {}
real_labels = {}

for first_idx, d_type1 in enumerate(data_types):
    for d_type2 in data_types[first_idx + 1:]:
        name = d_type1 + '-' + d_type2
        print(name)
        # One sheet per fold (sheets 0..9).
        data_model = pd.read_excel('../result_files/two-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                                   sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
        fold_preds = []
        fold_real = []
        for fold_df in data_model.values():
            # A row is kept when either 'Has <source>' flag differs from -1.
            has_any = (fold_df['Has ' + d_type1] != -1) | (fold_df['Has ' + d_type2] != -1)
            available = fold_df.loc[has_any]
            fold_preds.append(available['Integration Pred'].values)
            fold_real.append(available['Real'].values)
        # The leading empty list seeds the concatenation exactly as the
        # fold-by-fold accumulation did, including its promotion to float.
        preds_dtype[name] = np.concatenate([[]] + fold_preds, axis=0)
        real_labels[name] = np.concatenate([[]] + fold_real, axis=0)

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix, f1_score
import pandas as pd

# Bold 16 pt text for every figure drawn after this point.
font = {
        'weight' : 'bold',
        'size'   : 16}

matplotlib.rc('font', **font)

# Tick labels, in class-index order.
target_names = ['LUAD', 'Healthy', 'LUSC']
# Set to True to show row-normalised rates instead of raw counts.
normalize = False

# One confusion matrix per unordered pair of sources.
for d_type1, d_type2 in itertools.combinations(data_types, 2):
    name = d_type1 + '-' + d_type2
    # sklearn convention: rows are the true classes, columns the predicted ones.
    cm = confusion_matrix(real_labels[name], preds_dtype[name])
    if normalize:
        # Normalise each row by the number of true samples of that class, and
        # do it before drawing so the image and the cell text agree.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(name)
    plt.colorbar()

    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so they stay readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(col, row, cell_format.format(cm[row, col]),
                 horizontalalignment="center",
                 color="white" if cm[row, col] > thresh else "black")

    # Matrix rows (the y axis) are the true labels, columns (the x axis) the
    # predictions; the axis labels must follow that layout.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig('results_SGD/two-sources-integration/cm_'+name+'_10cv_all.png', dpi=300, bbox_inches = "tight")
    plt.show()

## Three sources with missing info

In [None]:
from itertools import combinations

# Pooled integration predictions / ground-truth labels, keyed by 'A-B-C' name.
preds_dtype = {}
real_labels = {}

# combinations() yields every unordered triple of sources exactly once, in
# data_types order, without hand-maintained slice offsets.
for d_type1, d_type2, d_type3 in combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    print(name)
    # Sheets 0-9 of the triple's workbook, returned as {sheet index: DataFrame}.
    data_model = pd.read_excel('results_SGD/three-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    preds_dtype[name] = []
    real_labels[name] = []
    for df in data_model.values():
        # Keep a row when at least one of the three 'Has <source>' flags is not -1.
        df_only = df.loc[(df['Has '+ d_type1] != -1) | (df['Has ' + d_type2] != -1) | (df['Has ' + d_type3] != -1)]

        preds = df_only['Integration Pred'].values
        real = df_only['Real'].values
        # Append this sheet's rows to the running per-triple arrays.
        preds_dtype[name] = np.concatenate([preds_dtype[name], preds], axis=0)
        real_labels[name] = np.concatenate([real_labels[name], real], axis=0)

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix, f1_score
import pandas as pd

# Bold 16 pt text for every figure drawn after this point.
font = {
        'weight' : 'bold',
        'size'   : 16}

matplotlib.rc('font', **font)

# Tick labels, in class-index order.
target_names = ['LUAD', 'Healthy', 'LUSC']
# Set to True to show row-normalised rates instead of raw counts.
normalize = False

# One confusion matrix per unordered triple of sources.
for d_type1, d_type2, d_type3 in itertools.combinations(data_types, 3):
    name = d_type1 + '-' + d_type2 + '-' + d_type3
    # sklearn convention: rows are the true classes, columns the predicted ones.
    cm = confusion_matrix(real_labels[name], preds_dtype[name])

    # Overall accuracy = correctly classified samples (diagonal) / all samples.
    accuracy = np.trace(cm) / float(np.sum(cm))
    print(accuracy)

    if normalize:
        # Normalise each row by the number of true samples of that class, and
        # do it before drawing so the image and the cell text agree.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(name)
    plt.colorbar()

    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so they stay readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(col, row, cell_format.format(cm[row, col]),
                 horizontalalignment="center",
                 color="white" if cm[row, col] > thresh else "black")

    # Matrix rows (the y axis) are the true labels, columns (the x axis) the
    # predictions; the axis labels must follow that layout.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig('results_SGD/three-sources-integration/plots/cm_'+name+'_10cv.png', dpi=300, bbox_inches = "tight")
    plt.show()

## Four sources with missing info

In [None]:
# Confusion matrix
from itertools import combinations
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, f1_score

# Pooled integration predictions / ground-truth labels, keyed by 'A-B-C-D' name.
preds_dtype = {}
real_labels = {}

# combinations() yields all five 4-source subsets of data_types. The previous
# hand-maintained slice offsets never reset the innermost offset after the
# third-level loop, so 'WSI-miRNA-CNV-DNA' was silently skipped.
for d_type1, d_type2, d_type3, d_type4 in combinations(data_types, 4):
    name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
    print(name)
    # Sheets 0-9 of the subset's workbook, returned as {sheet index: DataFrame}.
    data_model = pd.read_excel('results_SGD/four-sources-integration/data_integration_model_test_probs_'+name+'.xlsx',
                               sheet_name=[0,1,2,3,4,5,6,7,8,9], engine='openpyxl')
    preds_dtype[name] = []
    real_labels[name] = []
    for df in data_model.values():
        # Keep a row when at least one of the four 'Has <source>' flags is not -1.
        df_only = df.loc[(df['Has '+ d_type1] != -1) | (df['Has ' + d_type2] != -1) | (df['Has ' + d_type3] != -1) | (df['Has ' + d_type4] != -1)]

        preds = df_only['Integration Pred'].values
        real = df_only['Real'].values
        # Append this sheet's rows to the running per-subset arrays.
        preds_dtype[name] = np.concatenate([preds_dtype[name], preds], axis=0)
        real_labels[name] = np.concatenate([real_labels[name], real], axis=0)

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix, f1_score
import pandas as pd

# Bold 16 pt text for every figure drawn after this point.
font = {
        'weight' : 'bold',
        'size'   : 16}

matplotlib.rc('font', **font)

# Tick labels, in class-index order.
target_names = ['LUAD', 'Healthy', 'LUSC']
# Set to True to show row-normalised rates instead of raw counts.
normalize = False

# Walk all five 4-source subsets of data_types. The previous hand-maintained
# slice offsets never reset the innermost offset, so one subset
# ('WSI-miRNA-CNV-DNA') was never plotted.
for d_type1, d_type2, d_type3, d_type4 in itertools.combinations(data_types, 4):
    name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4
    if name not in real_labels or name not in preds_dtype:
        # Nothing was loaded for this subset; report it instead of failing
        # with a KeyError halfway through the figures.
        print('No loaded predictions for ' + name + ', skipping')
        continue

    # sklearn convention: rows are the true classes, columns the predicted ones.
    cm = confusion_matrix(real_labels[name], preds_dtype[name])

    # Overall accuracy = correctly classified samples (diagonal) / all samples.
    accuracy = np.trace(cm) / float(np.sum(cm))
    print(accuracy)

    if normalize:
        # Normalise each row by the number of true samples of that class, and
        # do it before drawing so the image and the cell text agree.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(name)
    plt.colorbar()

    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so they stay readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(col, row, cell_format.format(cm[row, col]),
                 horizontalalignment="center",
                 color="white" if cm[row, col] > thresh else "black")

    # Matrix rows (the y axis) are the true labels, columns (the x axis) the
    # predictions; the axis labels must follow that layout.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig('results_SGD/four-sources-integration/plots/cm_'+name+'_10cv_all.png', dpi=300, bbox_inches = "tight")
    plt.show()

## All sources with missing info

In [None]:
# The five sources combined by the full integration model.
d_type1, d_type2, d_type3, d_type4, d_type5 = 'WSI', 'RNA', 'miRNA', 'CNV', 'DNA'
name = '-'.join((d_type1, d_type2, d_type3, d_type4, d_type5))

# Sheets 0-9 of the all-sources workbook, as {sheet index: DataFrame}.
data_model = pd.read_excel(
    '../result_files/data_integration_model_test_probs_SGD-all_sources.xlsx',
    sheet_name=list(range(10)),
    engine='openpyxl',
)

# Per-sheet ground-truth labels and integration predictions. No availability
# filter is applied in this cell: every row of every sheet is kept.
all_real = []
late_preds = []
for sheet in data_model.values():
    all_real.append(sheet['Real'].values)
    late_preds.append(sheet['Integration Pred'].values)

In [None]:
# Confusion matrix pooled over all loaded sheets. The figure title shows the
# last source as 'metDNA', while the output file name reuses `name`.
path = '../result_files/'
title_name = '-'.join([d_type1, d_type2, d_type3, d_type4, 'metDNA'])
plot_cm(
    np.concatenate(all_real),
    np.concatenate(late_preds),
    title=title_name,
    save_name=path + name + '_cm_wNA.png',
)

In [None]:
d_type1 = 'WSI'
d_type2 = 'RNA'
d_type3 = 'miRNA'
d_type4 = 'CNV'
d_type5 = 'DNA'

# Reset here as before; neither dict is filled in this cell.
auc_late = {}
late_real = {}


data_model = pd.read_excel('../result_files/data_integration_model_test_probs_SGD-all_sources.xlsx',
sheet_name=[0,1,2,3,4,5,6,7,8,9],engine='openpyxl')

# Per-sheet labels and class probabilities, keyed by source name plus
# 'Integration' for the combined model.
single_sources = [d_type1, d_type2, d_type3, d_type4, d_type5]
all_real = {key: [] for key in single_sources + ['Integration']}
late_probs = {key: [] for key in single_sources + ['Integration']}

# Column-name suffixes of the three class probabilities, in class-index order.
prob_suffixes = [' Prob LUAD', ' Prob HLT', ' Prob LUSC']

for df_name, df in data_model.items():
    # The integration output is taken for every row of the sheet.
    all_real['Integration'].append(df['Real'].values)
    late_probs['Integration'].append(df[['Integration' + suffix for suffix in prob_suffixes]].to_numpy())

    # Each single source is taken only on rows whose 'Has <source>' flag is not -1.
    for d_type in single_sources:
        df_dt = df.loc[df['Has ' + d_type] != -1]
        all_real[d_type].append(df_dt['Real'].values)
        # Selecting the columns keeps the (n_rows, 3) shape even when no row
        # qualifies, so the per-sheet arrays can always be concatenated later.
        late_probs[d_type].append(df_dt[[d_type + suffix for suffix in prob_suffixes]].to_numpy())

In [None]:
# Bold 12 pt text for every figure drawn after this point.
font = {
        'weight' : 'bold',
        'size'   : 12}

matplotlib.rc('font', **font)

name = d_type1 + '-' + d_type2 + '-' + d_type3 + '-' + d_type4 + '-' + 'metDNA'

# Curves to draw and their colours, in legend order.
curve_names = ['Integration', d_type1, d_type2, d_type3, d_type4, d_type5]
curve_colors = ['darkorange', 'darkgreen', 'darkblue', 'darkred', 'lightcoral', 'lime']

fpr_types = {d_name: dict() for d_name in curve_names}
tpr_types = {d_name: dict() for d_name in curve_names}
roc_auc_types = {d_name: dict() for d_name in curve_names}

# Compute the one-vs-rest ROC curve and the average precision for each class
for d_name in curve_names:
    # Pool the per-sheet arrays only while they are still a list, so that
    # re-running this cell does not concatenate an already pooled array.
    if isinstance(late_probs[d_name], list):
        late_probs[d_name] = np.concatenate(late_probs[d_name])
    real_ = np.concatenate(all_real[d_name])
    # Always three indicator columns, even if a class is absent from real_.
    real_binarized = label_binarize(real_, classes=[0, 1, 2])
    for cl in range(3):
        fpr_types[d_name][cl], tpr_types[d_name][cl], _ = roc_curve(real_binarized[:, cl], late_probs[d_name][:, cl])
        # NOTE(review): the legend value is the average precision (AUPRC),
        # not the area under the ROC curve that is drawn - confirm intended.
        roc_auc_types[d_name][cl] = average_precision_score(real_binarized[:, cl], late_probs[d_name][:, cl])

path_save = '../result_files/'
lw = 2
# (class index, name shown in the title, tag used in the output file name)
for cl, class_title, file_tag in [(0, 'LUAD', 'LUAD'), (1, 'Control', 'control'), (2, 'LUSC', 'LUSC')]:
    plt.figure()
    for d_name, c in zip(curve_names, curve_colors):
        # The DNA source is displayed as 'metDNA' in the legend.
        pt_name = 'metDNA' if d_name == d_type5 else d_name
        plt.plot(fpr_types[d_name][cl], tpr_types[d_name][cl], color=c,
                 lw=lw, label='%s ROC curve (AUPRC = %0.3f)' % (pt_name, roc_auc_types[d_name][cl]))
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC for %s class (%s)' % (class_title, name))
    plt.legend(loc="lower right")
    plt.savefig(path_save+name+'_test_auprc_'+file_tag+'.png', dpi=300, format='png')
    plt.show()
    plt.close()