In [1]:
from __future__ import division
from sys import argv
import numpy as np
import matplotlib.pyplot as plt
%matplotlib
from matplotlib import style
import pickle

from sklearn import cross_validation, metrics
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# Switch matplotlib to its built-in 'ggplot' style sheet for the figures below.
style.use('ggplot')

Using matplotlib backend: TkAgg




In [10]:
# Read the pickled benchmark results and close the file handle as soon as the
# payload has been loaded.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'BOW_vs_W2V.pkl' if it comes from a trusted run of this project.
with open('BOW_vs_W2V.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

# `results` is indexed as follows in this cell: for model k (0..4) entry 3k is
# used as the accuracy list, 3k+1 as the AUC list and 3k+2 as the label
# vector; entry 15 is used as the reference (true) test labels.
rf_labs = results[2]
bow_labs = results[5]
w2v_labs_strict = results[8]
w2v_labs_gen = results[11]
w2v_labs_be = results[14]
true_test_labels = results[15]

# Every metric list below has eleven entries: ten leading values followed by
# one summary value. For accuracy and AUC the summary is the mean of the ten
# values; for precision and recall it is the micro-averaged score of the
# model's labels against `true_test_labels`.
# NOTE(review): the ten precision/recall values are hard-coded literals --
# presumably copied from earlier per-seed runs; TODO confirm their source.

# --- Bag-of-words + random forest ---
rf_accuracy = results[0] + [np.mean(results[0])]
rf_auc = results[1] + [np.mean(results[1])]
rf_precision = [0.61, 0.71, 0.62, 0.75, 0.70, 0.70, 0.71, 0.64, 0.70, 0.65] + [metrics.precision_score(true_test_labels, rf_labs, average='micro')]
rf_recall = [0.53, 0.70, 0.62, 0.67, 0.70, 0.67, 0.68, 0.64, 0.69, 0.66] + [metrics.recall_score(true_test_labels, rf_labs, average='micro')]

# --- Bag-of-words + XGB ---
bow_accuracy = results[3] + [np.mean(results[3])]
bow_auc = results[4] + [np.mean(results[4])]
bow_precision = [0.60, 0.76, 0.52, 0.67, 0.73, 0.64, 0.77, 0.58, 0.66, 0.61] + [metrics.precision_score(true_test_labels, bow_labs, average='micro')]
bow_recall = [0.44, 0.70, 0.53, 0.53, 0.72, 0.64, 0.77, 0.56, 0.63, 0.63] + [metrics.recall_score(true_test_labels, bow_labs, average='micro')]

# --- word2vec, "strict" variant ---
w2v_accuracy_strict = results[6] + [np.mean(results[6])]
w2v_auc_strict = results[7] + [np.mean(results[7])]
w2v_precision_strict = [0.61, 0.68, 0.60, 0.69, 0.71, 0.60, 0.80, 0.62, 0.69, 0.65] + [metrics.precision_score(true_test_labels, w2v_labs_strict, average='micro')]
w2v_recall_strict = [0.49, 0.63, 0.60, 0.59, 0.70, 0.58, 0.77, 0.62, 0.69, 0.66] + [metrics.recall_score(true_test_labels, w2v_labs_strict, average='micro')]

# --- word2vec, "gen" variant ---
w2v_accuracy_gen = results[9] + [np.mean(results[9])]
w2v_auc_gen = results[10] + [np.mean(results[10])]
w2v_precision_gen = [0.62, 0.74, 0.74, 0.71, 0.79, 0.63, 0.80, 0.62, 0.73, 0.73] + [metrics.precision_score(true_test_labels, w2v_labs_gen, average='micro')]
w2v_recall_gen = [0.51, 0.65, 0.73, 0.64, 0.79, 0.56, 0.75, 0.62, 0.74, 0.73] + [metrics.recall_score(true_test_labels, w2v_labs_gen, average='micro')]

# --- word2vec, "be" variant ---
w2v_accuracy_be = results[12] + [np.mean(results[12])]
w2v_auc_be = results[13] + [np.mean(results[13])]
w2v_precision_be = [0.63, 0.69, 0.74, 0.70, 0.77, 0.57, 0.83, 0.68, 0.69, 0.69] + [metrics.precision_score(true_test_labels, w2v_labs_be, average='micro')]
w2v_recall_be = [0.54, 0.65, 0.73, 0.63, 0.77, 0.53, 0.81, 0.67, 0.69, 0.69] + [metrics.recall_score(true_test_labels, w2v_labs_be, average='micro')]

# One list per metric, each holding the five models in the same order so the
# plotting cell can zip them together.
accu_list = [rf_accuracy, bow_accuracy, w2v_accuracy_strict, w2v_accuracy_gen, w2v_accuracy_be]
auc_list = [rf_auc, bow_auc, w2v_auc_strict, w2v_auc_gen, w2v_auc_be]
prec_list = [rf_precision, bow_precision, w2v_precision_strict, w2v_precision_gen, w2v_precision_be]
rec_list = [rf_recall, bow_recall, w2v_recall_strict, w2v_recall_gen, w2v_recall_be]

In [11]:
# Four-panel comparison figure: one panel per metric, one marker/line series
# per model, with the x axis showing the ten random seeds plus an "AVG" column.
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['BOW RF', 'BOW XGB', 'W2V SR XGB', 'W2V GEN XGB', 'W2V FL XGB']
series_markers = ['o', 'v', 's', '>', '^']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Previous Work Comparison, Organism - Yeast', fontsize=20)

<matplotlib.text.Text at 0x7f5e4a537eb8>

In [4]:
def open_metrics(organism, metric, seeds=None, results_root='../../Results/Final_Results'):
    """Load one pickled metric for every seed and concatenate it per variant.

    For each seed a pickle file named ``{organism}_{metric}_pickle_{seed}.pkl``
    is read from the organism's ``metrics`` directory below ``results_root``.
    Elements 0, 1 and 2 of each loaded object are appended (via ``extend``) to
    the SR, GEN and BE result lists respectively.

    Parameters
    ----------
    organism : str
        Organism directory name, e.g. ``'yeast'``. ``'human'`` is special-cased
        because its files live under an extra ``computerome_human_runs`` level.
    metric : str
        Metric name used in the file name, e.g. ``'accuracy'``.
    seeds : iterable of str, optional
        Seeds to load, in order. When omitted, the first ten entries of the
        notebook-level ``random_seeds`` list are used (its eleventh entry is
        the "AVG" tick label, not a seed), which is the original behaviour.
    results_root : str, optional
        Directory that contains one sub-directory per organism.

    Returns
    -------
    tuple of (list, list, list)
        ``(SR_metrics, GEN_metrics, BE_metrics)``.

    Raises
    ------
    NameError
        If ``seeds`` is omitted and ``random_seeds`` has not been defined yet.
    IOError / OSError
        If a metric file for one of the seeds is missing.
    """
    if seeds is None:
        # Backward-compatible default: rely on the notebook global, dropping
        # its trailing "AVG" label.
        seeds = random_seeds[:10]

    # The human runs are stored one directory level deeper than the others.
    if organism == 'human':
        path_template = '{root}/{0}/computerome_human_runs/Results/{0}/metrics/{0}_{1}_pickle_{2}.pkl'
    else:
        path_template = '{root}/{0}/Results/{0}/metrics/{0}_{1}_pickle_{2}.pkl'

    SR_metrics = []
    GEN_metrics = []
    BE_metrics = []
    for seed in seeds:
        metric_path = path_template.format(organism, metric, seed, root=results_root)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point this at result files produced by trusted runs.
        with open(metric_path, 'rb') as metric_file:
            new_metric = pickle.load(metric_file)
        SR_metrics.extend(new_metric[0])
        GEN_metrics.extend(new_metric[1])
        BE_metrics.extend(new_metric[2])

    return SR_metrics, GEN_metrics, BE_metrics

In [5]:
# Load every stored metric for the 'drosophila' runs. open_metrics returns the
# values as three lists in the order SR, GEN, BE.
(SR_drosophila_accu, GEN_drosophila_accu,
 BE_drosophila_accu) = open_metrics('drosophila', 'accuracy')
(SR_drosophila_auc, GEN_drosophila_auc,
 BE_drosophila_auc) = open_metrics('drosophila', 'auc_score')
(SR_drosophila_fpr, GEN_drosophila_fpr,
 BE_drosophila_fpr) = open_metrics('drosophila', 'fpr')
(SR_drosophila_tpr, GEN_drosophila_tpr,
 BE_drosophila_tpr) = open_metrics('drosophila', 'tpr')
(SR_drosophila_report, GEN_drosophila_report,
 BE_drosophila_report) = open_metrics('drosophila', 'report')
SR_labels, GEN_labels, BE_labels = open_metrics('drosophila', 'labels')

# Flatten the label collections by one nesting level.
SR_drosophila_labels = [label for batch in SR_labels for label in batch]
GEN_drosophila_labels = [label for batch in GEN_labels for label in batch]
BE_drosophila_labels = [label for batch in BE_labels for label in batch]

# Precision and recall are read from fixed character positions of each report
# string (179:183 and 189:193).
# NOTE(review): presumably these offsets hit the averaged precision/recall
# columns of a classification-report text -- TODO confirm against the format.
report_triples = list(zip(SR_drosophila_report, GEN_drosophila_report, BE_drosophila_report))
SR_drosophila_precision = [float(sr[179:183]) for sr, gen, be in report_triples]
GEN_drosophila_precision = [float(gen[179:183]) for sr, gen, be in report_triples]
BE_drosophila_precision = [float(be[179:183]) for sr, gen, be in report_triples]
SR_drosophila_recall = [float(sr[189:193]) for sr, gen, be in report_triples]
GEN_drosophila_recall = [float(gen[189:193]) for sr, gen, be in report_triples]
BE_drosophila_recall = [float(be[189:193]) for sr, gen, be in report_triples]

# For each metric: the per-seed values of SR, GEN and BE, each followed by
# its mean as a final extra element.
accu_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_accu, GEN_drosophila_accu, BE_drosophila_accu)]
auc_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_auc, GEN_drosophila_auc, BE_drosophila_auc)]
prec_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_precision, GEN_drosophila_precision, BE_drosophila_precision)]
rec_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_recall, GEN_drosophila_recall, BE_drosophila_recall)]

In [6]:
# Four-panel figure for the fruit-fly runs: one panel per metric, one
# marker/line series per variant, x axis = ten random seeds plus "AVG".
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['STRICT', 'GENERAL', 'FULL']
series_markers = ['o', 'v', 's']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Organism - Fruit Fly', fontsize=20)

<matplotlib.text.Text at 0x7fec3bde8940>

In [7]:
# Load every stored metric for the 'yeast' runs. open_metrics returns the
# values as three lists in the order SR, GEN, BE.
# NOTE(review): the variables are still named *_drosophila_* although this
# cell loads yeast data; the names are kept because other cells may use them.
(SR_drosophila_accu, GEN_drosophila_accu,
 BE_drosophila_accu) = open_metrics('yeast', 'accuracy')
(SR_drosophila_auc, GEN_drosophila_auc,
 BE_drosophila_auc) = open_metrics('yeast', 'auc_score')
(SR_drosophila_fpr, GEN_drosophila_fpr,
 BE_drosophila_fpr) = open_metrics('yeast', 'fpr')
(SR_drosophila_tpr, GEN_drosophila_tpr,
 BE_drosophila_tpr) = open_metrics('yeast', 'tpr')
(SR_drosophila_report, GEN_drosophila_report,
 BE_drosophila_report) = open_metrics('yeast', 'report')
SR_labels, GEN_labels, BE_labels = open_metrics('yeast', 'labels')

# Flatten the label collections by one nesting level.
SR_drosophila_labels = [label for batch in SR_labels for label in batch]
GEN_drosophila_labels = [label for batch in GEN_labels for label in batch]
BE_drosophila_labels = [label for batch in BE_labels for label in batch]

# Precision and recall are read from fixed character positions of each report
# string (179:183 and 189:193).
# NOTE(review): presumably these offsets hit the averaged precision/recall
# columns of a classification-report text -- TODO confirm against the format.
report_triples = list(zip(SR_drosophila_report, GEN_drosophila_report, BE_drosophila_report))
SR_drosophila_precision = [float(sr[179:183]) for sr, gen, be in report_triples]
GEN_drosophila_precision = [float(gen[179:183]) for sr, gen, be in report_triples]
BE_drosophila_precision = [float(be[179:183]) for sr, gen, be in report_triples]
SR_drosophila_recall = [float(sr[189:193]) for sr, gen, be in report_triples]
GEN_drosophila_recall = [float(gen[189:193]) for sr, gen, be in report_triples]
BE_drosophila_recall = [float(be[189:193]) for sr, gen, be in report_triples]

# For each metric: the per-seed values of SR, GEN and BE, each followed by
# its mean as a final extra element.
accu_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_accu, GEN_drosophila_accu, BE_drosophila_accu)]
auc_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_auc, GEN_drosophila_auc, BE_drosophila_auc)]
prec_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_precision, GEN_drosophila_precision, BE_drosophila_precision)]
rec_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_recall, GEN_drosophila_recall, BE_drosophila_recall)]

In [8]:
# Four-panel figure for the yeast runs: one panel per metric, one marker/line
# series per variant, x axis = ten random seeds plus "AVG".
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['STRICT', 'GENERAL', 'FULL']
series_markers = ['o', 'v', 's']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Organism - Budding Yeast', fontsize=20)

<matplotlib.text.Text at 0x7fec3bc122b0>

In [9]:
# Load every stored metric for the 'rat' runs. open_metrics returns the
# values as three lists in the order SR, GEN, BE.
# NOTE(review): the variables are still named *_drosophila_* although this
# cell loads rat data; the names are kept because other cells may use them.
(SR_drosophila_accu, GEN_drosophila_accu,
 BE_drosophila_accu) = open_metrics('rat', 'accuracy')
(SR_drosophila_auc, GEN_drosophila_auc,
 BE_drosophila_auc) = open_metrics('rat', 'auc_score')
(SR_drosophila_fpr, GEN_drosophila_fpr,
 BE_drosophila_fpr) = open_metrics('rat', 'fpr')
(SR_drosophila_tpr, GEN_drosophila_tpr,
 BE_drosophila_tpr) = open_metrics('rat', 'tpr')
(SR_drosophila_report, GEN_drosophila_report,
 BE_drosophila_report) = open_metrics('rat', 'report')
SR_labels, GEN_labels, BE_labels = open_metrics('rat', 'labels')

# Flatten the label collections by one nesting level.
SR_drosophila_labels = [label for batch in SR_labels for label in batch]
GEN_drosophila_labels = [label for batch in GEN_labels for label in batch]
BE_drosophila_labels = [label for batch in BE_labels for label in batch]

# Precision and recall are read from fixed character positions of each report
# string (179:183 and 189:193).
# NOTE(review): presumably these offsets hit the averaged precision/recall
# columns of a classification-report text -- TODO confirm against the format.
report_triples = list(zip(SR_drosophila_report, GEN_drosophila_report, BE_drosophila_report))
SR_drosophila_precision = [float(sr[179:183]) for sr, gen, be in report_triples]
GEN_drosophila_precision = [float(gen[179:183]) for sr, gen, be in report_triples]
BE_drosophila_precision = [float(be[179:183]) for sr, gen, be in report_triples]
SR_drosophila_recall = [float(sr[189:193]) for sr, gen, be in report_triples]
GEN_drosophila_recall = [float(gen[189:193]) for sr, gen, be in report_triples]
BE_drosophila_recall = [float(be[189:193]) for sr, gen, be in report_triples]

# For each metric: the per-seed values of SR, GEN and BE, each followed by
# its mean as a final extra element.
accu_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_accu, GEN_drosophila_accu, BE_drosophila_accu)]
auc_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_auc, GEN_drosophila_auc, BE_drosophila_auc)]
prec_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_precision, GEN_drosophila_precision, BE_drosophila_precision)]
rec_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_recall, GEN_drosophila_recall, BE_drosophila_recall)]

In [10]:
# Four-panel figure for the rat runs: one panel per metric, one marker/line
# series per variant, x axis = ten random seeds plus "AVG".
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['STRICT', 'GENERAL', 'FULL']
series_markers = ['o', 'v', 's']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Organism - Rat', fontsize=20)

<matplotlib.text.Text at 0x7fec3b97c0f0>

In [11]:
# Load every stored metric for the 'mouse' runs. open_metrics returns the
# values as three lists in the order SR, GEN, BE.
# NOTE(review): the variables are still named *_drosophila_* although this
# cell loads mouse data; the names are kept because other cells may use them.
(SR_drosophila_accu, GEN_drosophila_accu,
 BE_drosophila_accu) = open_metrics('mouse', 'accuracy')
(SR_drosophila_auc, GEN_drosophila_auc,
 BE_drosophila_auc) = open_metrics('mouse', 'auc_score')
(SR_drosophila_fpr, GEN_drosophila_fpr,
 BE_drosophila_fpr) = open_metrics('mouse', 'fpr')
(SR_drosophila_tpr, GEN_drosophila_tpr,
 BE_drosophila_tpr) = open_metrics('mouse', 'tpr')
(SR_drosophila_report, GEN_drosophila_report,
 BE_drosophila_report) = open_metrics('mouse', 'report')
SR_labels, GEN_labels, BE_labels = open_metrics('mouse', 'labels')

# Flatten the label collections by one nesting level.
SR_drosophila_labels = [label for batch in SR_labels for label in batch]
GEN_drosophila_labels = [label for batch in GEN_labels for label in batch]
BE_drosophila_labels = [label for batch in BE_labels for label in batch]

# Precision and recall are read from fixed character positions of each report
# string (179:183 and 189:193).
# NOTE(review): presumably these offsets hit the averaged precision/recall
# columns of a classification-report text -- TODO confirm against the format.
report_triples = list(zip(SR_drosophila_report, GEN_drosophila_report, BE_drosophila_report))
SR_drosophila_precision = [float(sr[179:183]) for sr, gen, be in report_triples]
GEN_drosophila_precision = [float(gen[179:183]) for sr, gen, be in report_triples]
BE_drosophila_precision = [float(be[179:183]) for sr, gen, be in report_triples]
SR_drosophila_recall = [float(sr[189:193]) for sr, gen, be in report_triples]
GEN_drosophila_recall = [float(gen[189:193]) for sr, gen, be in report_triples]
BE_drosophila_recall = [float(be[189:193]) for sr, gen, be in report_triples]

# For each metric: the per-seed values of SR, GEN and BE, each followed by
# its mean as a final extra element.
accu_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_accu, GEN_drosophila_accu, BE_drosophila_accu)]
auc_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_auc, GEN_drosophila_auc, BE_drosophila_auc)]
prec_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_precision, GEN_drosophila_precision, BE_drosophila_precision)]
rec_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_recall, GEN_drosophila_recall, BE_drosophila_recall)]

In [12]:
# Four-panel figure for the mouse runs: one panel per metric, one marker/line
# series per variant, x axis = ten random seeds plus "AVG".
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['STRICT', 'GENERAL', 'FULL']
series_markers = ['o', 'v', 's']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Organism - Mouse', fontsize=20)

<matplotlib.text.Text at 0x7fec3b766668>

In [13]:
# Load every stored metric for the 'human' runs. open_metrics returns the
# values as three lists in the order SR, GEN, BE.
# NOTE(review): the variables are still named *_drosophila_* although this
# cell loads human data; the names are kept because other cells may use them.
(SR_drosophila_accu, GEN_drosophila_accu,
 BE_drosophila_accu) = open_metrics('human', 'accuracy')
(SR_drosophila_auc, GEN_drosophila_auc,
 BE_drosophila_auc) = open_metrics('human', 'auc_score')
(SR_drosophila_fpr, GEN_drosophila_fpr,
 BE_drosophila_fpr) = open_metrics('human', 'fpr')
(SR_drosophila_tpr, GEN_drosophila_tpr,
 BE_drosophila_tpr) = open_metrics('human', 'tpr')
(SR_drosophila_report, GEN_drosophila_report,
 BE_drosophila_report) = open_metrics('human', 'report')
SR_labels, GEN_labels, BE_labels = open_metrics('human', 'labels')

# Flatten the label collections by one nesting level.
SR_drosophila_labels = [label for batch in SR_labels for label in batch]
GEN_drosophila_labels = [label for batch in GEN_labels for label in batch]
BE_drosophila_labels = [label for batch in BE_labels for label in batch]

# Precision and recall are read from fixed character positions of each report
# string (179:183 and 189:193).
# NOTE(review): presumably these offsets hit the averaged precision/recall
# columns of a classification-report text -- TODO confirm against the format.
report_triples = list(zip(SR_drosophila_report, GEN_drosophila_report, BE_drosophila_report))
SR_drosophila_precision = [float(sr[179:183]) for sr, gen, be in report_triples]
GEN_drosophila_precision = [float(gen[179:183]) for sr, gen, be in report_triples]
BE_drosophila_precision = [float(be[179:183]) for sr, gen, be in report_triples]
SR_drosophila_recall = [float(sr[189:193]) for sr, gen, be in report_triples]
GEN_drosophila_recall = [float(gen[189:193]) for sr, gen, be in report_triples]
BE_drosophila_recall = [float(be[189:193]) for sr, gen, be in report_triples]

# For each metric: the per-seed values of SR, GEN and BE, each followed by
# its mean as a final extra element.
accu_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_accu, GEN_drosophila_accu, BE_drosophila_accu)]
auc_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_auc, GEN_drosophila_auc, BE_drosophila_auc)]
prec_list = [scores + [np.mean(scores)]
             for scores in (SR_drosophila_precision, GEN_drosophila_precision, BE_drosophila_precision)]
rec_list = [scores + [np.mean(scores)]
            for scores in (SR_drosophila_recall, GEN_drosophila_recall, BE_drosophila_recall)]

In [14]:
# Four-panel figure for the human runs: one panel per metric, one marker/line
# series per variant, x axis = ten random seeds plus "AVG".
fig, ax = plt.subplots(2, 2)
random_seeds = ["144", "235", "905", "2895", "3462", "4225", "5056", "5192", "7751", "7813", "AVG"]
inds = list(range(11))

# Panels and their titles, in the same order as the per-series metric tuple
# built inside the loop below (accuracy, AUC, precision, recall).
panel_axes = [ax[0, 0], ax[0, 1], ax[1, 0], ax[1, 1]]
panel_titles = ['Accuracy', 'AUC Score', 'Precision', 'Recall']
series_names = ['STRICT', 'GENERAL', 'FULL']
series_markers = ['o', 'v', 's']

# The AUC loop variable is named `auc_scores` (not `auc`) so that it does not
# rebind the `auc` function imported from sklearn.metrics.
for accu, auc_scores, prec, rec, name, mar in zip(accu_list, auc_list, prec_list, rec_list, series_names, series_markers):
    for panel, scores in zip(panel_axes, (accu, auc_scores, prec, rec)):
        # Index 10 is the value drawn under the "AVG" tick; show it in the legend.
        panel.scatter(inds, scores, s=90, marker=mar, label=name + ' = ' + str(scores[10]))
        panel.plot(inds, scores)

# Axis decoration does not depend on the series, so apply it once per panel
# instead of once per series.
for panel, panel_title in zip(panel_axes, panel_titles):
    panel.set_title(panel_title)
    panel.xaxis.set_ticks(range(11))
    panel.set_xticklabels(random_seeds)
    panel.set_xlabel('Seeds')
    panel.set_ylabel('Score')
    panel.legend(loc='best')
fig.suptitle('Organism - Human', fontsize=20)

<matplotlib.text.Text at 0x7fec3aabf518>