# Evaluation results for unsupervised learning methods

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc , f1_score

In [21]:
# AGRN
# Score every test TF-target pair with the mean of the ETR and RFR
# "shap_Proba" values, then report AUROC / AUPRC / F1 for iterations 1-10,
# each averaged over test files 0-4.
ETR_result = pd.read_csv('result/Dream5_Silico/AGRN/ETR.csv', index_col=0)
RFR_result = pd.read_csv('result/Dream5_Silico/AGRN/RFR.csv', index_col=0)

# Only needed by the commented-out RegulonDB_Ecoli / Bsubtilis ID mapping below.
gene_list = pd.read_csv('data/Dream5/Network1_Silico/net1_gene_ids.tsv', sep='\t',index_col=1)

final_auroc = []
final_auprc = []
final_f1 = []
for iter_num in range(1, 11):
    auroc_list = []
    auprc_list = []
    f1_list = []
    for num in range(5):
        # test data in supervised methods
        test_data = pd.read_csv(f'data/Dream5/Network1_Silico/processed_balanced/{iter_num}/test_data{num}.txt', sep='\t',header=None)
        test_data.columns = ['TF', 'target', 'Index']

        ### RegulonDB_Ecoli and Bsubtilis dataset ###
        #tf_ids = gene_list.loc[test_data['TF'], '#ID'].to_numpy()
        #tg_ids = gene_list.loc[test_data['target'], '#ID'].to_numpy()
        #pair_ids = pd.Series(tf_ids, index=test_data.index) + '_' + tg_ids
        ### dream5 dataset ###
        # The result tables are looked up by the string "<TF>_<target>".
        pair_ids = test_data['TF'] + '_' + test_data['target']

        # Keep only pairs present in the ETR table; a pair that is in ETR but
        # missing from RFR still raises KeyError, as it did before.
        matched = pair_ids.isin(ETR_result.index)
        if not matched.any():
            raise ValueError(
                f'no test pair of iteration {iter_num}, file {num} is present in the AGRN results'
            )
        ids = pair_ids[matched].tolist()
        test_data = test_data.loc[matched].copy()
        test_data['importance'] = (
            ETR_result.loc[ids, 'shap_Proba'].to_numpy()
            + RFR_result.loc[ids, 'shap_Proba'].to_numpy()
        ) / 2
        # Drop rows with a missing value in any column (e.g. a NaN score).
        test_data = test_data.dropna()

        true_labels = np.array(test_data.Index)
        predicted_probs = np.array(test_data.importance)
        # Binarise for F1 at the median score: scores >= median are called positive.
        predicted_labels = (predicted_probs >= np.median(predicted_probs)).astype(int)
        auroc = roc_auc_score(true_labels, predicted_probs)
        precision, recall, _ = precision_recall_curve(true_labels, predicted_probs)
        auprc = auc(recall, precision)
        f1 = f1_score(true_labels, predicted_labels)
        auroc_list.append(auroc)
        auprc_list.append(auprc)
        f1_list.append(f1)
    # One value per iteration: the mean over its five test files.
    final_auroc.append(np.mean(auroc_list))
    final_auprc.append(np.mean(auprc_list))
    final_f1.append(np.mean(f1_list))

# Per-iteration values, then mean ± population standard deviation (np.std default).
print('AUROC: ', final_auroc, '\t', '{:.4f}'.format(np.mean(final_auroc)),'±','{:.4f}'.format(np.std(final_auroc)))
print('AUPRC: ', final_auprc, '\t', '{:.4f}'.format(np.mean(final_auprc)),'±','{:.4f}'.format(np.std(final_auprc)))
print('f1-score: ', final_f1, '\t', '{:.4f}'.format(np.mean(final_f1)),'±','{:.4f}'.format(np.std(final_f1)))

AUROC:  [0.8504818560766341, 0.8483333386641595, 0.848342178671017, 0.8493075911383642, 0.8498945003710914, 0.8507160262280669, 0.849152842808585, 0.8518333045068592, 0.8505528468032194, 0.8477747256625907] 	 0.8496 ± 0.0012
AUPRC:  [0.8781890883237036, 0.8768638649096492, 0.8761150757085192, 0.8777577858395167, 0.8770144454060433, 0.8782892638228252, 0.8756082165460825, 0.8786924352624809, 0.8770634852218734, 0.876829644683528] 	 0.8772 ± 0.0009
f1-score:  [0.7617763248524522, 0.7610295929693888, 0.7645172931043008, 0.761527880349173, 0.7615261720003131, 0.7620235279052517, 0.7632694784494518, 0.7637686974975746, 0.7630201018908483, 0.7585362929272839] 	 0.7621 ± 0.0016


In [22]:
# GENIE3
# Look up the GENIE3 "importance" of every test (TF, target) pair and report
# AUROC / AUPRC / F1 for iterations 1-10, each averaged over test files 0-4.
GENIE3_result = pd.read_csv(
    'result/Dream5_Silico/GENIE3/Dream5_Silico_infer_net1.tsv', sep='\t'
).set_index(['TF','target'])

final_auroc, final_auprc, final_f1 = [], [], []
for iter_num in range(1, 11):
    auroc_list, auprc_list, f1_list = [], [], []
    for num in range(5):
        test_data = pd.read_csv(f'data/Dream5/Network1_Silico/processed_balanced/{iter_num}/test_data{num}.txt', sep='\t',header=None)
        test_data.columns = ['TF', 'target', 'Index']
        # De-duplicate rows, then key the labels by (TF, target) like the GENIE3 table.
        test_data = test_data.drop_duplicates().set_index(['TF','target'])

        # Test pairs that GENIE3 produced a score for, in test-file order.
        idx = [pair for pair in test_data.index if pair in GENIE3_result.index]
        tmp = GENIE3_result.loc[idx]
        # Side-by-side: GENIE3 columns next to the 'Index' label column.
        result = pd.concat([tmp, test_data.loc[idx]], axis=1)

        true_labels = np.array(result['Index'])
        predicted_probs = np.array(result['importance'])
        # Binarise for F1 at the median score: scores >= median are called positive.
        cutoff = np.median(predicted_probs)
        predicted_labels = (predicted_probs >= cutoff).astype(int)

        auroc = roc_auc_score(true_labels, predicted_probs)
        precision, recall, _ = precision_recall_curve(true_labels, predicted_probs)
        auprc = auc(recall, precision)
        f1 = f1_score(true_labels, predicted_labels)

        auroc_list.append(auroc)
        auprc_list.append(auprc)
        f1_list.append(f1)

    # One value per iteration: the mean over its five test files.
    final_auroc.append(np.mean(auroc_list))
    final_auprc.append(np.mean(auprc_list))
    final_f1.append(np.mean(f1_list))

# Per-iteration values, then mean ± population standard deviation (np.std default).
for metric_label, per_iteration in (
    ('AUROC: ', final_auroc),
    ('AUPRC: ', final_auprc),
    ('f1-score: ', final_f1),
):
    print(metric_label, per_iteration, '\t', '{:.4f}'.format(np.mean(per_iteration)),'±','{:.4f}'.format(np.std(per_iteration)))

AUROC:  [0.8210760629515835, 0.8175540088274639, 0.8159637779773672, 0.8185205227429575, 0.8209814231536194, 0.8217093450271259, 0.8186578444721381, 0.8189993388587677, 0.8201903460275439, 0.820135393643687] 	 0.8194 ± 0.0017
AUPRC:  [0.852641689616199, 0.8499732843425161, 0.8504585717113814, 0.8521417759236322, 0.85368471941962, 0.8546539496487439, 0.8509884068394074, 0.8508659346299726, 0.8512918942172872, 0.8536043562069061] 	 0.8520 ± 0.0015
f1-score:  [0.7474795119806561, 0.7408269569273782, 0.7378819456297065, 0.7420578779268202, 0.7451673973332167, 0.7445717839319099, 0.7377100431764481, 0.7410674221587581, 0.7435689608893911, 0.7411748498583174] 	 0.7422 ± 0.0030


In [24]:
# PROTIA
# Read the PROTIA score matrix and, for every test TF-target pair, take the
# entry at [TF number - 1, target number - 1]; then report AUROC / AUPRC / F1
# for iterations 1-10, each averaged over test files 0-4.
protia_result = np.array(pd.read_csv('result/Dream5_Silico/PROTIA/Dream5_Silico.csv', index_col=0))
# Only needed by the commented-out RegulonDB_Ecoli / Bsubtilis ID mapping below.
gene_list = pd.read_csv('data/Dream5/Network1_Silico/net1_gene_ids.tsv', sep='\t',index_col=1)

final_auroc = []
final_auprc = []
final_f1 = []
for iter_num in range(1, 11):
    auroc_list = []
    auprc_list = []
    f1_list = []
    for num in range(5):
        test_data = pd.read_csv(f'data/Dream5/Network1_Silico/processed_balanced/{iter_num}/test_data{num}.txt', sep='\t',header=None)
        test_data.columns = ['TF', 'target', 'Index']

        ### RegulonDB_Ecoli and Bsubtilis dataset ###
        #tf_idx = gene_list.loc[test_data['TF'], '#ID'].str[1:].astype(int).to_numpy() - 1
        #tg_idx = gene_list.loc[test_data['target'], '#ID'].str[1:].astype(int).to_numpy() - 1
        ### Dream5 dataset ###
        # Strip the one-character prefix from each gene ID and convert the
        # remaining 1-based number into a 0-based matrix position.
        # NOTE(review): an ID whose number is 0 would become index -1 and wrap
        # to the last row/column - confirm IDs start at 1.
        tf_idx = test_data['TF'].str[1:].astype(int).to_numpy() - 1
        tg_idx = test_data['target'].str[1:].astype(int).to_numpy() - 1
        # Paired integer-array indexing: one matrix entry per test row.
        test_data['importance'] = protia_result[tf_idx, tg_idx]

        true_labels = np.array(test_data.Index)
        predicted_probs = np.array(test_data.importance)
        # Binarise for F1 at the median score: scores >= median are called positive.
        predicted_labels = (predicted_probs >= np.median(predicted_probs)).astype(int)
        auroc = roc_auc_score(true_labels, predicted_probs)
        precision, recall, _ = precision_recall_curve(true_labels, predicted_probs)
        auprc = auc(recall, precision)
        f1 = f1_score(true_labels, predicted_labels)
        auroc_list.append(auroc)
        auprc_list.append(auprc)
        f1_list.append(f1)
    # One value per iteration: the mean over its five test files.
    final_auroc.append(np.mean(auroc_list))
    final_auprc.append(np.mean(auprc_list))
    final_f1.append(np.mean(f1_list))

# Per-iteration values, then mean ± population standard deviation (np.std default).
print('AUROC: ', final_auroc, '\t', '{:.4f}'.format(np.mean(final_auroc)),'±','{:.4f}'.format(np.std(final_auroc)))
print('AUPRC: ', final_auprc, '\t', '{:.4f}'.format(np.mean(final_auprc)),'±','{:.4f}'.format(np.std(final_auprc)))
print('f1-score: ', final_f1, '\t', '{:.4f}'.format(np.mean(final_f1)),'±','{:.4f}'.format(np.std(final_f1)))

AUROC:  [0.7933116703277567, 0.7885059546578936, 0.7914066393628727, 0.7901122352889589, 0.7892746929457752, 0.7881698725363868, 0.7897587562167274, 0.7888587332363495, 0.7917038129080487, 0.7888905540083295] 	 0.7900 ± 0.0016
AUPRC:  [0.8392142335043806, 0.8360959556410428, 0.839118018523271, 0.8364503470794826, 0.835938751655485, 0.8347881752605556, 0.8354785926949357, 0.8364063182027944, 0.8382145594766388, 0.8359128931892382] 	 0.8368 ± 0.0015
f1-score:  [0.7144300335121005, 0.710941246914526, 0.7129388974656232, 0.714180967122624, 0.7126848641132705, 0.7121870428578949, 0.7149281657105747, 0.7116913947213375, 0.7169219353744727, 0.7114406209504657] 	 0.7132 ± 0.0018
