In [1]:
import pandas as pd
import numpy as np

In [2]:
def evaluate_models(pred_file, threshold):
    """Compute per-class recovery of a prediction file at a score threshold.

    The file is read with ``pd.read_csv(..., skiprows=1, usecols=[0, 1, 2, 3])``:
    its first line is skipped, the following line is used as the header, and
    only the first four columns are loaded. Those columns must include
    ``Protein_ID``, ``Compound_ID``, ``label`` and ``predicted``.

    Every row whose (Protein_ID, Compound_ID) pair occurs more than once is
    discarded entirely (``keep=False``), not just the extra copies.

    Parameters
    ----------
    pred_file : str or path-like
        Path of the CSV file holding the predictions.
    threshold : float
        Decision threshold applied to the ``predicted`` column.

    Returns
    -------
    tuple of float
        ``(pos_pred, neg_pred)`` where ``pos_pred`` is the fraction of
        ``label == 1`` rows with ``predicted > threshold`` and ``neg_pred`` is
        the fraction of ``label == 0`` rows with ``predicted <= threshold``.
        A value is ``nan`` when no row of that class is left after
        de-duplication (previously this raised ``ZeroDivisionError``).
    """
    pred_dti_df = pd.read_csv(pred_file, skiprows=1, usecols=[0, 1, 2, 3])
    # Row order does not influence the counts below, so no sort is needed
    # before removing the ambiguous (duplicated) protein/compound pairs.
    pdti_df = pred_dti_df.drop_duplicates(subset=["Protein_ID", "Compound_ID"], keep=False)

    pos_scores = pdti_df.loc[pdti_df.label == 1].predicted.values
    neg_scores = pdti_df.loc[pdti_df.label == 0].predicted.values

    # Guard against an empty class instead of dividing by zero.
    if len(pos_scores):
        pos_pred = int((pos_scores > threshold).sum()) / len(pos_scores)
    else:
        pos_pred = float("nan")

    if len(neg_scores):
        neg_pred = int((neg_scores <= threshold).sum()) / len(neg_scores)
    else:
        neg_pred = float("nan")

    return (pos_pred, neg_pred)
    

In [5]:
# Score the training-set predictions of each of the 10 data splits and
# report the per-split and average positive/negative recovery.
train_p_rec = []
train_n_rec = []

threshold = 0.5  # decision threshold passed to evaluate_models

for i in range(10):
    split_no = i + 1  # result directories are numbered from 1
    fname = "./CNN_results/CNN_results_{}/training_predictions.csv".format(split_no)
    pos_rec, neg_rec = evaluate_models(fname, threshold)

    train_p_rec.append(pos_rec)
    train_n_rec.append(neg_rec)

    print('#-----------------------------#')
    print('Data split-', split_no)

    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)


print('#-----------------------------#')
print('Average model recovery on the split training dataset ')

# Label the averages: they are printed negative-first, which is the reverse
# of the per-split lines above and is easy to misread without a label.
print('Negative recovery:', np.mean(train_n_rec))
print('Positive recovery:', np.mean(train_p_rec))

#-----------------------------#
Data split- 1
Positive recovery: 0.9990990990990991
Negative recovery: 0.9925642181162686
#-----------------------------#
Data split- 2
Positive recovery: 0.9977477477477478
Negative recovery: 0.9924515547543938
#-----------------------------#
Data split- 3
Positive recovery: 0.9912162162162163
Negative recovery: 0.9944794952681388
#-----------------------------#
Data split- 4
Positive recovery: 0.9876126126126126
Negative recovery: 0.9941415051825147
#-----------------------------#
Data split- 5
Positive recovery: 0.9979729729729729
Negative recovery: 0.9942541685443894
#-----------------------------#
Data split- 6
Positive recovery: 0.9873873873873874
Negative recovery: 0.9865930599369085
#-----------------------------#
Data split- 7
Positive recovery: 0.9981981981981982
Negative recovery: 0.9789319513294277
#-----------------------------#
Data split- 8
Positive recovery: 0.9945945945945946
Negative recovery: 0.9905362776025236
#-----------------------

In [29]:
# Score the validation-set predictions of each of the 10 data splits and
# report the per-split and average positive/negative recovery.
val_p_rec = []
val_n_rec = []

threshold = 0.5  # decision threshold passed to evaluate_models

for i in range(10):
    split_no = i + 1  # result directories are numbered from 1
    fname = "./CNN_results/CNN_results_{}/validation_predictions.csv".format(split_no)
    pos_rec, neg_rec = evaluate_models(fname, threshold)

    val_p_rec.append(pos_rec)
    val_n_rec.append(neg_rec)

    print('#-----------------------------#')
    # Print the 1-based split number so the label matches the directory
    # (CNN_results_<n>) the predictions were read from.
    print('Data split-', split_no)

    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)

print('#-----------------------------#')
print('Average model recovery on the split validation dataset ')

# Averages are printed negative-first; label them so they cannot be confused.
print('Negative recovery:', np.mean(val_n_rec))
print('Positive recovery:', np.mean(val_p_rec))

#-----------------------------#
Data split- 0
Positive recovery: 0.8378378378378378
Negative recovery: 0.709009009009009
#-----------------------------#
Data split- 1
Positive recovery: 0.8189189189189189
Negative recovery: 0.754054054054054
#-----------------------------#
Data split- 2
Positive recovery: 0.809009009009009
Negative recovery: 0.7783783783783784
#-----------------------------#
Data split- 3
Positive recovery: 0.7702702702702703
Negative recovery: 0.8009009009009009
#-----------------------------#
Data split- 4
Positive recovery: 0.836036036036036
Negative recovery: 0.7531531531531531
#-----------------------------#
Data split- 5
Positive recovery: 0.7756756756756756
Negative recovery: 0.7621621621621621
#-----------------------------#
Data split- 6
Positive recovery: 0.845945945945946
Negative recovery: 0.7045045045045045
#-----------------------------#
Data split- 7
Positive recovery: 0.8117117117117117
Negative recovery: 0.7432432432432432
#----------------------------

In [31]:
# Score the test-set predictions of each of the 10 data splits and
# report the per-split and average positive/negative recovery.
test_p_rec = []
test_n_rec = []

threshold = 0.5  # decision threshold passed to evaluate_models

for i in range(10):
    split_no = i + 1  # result directories are numbered from 1
    fname = "./CNN_results/CNN_results_{}/test_predictions.csv".format(split_no)
    pos_rec, neg_rec = evaluate_models(fname, threshold)

    test_p_rec.append(pos_rec)
    test_n_rec.append(neg_rec)

    print('#-----------------------------#')
    # Print the 1-based split number so the label matches the directory
    # (CNN_results_<n>) the predictions were read from.
    print('Data split-', split_no)

    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)


print('#-----------------------------#')
print('Average model recovery on the split test dataset ')

# Averages are printed negative-first; label them so they cannot be confused.
print('Negative recovery:', np.mean(test_n_rec))
print('Positive recovery:', np.mean(test_p_rec))

#-----------------------------#
Data split- 0
Positive recovery: 0.8333333333333334
Negative recovery: 0.6972972972972973
#-----------------------------#
Data split- 1
Positive recovery: 0.845945945945946
Negative recovery: 0.727027027027027
#-----------------------------#
Data split- 2
Positive recovery: 0.8081081081081081
Negative recovery: 0.7585585585585586
#-----------------------------#
Data split- 3
Positive recovery: 0.7612612612612613
Negative recovery: 0.7747747747747747
#-----------------------------#
Data split- 4
Positive recovery: 0.8117117117117117
Negative recovery: 0.7567567567567568
#-----------------------------#
Data split- 5
Positive recovery: 0.8063063063063063
Negative recovery: 0.7747747747747747
#-----------------------------#
Data split- 6
Positive recovery: 0.8558558558558559
Negative recovery: 0.6945945945945946
#-----------------------------#
Data split- 7
Positive recovery: 0.8072072072072072
Negative recovery: 0.7207207207207207
#-------------------------