In [1]:
import pandas as pd
import numpy as np

In [2]:
def evaluate_models(pred_file, threshold):
    """Compute per-class recovery rates from a prediction CSV.

    The first line of the file is skipped, the next line is used as the
    header, and only the first four columns are read. Those columns must
    include ``Protein_ID``, ``Compound_ID``, ``label`` and ``predicted``.
    Every (Protein_ID, Compound_ID) pair that occurs more than once is
    discarded entirely (``keep=False``) before scoring.

    Parameters
    ----------
    pred_file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.
    threshold : float
        Decision cut-off applied to the ``predicted`` column.

    Returns
    -------
    tuple
        ``(pos_pred, neg_pred)`` where ``pos_pred`` is the fraction of
        ``label == 1`` rows with ``predicted > threshold`` and ``neg_pred``
        is the fraction of ``label == 0`` rows with
        ``predicted <= threshold``. A class with no rows yields NaN
        instead of raising ``ZeroDivisionError``.
    """
    pred_dti_df = pd.read_csv(pred_file, skiprows=1, usecols=[0, 1, 2, 3])

    # keep=False removes *all* members of a duplicated pair; the result does
    # not depend on row order, so no prior sort is needed for the fractions.
    pdti_df = pred_dti_df.drop_duplicates(
        subset=["Protein_ID", "Compound_ID"], keep=False
    )

    labels = pdti_df["label"].values
    scores = pdti_df["predicted"].values

    pos_mask = labels == 1
    neg_mask = labels == 0
    n_pos = pos_mask.sum()
    n_neg = neg_mask.sum()

    # Both comparisons are written out explicitly (rather than negating one)
    # so that NaN scores count as a miss for either class, as before.
    pos_hits = (pos_mask & (scores > threshold)).sum()
    neg_hits = (neg_mask & (scores <= threshold)).sum()

    # Guard against an empty class, which previously divided by zero.
    pos_pred = pos_hits / n_pos if n_pos else float("nan")
    neg_pred = neg_hits / n_neg if n_neg else float("nan")

    return (pos_pred, neg_pred)
    

In [3]:
# Recovery rates on the training predictions of each of the 10 data splits,
# scored at a 0.5 threshold.
train_p_rec, train_n_rec = [], []

for i in range(10):
    # Prediction files are numbered 1..10, whereas the loop index (and the
    # "Data split-" label printed below) runs 0..9.
    fname = "./CNN_results_split_final/predictions/training_prediction_split_{}.csv".format(i + 1)
    pos_rec, neg_rec = evaluate_models(fname, 0.5)

    print('#-----------------------------#')
    print('Data split-', i)
    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)

    train_p_rec.append(pos_rec)
    train_n_rec.append(neg_rec)

# Mean over splits: negative recovery first, then positive recovery.
print(np.mean(train_n_rec))
print(np.mean(train_p_rec))

#-----------------------------#
Data split- 0
Positive recovery: 0.9886261261261261
Negative recovery: 0.9742000901306895
#-----------------------------#
Data split- 1
Positive recovery: 0.9978603603603604
Negative recovery: 0.9797205948625507
#-----------------------------#
Data split- 2
Positive recovery: 0.9951576576576576
Negative recovery: 0.9841144659756648
#-----------------------------#
Data split- 3
Positive recovery: 0.9969594594594594
Negative recovery: 0.9704821991888238
#-----------------------------#
Data split- 4
Positive recovery: 0.9851351351351352
Negative recovery: 0.9823118521856692
#-----------------------------#
Data split- 5
Positive recovery: 0.9948198198198198
Negative recovery: 0.9834384858044164
#-----------------------------#
Data split- 6
Positive recovery: 0.9881756756756757
Negative recovery: 0.9850157728706624
#-----------------------------#
Data split- 7
Positive recovery: 0.9897522522522523
Negative recovery: 0.9746507435781884
#-----------------------

In [5]:
# Recovery rates on the validation predictions of each of the 10 data splits,
# scored at a 0.5 threshold.
val_p_rec, val_n_rec = [], []

for i in range(10):
    # Prediction files are numbered 1..10, whereas the loop index (and the
    # "Data split-" label printed below) runs 0..9.
    fname = "./CNN_results_split_final/predictions/validation_prediction_split_{}.csv".format(i + 1)
    pos_rec, neg_rec = evaluate_models(fname, 0.5)

    print('#-----------------------------#')
    print('Data split-', i)
    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)

    val_p_rec.append(pos_rec)
    val_n_rec.append(neg_rec)

# Mean over splits: negative recovery first, then positive recovery.
print(np.mean(val_n_rec))
print(np.mean(val_p_rec))

#-----------------------------#
Data split- 0
Positive recovery: 0.8117117117117117
Negative recovery: 0.727027027027027
#-----------------------------#
Data split- 1
Positive recovery: 0.872072072072072
Negative recovery: 0.6864864864864865
#-----------------------------#
Data split- 2
Positive recovery: 0.80990990990991
Negative recovery: 0.7306306306306306
#-----------------------------#
Data split- 3
Positive recovery: 0.836036036036036
Negative recovery: 0.7162162162162162
#-----------------------------#
Data split- 4
Positive recovery: 0.8036036036036036
Negative recovery: 0.7378378378378379
#-----------------------------#
Data split- 5
Positive recovery: 0.818018018018018
Negative recovery: 0.7315315315315315
#-----------------------------#
Data split- 6
Positive recovery: 0.8036036036036036
Negative recovery: 0.7468468468468469
#-----------------------------#
Data split- 7
Positive recovery: 0.7981981981981981
Negative recovery: 0.7477477477477478
#-----------------------------

In [6]:
# Recovery rates on the test predictions of each of the 10 data splits,
# scored at a 0.5 threshold.
test_p_rec, test_n_rec = [], []

for i in range(10):
    # Prediction files are numbered 1..10, whereas the loop index (and the
    # "Data split-" label printed below) runs 0..9.
    fname = "./CNN_results_split_final/predictions/test_prediction_split_{}.csv".format(i + 1)
    pos_rec, neg_rec = evaluate_models(fname, 0.5)

    print('#-----------------------------#')
    print('Data split-', i)
    print('Positive recovery:', pos_rec)
    print('Negative recovery:', neg_rec)

    test_p_rec.append(pos_rec)
    test_n_rec.append(neg_rec)

# Mean over splits: negative recovery first, then positive recovery.
print(np.mean(test_n_rec))
print(np.mean(test_p_rec))

#-----------------------------#
Data split- 0
Positive recovery: 0.8144144144144144
Negative recovery: 0.7324324324324324
#-----------------------------#
Data split- 1
Positive recovery: 0.8747747747747747
Negative recovery: 0.672972972972973
#-----------------------------#
Data split- 2
Positive recovery: 0.8216216216216217
Negative recovery: 0.7387387387387387
#-----------------------------#
Data split- 3
Positive recovery: 0.845945945945946
Negative recovery: 0.7036036036036036
#-----------------------------#
Data split- 4
Positive recovery: 0.781981981981982
Negative recovery: 0.7477477477477478
#-----------------------------#
Data split- 5
Positive recovery: 0.8252252252252252
Negative recovery: 0.7126126126126127
#-----------------------------#
Data split- 6
Positive recovery: 0.7837837837837838
Negative recovery: 0.7234234234234235
#-----------------------------#
Data split- 7
Positive recovery: 0.8045045045045045
Negative recovery: 0.7216216216216216
#--------------------------