## 反应直接预测结果分析
> 2024-11-04

### 1. 导入必要的包

In [1]:
import sys,os
sys.path.insert(0, os.path.dirname(os.path.realpath('__file__')))
sys.path.insert(1,'../')
from config import conf as cfg
import pandas as pd
from tqdm import tqdm
from tkinter import _flatten
import json
from sklearn.metrics import confusion_matrix

from concurrent.futures import ThreadPoolExecutor, as_completed
import numpy as np
import plotly.graph_objects as go

from tools import btools
from IPython.display import HTML

from pandarallel import pandarallel # 导入pandaralle
pandarallel.initialize(progress_bar=False)
%load_ext autoreload
%autoreload 2

INFO: Pandarallel will run on 128 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### 2. 加载测试数据集

In [4]:
# 从 JSON 文件加载反应编码字典
with open(cfg.FILE_DS_DICT_RXN2ID, "r") as json_file:
    dict_rxn2id = json.load(json_file)
    print(f'加载反应编码字典完成，共有 {len(dict_rxn2id)} 个反应。')  # 打印加载的数据

# load datasets
ds_test = pd.read_feather(cfg.FILE_DS_TEST)[['uniprot_id', 
                                             'reaction_id', 
                                             'isenzyme',
                                             'ec_number',
                                             'ec_specific_level',
                                             'label'
                                             ]].rename(columns={'reaction_id':'rxn_groundtruth','isenzyme':'isenzyme_groundtruth','ec_number':'ec_groundtruth', 'label':'lb_rxn_groundtruth'})

print(f'测试集数据量: {len(ds_test)}')

methods = ['blast', 'deepec', 'clean', 'ecrecer', 'ecpred', 'catfam', 'priam']

print(f'使用预测方法{methods}')

ds_test.head(2)


加载反应编码字典完成，共有 10479 个反应。
测试集数据量: 13515
使用预测方法['blast', 'deepec', 'clean', 'ecrecer', 'ecpred', 'catfam', 'priam']


Unnamed: 0,uniprot_id,rxn_groundtruth,isenzyme_groundtruth,ec_groundtruth,ec_specific_level,lb_rxn_groundtruth
0,A9JLI2,-,False,-,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,A9JLI3,-,False,-,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


## 4. Load results from EC based method

In [9]:
def get_ec_rxn_nores(pred_detail, eckey, rxnkey):
    #没有预测结果的
    no_predcition = len(pred_detail[(pred_detail[eckey].str.contains('NO-PREDICTION'))])
    #有EC没反应的
    ecwithout_reaction = len(pred_detail[(pred_detail[rxnkey].str.contains('EC-WITHOUT-REACTION'))])
    return [len(pred_detail), no_predcition, ecwithout_reaction]

# Make one-hot encoding label for each prediction
def make_labels(resdf, src_col1, src_col2, lb1, lb2, rxn_label_dict):
    resdf[[lb1, lb2]] = resdf.apply(
        lambda row: pd.Series({
            lb1: btools.make_label(reaction_id=str(row[src_col1]), rxn_label_dict=rxn_label_dict),
            lb2: btools.make_label(reaction_id=str(row[src_col2]), rxn_label_dict=rxn_label_dict)
        }), axis=1
    )
    
    return resdf

# Function to calculate metrics
def calculate_metrics(res_df_list,item, method):
    return btools.rxn_eva_metric(eva_df=res_df_list[item], eva_name=f'{method}_fold_{item+1}', methods=[method])


# Function to display results as HTML
def display_html_results(metrics, fold_std, eva_name):
    return HTML(f"""
         <div style="float:left; width:800px;">
              <h2 style='color:blue'>{eva_name} Evaluation 10 Fold Details</h2>
              {metrics.to_html()}
         </div>
         <div  style="float:left; width:400px;" >
              <h2 style='color:blue' >{eva_name} Evaluation 10 Fold Overview</h2>
                   {fold_std.to_html()}
         </div>
         """)

In [2]:
# Read CSV files serially
def read_csv_files(file_paths):
    return [pd.read_csv(file, sep='\t') for file in file_paths]

# Function to get ec_rxn_nores
def get_ec_rxn_nores(pred_detail, eckey, rxnkey):
    no_prediction = len(pred_detail[(pred_detail[eckey].str.contains('NO-PREDICTION'))])
    ecwithout_reaction = len(pred_detail[(pred_detail[rxnkey].str.contains('EC-WITHOUT-REACTION'))])
    return [len(pred_detail), no_prediction, ecwithout_reaction]

def process_no_res(res_list, eckey, rxnkey):
    return pd.DataFrame([get_ec_rxn_nores(pred_detail=res_list[item], eckey=eckey, rxnkey=rxnkey) for item in range(10)], 
                        columns=['test_size', 'no_prediction', 'ec_without_rxn'])

# Make one-hot encoding label for each prediction
def make_labels(resdf, src_col1, src_col2, lb1, lb2, rxn_label_dict):
    resdf[[lb1, lb2]] = resdf.apply(
        lambda row: pd.Series({
            lb1: btools.make_label(reaction_id=str(row[src_col1]), rxn_label_dict=rxn_label_dict),
            lb2: btools.make_label(reaction_id=str(row[src_col2]), rxn_label_dict=rxn_label_dict)
        }), axis=1
    )
    return resdf

def apply_labels(res_list, src_col1, src_col2, lb1, lb2, rxn_label_dict):
    for i in tqdm(range(10)):
        res_list[i] = make_labels(resdf=res_list[i], src_col1=src_col1, src_col2=src_col2, lb1=lb1, lb2=lb2, rxn_label_dict=rxn_label_dict)
    return res_list

# Function to calculate metrics
def calculate_metrics(res_df_list, item, method):
    return btools.rxn_eva_metric(eva_df=res_df_list[item], eva_name=f'{method}_fold_{item+1}', methods=[method])

def process_metrics(res_list, method, max_workers=10):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(calculate_metrics, res_list, item, method) for item in range(10)]
        metrics = [future.result() for future in as_completed(futures)]
    metrics_df = pd.concat(metrics, axis=0).reset_index(drop=True)
    metrics_df['baselineName'] = metrics_df.apply(lambda x: f'fold_{x.name+1}', axis=1)
    return metrics_df

# Function to display results as HTML
def display_html_results(metrics, fold_std, eva_name):
    return HTML(f"""
         <div style="float:left; width:800px;">
              <h2 style='color:blue'>{eva_name} Evaluation 10 Fold Details</h2>
              {metrics.to_html()}
         </div>
         <div  style="float:left; width:400px;" >
              <h2 style='color:blue' >{eva_name} Evaluation 10 Fold Overview</h2>
                   {fold_std.to_html()}
         </div>
         """)

### 4.1 Blast

In [5]:
# Blast Results Processing
vali_res_blast = [f'{cfg.DIR_PROJECT_ROOT}/baselines/results/ec_methods/blast/fold{item}.tsv' for item in range(1, 11)]
res_blast = read_csv_files(vali_res_blast)
df_blast_no_pred = process_no_res(res_blast, eckey='ec_blast', rxnkey='reaction_ecblast')
res_blast = apply_labels(res_blast, 'reaction_groundtruth', 'reaction_ecblast', 'lb_rxn_groundtruth', 'lb_rxn_blast', dict_rxn2id)
res_blast_metrics = process_metrics(res_blast, 'blast')
res_blast_metrics = pd.concat([res_blast_metrics, df_blast_no_pred], axis=1)
res_blast_fold_std = res_blast_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1', 'no_prediction', 'ec_without_rxn']].agg(['mean', 'std'])
display_html_results(res_blast_metrics, res_blast_fold_std, 'Blast')

100%|██████████| 10/10 [01:50<00:00, 11.04s/it]


Evaluating: Reaction Predcition Results blast_fold_1
Evaluating: Reaction Predcition Results blast_fold_2
Evaluating: Reaction Predcition Results blast_fold_3
Evaluating: Reaction Predcition Results blast_fold_4
Evaluating: Reaction Predcition Results blast_fold_5
Evaluating: Reaction Predcition Results blast_fold_6
Evaluating: Reaction Predcition Results blast_fold_7
Evaluating: Reaction Predcition Results blast_fold_8
Evaluating: Reaction Predcition Results blast_fold_9
Evaluating: Reaction Predcition Results blast_fold_10


Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,test_size,no_prediction,ec_without_rxn
0,fold_1,0.812655,0.900039,0.770326,0.780296,50858,2424,2588
1,fold_2,0.816155,0.908173,0.772592,0.782565,50858,2386,2590
2,fold_3,0.814346,0.900316,0.773433,0.783523,50858,2379,2751
3,fold_4,0.815034,0.905346,0.76795,0.777297,50858,2301,2628
4,fold_5,0.818141,0.896578,0.773133,0.782244,50858,2418,2591
5,fold_6,0.818259,0.902929,0.774894,0.784619,50858,2391,2633
6,fold_7,0.817276,0.909542,0.775508,0.785564,50858,2369,2615
7,fold_8,0.816646,0.907582,0.775516,0.785456,50858,2381,2674
8,fold_9,0.815093,0.908947,0.774655,0.785816,50858,2449,2626
9,fold_10,0.815329,0.896387,0.770627,0.78076,50858,2431,2584

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1,no_prediction,ec_without_rxn
mean,0.815893,0.903584,0.772863,0.782814,2392.9,2628.0
std,0.001757,0.005044,0.002526,0.002763,41.631852,51.415519


### 4.2 DeepEC

In [6]:
# DeepEC Results Processing
vali_res_deepec_rxn = [f'{cfg.DIR_RES_BASELINE}results/ec_methods/deepec/fold_{item}.tsv' for item in range(1, 11)]
res_ec_deepec = read_csv_files(vali_res_deepec_rxn)
df_deepec_no_pred = process_no_res(res_ec_deepec, eckey='ec_deepec', rxnkey='reaction_deepec')
res_ec_deepec = apply_labels(res_ec_deepec, 'reaction_groundtruth', 'reaction_deepec', 'lb_rxn_groundtruth', 'lb_rxn_deepec', dict_rxn2id)
res_deepec_metrics = process_metrics(res_ec_deepec, 'deepec')
res_deepec_metrics = pd.concat([res_deepec_metrics, df_deepec_no_pred], axis=1)
res_deepec_fold_std = res_deepec_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1', 'no_prediction', 'ec_without_rxn']].agg(['mean', 'std'])
display_html_results(res_deepec_metrics, res_deepec_fold_std, 'DeepEC')

100%|██████████| 10/10 [01:53<00:00, 11.36s/it]


Evaluating: Reaction Predcition Results deepec_fold_1
Evaluating: Reaction Predcition Results deepec_fold_2
Evaluating: Reaction Predcition Results deepec_fold_3
Evaluating: Reaction Predcition Results deepec_fold_4
Evaluating: Reaction Predcition Results deepec_fold_5
Evaluating: Reaction Predcition Results deepec_fold_6
Evaluating: Reaction Predcition Results deepec_fold_7
Evaluating: Reaction Predcition Results deepec_fold_8
Evaluating: Reaction Predcition Results deepec_fold_9
Evaluating: Reaction Predcition Results deepec_fold_10


Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,test_size,no_prediction,ec_without_rxn
0,fold_1,0.295666,0.678831,0.314152,0.29858,50858,30918,1572
1,fold_2,0.295076,0.631594,0.312436,0.296666,50858,30958,1543
2,fold_3,0.292324,0.792598,0.3113,0.295358,50858,30749,1673
3,fold_4,0.293621,0.554215,0.312943,0.297203,50858,30916,1587
4,fold_5,0.294782,0.796569,0.314072,0.297998,50858,30799,1580
5,fold_6,0.296394,0.787736,0.31437,0.298089,50858,30908,1588
6,fold_7,0.29252,0.693832,0.310283,0.29392,50858,30984,1600
7,fold_8,0.293798,0.796867,0.313444,0.297703,50858,30991,1569
8,fold_9,0.291439,0.797269,0.312742,0.296435,50858,30938,1594
9,fold_10,0.294526,0.790948,0.31444,0.298719,50858,31044,1537

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1,no_prediction,ec_without_rxn
mean,0.294015,0.732046,0.313018,0.297067,30920.5,1584.3
std,0.001574,0.087473,0.001385,0.001518,88.39589,37.381071


### 4.3 CLEAN

In [7]:
# Clean Results Processing
vali_res_clean_rxn = [f'{cfg.DIR_RES_BASELINE}results/ec_methods/clean/fold_{item}.tsv' for item in range(1, 11)]
res_clean = read_csv_files(vali_res_clean_rxn)
df_clean_no_pred = process_no_res(res_clean, eckey='ec_clean', rxnkey='reaction_clean')
res_clean = apply_labels(res_clean, 'reaction_groundtruth', 'reaction_clean', 'lb_rxn_groundtruth', 'lb_rxn_clean', dict_rxn2id)
res_clean_metrics = process_metrics(res_clean, 'clean')
res_clean_metrics = pd.concat([res_clean_metrics, df_clean_no_pred], axis=1)
res_clean_fold_std = res_clean_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1', 'no_prediction', 'ec_without_rxn']].agg(['mean', 'std'])
display_html_results(res_clean_metrics, res_clean_fold_std, 'Clean')

100%|██████████| 10/10 [01:50<00:00, 11.09s/it]


Evaluating: Reaction Predcition Results clean_fold_1
Evaluating: Reaction Predcition Results clean_fold_2
Evaluating: Reaction Predcition Results clean_fold_3
Evaluating: Reaction Predcition Results clean_fold_4
Evaluating: Reaction Predcition Results clean_fold_5
Evaluating: Reaction Predcition Results clean_fold_6
Evaluating: Reaction Predcition Results clean_fold_7
Evaluating: Reaction Predcition Results clean_fold_8
Evaluating: Reaction Predcition Results clean_fold_9
Evaluating: Reaction Predcition Results clean_fold_10


Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,test_size,no_prediction,ec_without_rxn
0,fold_1,0.327303,0.811995,0.351246,0.299933,50858,0,8422
1,fold_2,0.326242,0.820704,0.351318,0.29937,50858,0,8454
2,fold_3,0.32693,0.815005,0.352811,0.300543,50858,0,8445
3,fold_4,0.328346,0.82355,0.352276,0.30067,50858,0,8359
4,fold_5,0.330607,0.821461,0.355162,0.303075,50858,0,8505
5,fold_6,0.324531,0.817712,0.350537,0.299149,50858,0,8526
6,fold_7,0.328444,0.816711,0.352898,0.300907,50858,0,8498
7,fold_8,0.329152,0.824149,0.354325,0.302759,50858,0,8381
8,fold_9,0.325219,0.657925,0.3534,0.302657,50858,0,8542
9,fold_10,0.330135,0.812376,0.354768,0.303487,50858,0,8550

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1,no_prediction,ec_without_rxn
mean,0.327691,0.802159,0.352874,0.301255,0.0,8468.2
std,0.002015,0.05086,0.001567,0.001607,0.0,66.769587


### 4.4 ECRECer

In [None]:
# ECRECer Results Processing
vali_res_ecrecer_rxn = [f'{cfg.DIR_RES_BASELINE}results/ec_methods/ecrecer/fold_{item}.tsv' for item in range(1, 11)]
res_ecrecer = read_csv_files(vali_res_ecrecer_rxn)
df_ecrecer_no_pred = process_no_res(res_ecrecer, eckey='ec_ecrecer', rxnkey='rxn_ecrecer')
res_ecrecer = apply_labels(res_ecrecer, 'reaction_groundtruth', 'rxn_ecrecer', 'lb_rxn_groundtruth', 'lb_rxn_ecrecer', dict_rxn2id)
# res_ecrecer_metrics = process_metrics(res_ecrecer, 'ecrecer')
# res_ecrecer_metrics = pd.concat([res_ecrecer_metrics, df_ecrecer_no_pred], axis=1)
# res_ecrecer_fold_std = res_ecrecer_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1', 'no_prediction', 'ec_without_rxn']].agg(['mean', 'std'])
# display_html_results(res_ecrecer_metrics, res_ecrecer_fold_std, 'ECRECer')

 10%|█         | 1/10 [00:11<01:39, 11.00s/it]

In [12]:
res_ecrecer[0]

Unnamed: 0,uniprot_id,reaction_groundtruth,ec_groundtruth,ec_ecrecer,rxn_ecrecer
0,Q9UYB6,-,-,-,-
1,C1AQW9,RHEA:19669,3.6.5.-,3.6.5.-,EC-WITHOUT-REACTION
2,P64647,-,-,-,-
3,Q9MTM3,RHEA:21248,2.7.7.6,2.7.7.6,RHEA:21248
4,P45894,RHEA:17989;RHEA:46608,2.7.11.1,2.7.11.1,RHEA:46608
...,...,...,...,...,...
50853,B2A826,-,-,-,-
50854,Q9SCB9,-,-,-,-
50855,P38647,-,-,-,-
50856,A3N2P1,RHEA:16585,2.8.1.8,2.8.1.8,RHEA:16585


In [3]:
method = ['ecblast', 'deepec', 'clean', 'ecrecer', 'ecpred', 'catfam', 'priam']
res_ec_ecblast = pd.read_csv(cfg.FILE_RESULTS_BLAST_EC, sep='\t')


res_ec_deepec = pd.read_csv(cfg.FILE_RESULTS_DEEPEC, sep='\t')
res_ec_catfam = pd.read_csv(cfg.FILE_RESULTS_CATFAM, sep='\t')
res_ec_ecpred = pd.read_csv(cfg.FILE_RESULTS_ECPRED, sep='\t')
res_ec_priam = pd.read_csv(cfg.FILE_RESULTS_PRIAM, sep='\t')
res_ec_clean = pd.read_csv(cfg.FILE_RESULTS_CLEAN, sep='\t')
res_ec_ecrecer = pd.read_csv(cfg.FILE_RESULTS_ECRECER, sep='\t')

res_method_ec = ds_test

for m in method:
    res_method_ec = res_method_ec.merge(
        globals()[f'res_ec_{m}'][['uniprot_id', f'ec_{m}', f'reaction_{m}']],
        on='uniprot_id',
        how='left'
    )
res_method_ec.head(3)

Unnamed: 0,uniprot_id,rxn_groundtruth,isenzyme_groundtruth,ec_groundtruth,ec_specific_level,lb_rxn_groundtruth,ec_ecblast,reaction_ecblast,ec_deepec,reaction_deepec,ec_clean,reaction_clean,ec_ecrecer,reaction_ecrecer,ec_ecpred,reaction_ecpred,ec_catfam,reaction_catfam,ec_priam,reaction_priam
0,A9JLI2,-,False,-,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-,-,NO-PREDICTION,NO-PREDICTION,3.2.2.6;1.4.3.2;4.2.3.81,RHEA:31427;RHEA:16301;RHEA:13781,-,-,-,-,-,-,NO-PREDICTION,NO-PREDICTION
1,A9JLI3,-,False,-,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-,-,NO-PREDICTION,NO-PREDICTION,4.6.1.18,EC-WITHOUT-REACTION,-,-,-,-,-,-,1.14.11.51;2.3.2.27,RHEA:49524
2,A9JLI5,-,False,-,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-,-,NO-PREDICTION,NO-PREDICTION,1.4.3.2,RHEA:13781,-,-,-,-,-,-,6.5.1.3,EC-WITHOUT-REACTION


In [4]:
test_sample = len(ds_test)
#没有预测结果的
blast_no_predcition = len(res_method_ec[(res_method_ec.ec_ecblast.str.contains('NO-PREDICTION'))])
#有EC没反应的
blast_ec_without_reaction = len(res_method_ec[(res_method_ec.reaction_ecblast.str.contains('EC-WITHOUT-REACTION'))])

# 有EC没有反应统计
list_ec_no_rxn = list(set(res_method_ec[(res_method_ec.reaction_ecblast.str.contains('EC-WITHOUT-REACTION'))].ec_ecblast.values))
list_ec_no_rxn = set(_flatten([item.split(cfg.SPLITER) for item in list_ec_no_rxn]))
list_ec_no_rxn = pd.DataFrame(list(list_ec_no_rxn), columns=['ec_number'])

num_ec_no_rxn = len(list_ec_no_rxn)

ec_no_rxn_incomplete = set(list_ec_no_rxn[list_ec_no_rxn.ec_number.str.contains('-')].ec_number)
ec_no_rxn_complete = set(list_ec_no_rxn[~list_ec_no_rxn.ec_number.str.contains('-')].ec_number)
num_ec_no_rxn_incomplete =len(ec_no_rxn_incomplete)
num_ec_no_rxn_complete =len(ec_no_rxn_complete)


print(f"Test sample: {test_sample}")
print(f"Blast NO-PREDICTION: {blast_no_predcition} ({blast_no_predcition/test_sample:.2%})")
print(f'''Blast EC without Reaction: {blast_ec_without_reaction} ({blast_ec_without_reaction/test_sample:.2%}), 
      Involves: {num_ec_no_rxn} distinct ECs, Incomplete ECs: {num_ec_no_rxn_incomplete} ({num_ec_no_rxn_incomplete/num_ec_no_rxn:.2%}) e.g.{list(ec_no_rxn_incomplete)[:10]}, 
      Complete ECs: {num_ec_no_rxn_complete} ({num_ec_no_rxn_complete/num_ec_no_rxn:.2%}) e.g.{list(ec_no_rxn_complete)[:10]}''')


Test sample: 13515
Blast NO-PREDICTION: 4280 (31.67%)
Blast EC without Reaction: 1278 (9.46%), 
      Involves: 154 distinct ECs, Incomplete ECs: 75 (48.70%) e.g.['3.5.1.-', '5.5.1.-', '3.1.21.-', '2.4.2.-', '2.8.1.-', '2.8.3.-', '1.3.1.-', '1.1.1.-', '1.14.18.-', '2.3.1.-'], 
      Complete ECs: 79 (51.30%) e.g.['1.6.5.11', '1.14.13.28', '1.14.13.79', '1.6.99.3', '3.6.3.16', '1.14.11.19', '1.10.2.2', '1.14.13.174', '3.3.1.1', '1.14.13.123']


## 5.Evaluation Results

### 5.1 isEnzyme prediction

In [16]:
print('isEnzyme Predcition Results - EC METHODs')
method = ['ecblast', 'deepec', 'clean', 'ecrecer', 'ecpred', 'catfam', 'priam']
resl = []
for m in method:
    res_item = btools.eva_isenzyme(baselineName=m, res_df=res_method_ec, category='ec')
    resl.append(res_item)
    
resl=pd.DataFrame(resl, columns=['baselineName', 'Accuracy', 'Precision', 'Recall', 'PPV(Sensitivity)', 'NPV(Specificity)', 'F1', 'TP', 'FP', 'FN', 'TN', 'UP', 'UN'])

resl.sort_values(by=['F1'], ascending=False).reset_index(drop=True)

isEnzyme Predcition Results - EC METHODs


Unnamed: 0,baselineName,Accuracy,Precision,Recall,PPV(Sensitivity),NPV(Specificity),F1,TP,FP,FN,TN,UP,UN
0,ecrecer,0.789493,0.532835,0.911388,0.532835,0.964646,0.672499,2921,2561,284,7749,0,0
1,ecpred,0.597839,0.346035,0.631643,0.346035,0.703467,0.447122,2579,3930,560,5499,65,879
2,catfam,0.810285,0.749416,0.300468,0.749416,0.81668,0.428953,963,322,2242,9988,0,0
3,clean,0.237144,0.237144,1.0,0.237144,0.0,0.383373,3205,10310,0,0,0,0
4,ecblast,0.547244,0.302502,0.343502,0.302502,0.37037,0.321701,2466,1406,433,4930,306,3974
5,priam,0.218498,0.218498,0.397069,0.218498,0.0,0.281882,2953,6078,0,0,252,4232
6,deepec,0.090196,0.090196,0.093446,0.090196,0.0,0.091792,1219,470,0,0,1986,9840


In [74]:
fig_data = resl.sort_values(by=['F1']).set_index('baselineName').T.head(6)


plotdata = []
for method in fig_data.columns.values:
    plotdata.append(go.Bar(
        x=fig_data.index, 
        y=fig_data[f'{method}'], 
        name=f'{method}',
        text=fig_data[f'{method}'],
        textposition='auto',
        texttemplate='%{text:.2f}',  # 格式化数值标签 (保留两位小数)
        ))

fig = go.Figure(
    data=plotdata,
    layout=dict(
        barcornerradius=10,
        
    ),
)

fig.show()

### 5.2 Reaction prediction

In [6]:
# Make Label
res_method_ec['lb_rxn_blast'] = res_method_ec.reaction_ecblast.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_deepec'] = res_method_ec.reaction_deepec.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_clean'] = res_method_ec.reaction_clean.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_ecrecer'] = res_method_ec.reaction_ecrecer.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_ecpred'] = res_method_ec.reaction_ecpred.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_catfam'] = res_method_ec.reaction_catfam.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))
res_method_ec['lb_rxn_priam'] = res_method_ec.reaction_priam.apply(lambda x: btools.make_label(reaction_id=str(x), rxn_label_dict=dict_rxn2id))

#full
metrics_rxn_fullset = btools.rxn_eva_metric(eva_df=res_method_ec, eva_name='[FULL SET]', methods=methods)
#noneenzyme
res_noneenzyme = res_method_ec[res_method_ec.rxn_groundtruth=='-'].reset_index(drop=True)
metrics_rxn_noneenzyme = btools.rxn_eva_metric(eva_df=res_noneenzyme, eva_name='[NONE ENZYME]', methods=methods)
#enzyme
res_enzyme = res_method_ec[res_method_ec.rxn_groundtruth!='-'].reset_index(drop=True)
metrics_rxn_enzyme = btools.rxn_eva_metric(eva_df=res_enzyme, eva_name='[ENZYME]', methods=methods)

Evaluating: Reaction Predcition Results [FULL SET]
Evaluating: Reaction Predcition Results [NONE ENZYME]
Evaluating: Reaction Predcition Results [ENZYME]


In [11]:
#show results
HTML(f'''
     <div style="float:left; width: 25%;"><h2>Reaction Predcition Results <span style="color:red"> [FULL SET]</span></h2>{metrics_rxn_fullset.sort_values(by=['mF1'], ascending=False).reset_index(drop=True).to_html()} </div>
     <div style="float:left; width: 25%;"><h2>Reaction Predcition Results <span style="color:red"> [NONE-ENZYME]</span></h2>{metrics_rxn_noneenzyme.sort_values(by=['mF1'], ascending=False).reset_index(drop=True).to_html()} </div>
     <div style="float:left; width: 25%;"><h2>Reaction Predcition Results <span style="color:red"> [ENZYME]</span></h2>{metrics_rxn_enzyme.sort_values(by=['mF1'], ascending=False).reset_index(drop=True).to_html()} </div>
     ''')

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,ecrecer,0.6899,0.967961,0.651723,0.714824
1,catfam,0.770477,0.870005,0.670351,0.620349
2,blast,0.43337,0.907868,0.40919,0.504355
3,ecpred,0.420792,0.926196,0.36349,0.455263
4,clean,0.078949,0.879031,0.145483,0.097747
5,priam,0.017832,0.853372,0.160137,0.075407
6,deepec,0.03485,0.9784,0.052406,0.059727

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,catfam,0.968768,1.0,0.968768,0.984136
1,ecrecer,0.7516,1.0,0.7516,0.858187
2,ecpred,0.53356,1.0,0.53356,0.695845
3,blast,0.478177,1.0,0.478177,0.646982
4,deepec,0.0,1.0,0.0,0.0
5,clean,0.0,1.0,0.0,0.0
6,priam,0.0,1.0,0.0,0.0

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,ecrecer,0.49142,0.979519,0.474029,0.486309
1,clean,0.332917,0.8617,0.404314,0.389627
2,blast,0.289236,0.931659,0.286454,0.297081
3,priam,0.075195,0.692765,0.445039,0.289674
4,deepec,0.146958,0.964808,0.145643,0.171148
5,catfam,0.132605,0.978662,0.139431,0.15227
6,ecpred,0.058034,0.977168,0.060915,0.073204
