## 反应直接预测结果分析
> 2024-11-08

### 1. 导入必要的包

In [1]:
# Standard Library Imports
import os
import sys

# Third-party Imports
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import plotly.graph_objects as go
from IPython.display import HTML
from pandarallel import pandarallel  # Importing pandarallel for parallel processing

# Setting up the path for the module
sys.path.insert(0, os.path.dirname(os.path.realpath('__file__')))
sys.path.insert(1, '../')

# Local Imports
from config import conf as cfg
from tools import btools
import evTools
# Initialize parallel processing
pandarallel.initialize(progress_bar=False)

# Enable autoreloading of modules in IPython
%load_ext autoreload
%autoreload 2

INFO: Pandarallel will run on 128 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### 2. 加载测试数据集

In [2]:
# Read CSV files serially
def read_csv_files(file_paths):
    return [pd.read_csv(file, sep='\t') for file in file_paths]

# Function to get ec_rxn_nores
def get_ec_rxn_nores(pred_detail,  rxnkey):

    no_prediction = len(pred_detail[(pred_detail[rxnkey].str.contains('NO-PREDICTION'))])
    return [len(pred_detail), no_prediction]

def process_no_res(res_list,  rxnkey):
    return pd.DataFrame([get_ec_rxn_nores(pred_detail=res_list[item], rxnkey=rxnkey) for item in range(10)], 
                        columns=['test_size', 'no_prediction'])

# Make one-hot encoding label for each prediction
def make_labels(resdf, src_col1, src_col2, lb1, lb2, rxn_label_dict):
    resdf[[lb1, lb2]] = resdf.apply(
        lambda row: pd.Series({
            lb1: btools.make_label(reaction_id=str(row[src_col1]), rxn_label_dict=rxn_label_dict),
            lb2: btools.make_label(reaction_id=str(row[src_col2]), rxn_label_dict=rxn_label_dict)
        }), axis=1
    )
    return resdf

def apply_labels(res_list, src_col1, src_col2, lb1, lb2, rxn_label_dict):
    for i in tqdm(range(10)):
        res_list[i] = make_labels(resdf=res_list[i], src_col1=src_col1, src_col2=src_col2, lb1=lb1, lb2=lb2, rxn_label_dict=rxn_label_dict)
    return res_list


# Function to calculate metrics
def calculate_metrics(eva_df, ground_truth_col, pred_col, eva_name):
    res =  btools.rxn_eva_metric_with_colName(eva_df=eva_df, col_groundtruth=ground_truth_col, col_pred=pred_col, eva_name=eva_name)
    return res

# 多线程运行评价函数
def calculate_metrics_parallel(res_unirep, ground_truth_col, pred_col, max_workers=None):
    def run_metric_evaluation(index):
        return calculate_metrics(eva_df=res_unirep[index], ground_truth_col=ground_truth_col, pred_col=pred_col, eva_name=f'fold{index + 1}')
    
    results = [None] * len(res_unirep)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(run_metric_evaluation, i): i
            for i in range(len(res_unirep))
        }
        for future in as_completed(futures):
            i = futures[future]
            results[i] = future.result()
            
    results = pd.concat(results,axis=0).reset_index(drop=True)
    
    return results

def get_simi_Pred(pred_list, uniprot_rxn_dict, topk=3):
    uniprot_id_list = [item[0] for item in pred_list][:topk]
    rxn_ids = [uniprot_rxn_dict.get(uniprot_id) for uniprot_id in uniprot_id_list]
    rxn_res = (cfg.SPLITER).join(set(rxn_ids))
    return rxn_res

# Function to display results as HTML
def display_html_results(metrics, fold_std, eva_name):
    return HTML(f"""
         <div style="float:left; width:900px;">
              <h2 style='color:blue'>{eva_name} Evaluation 10 Fold Details</h2>
              {metrics.to_html()}
         </div>
         <div  style="float:left; width:600px;" >
              <h2 style='color:blue' >{eva_name} Evaluation 10 Fold Overview</h2>
                   {fold_std.to_html()}
         </div>
         """)

In [3]:
# 从 JSON 文件加载反应编码字典
with open(cfg.FILE_DS_DICT_RXN2ID, "r") as json_file:
    dict_rxn2id = json.load(json_file)
    print(f'加载反应编码字典完成，共有 {len(dict_rxn2id)} 个反应。')  # 打印加载的数据
    
print('Loading validation datasets feather path ...')
vali_feather_files = [
    f'{cfg.DIR_DATASET}validation/fold{fold_index}/valid.feather' 
    for fold_index in range(1, 11)
]

# load datasets
ds_test =[pd.read_feather(vali_feather_files[item])[['uniprot_id', 'reaction_id']].rename(columns={'reaction_id': 'rxn_groundtruth'}) for item in tqdm(range(10))]


def read_h5_file(file_path):
    with pd.HDFStore(file_path, 'r') as h5:
        data = h5['data']
    return data

print('Loading uniprot_rxn_dict ...' )
d1 = pd.read_feather(cfg.FILE_DS_TRAIN)
d2 = pd.read_feather(cfg.FILE_DS_TEST)
uniprot_rxn_dict = pd.concat([d1,d2], axis=0).reset_index(drop=True)[['uniprot_id', 'reaction_id']].set_index('uniprot_id')['reaction_id'].to_dict()


加载反应编码字典完成，共有 10479 个反应。
Loading validation datasets feather path ...


100%|██████████| 10/10 [00:05<00:00,  1.99it/s]


Loading uniprot_rxn_dict ...


## 4. Load results from EC based method

### 4.1 Blast

In [4]:
std_blast, metrics_blast, ec_no_rxn_blast  = evTools.get_eval_results(baselineName='blast', dict_rxn2id=dict_rxn2id, method_type='direct')
evTools.display_html_results(metrics = metrics_blast, std_mean = std_blast, no_pred=ec_no_rxn_blast, eva_name ='Blast-direct')

Calculating metrics ...
Calculating mean and std ...
Statistic ec no prediction and ec with no reaction ...


Unnamed: 0,baselineName,runFold,mAccuracy,mPrecision,mRecall,mF1,avgType
0,blast,1,0.85,0.982154,0.965517,0.973692,weighted
1,blast,1,0.85,0.823529,0.965517,0.888889,micro
2,blast,1,0.85,0.997897,0.999994,0.997896,macro
3,blast,1,0.85,0.9025,0.96,0.919429,samples
4,blast,2,0.76,0.926939,0.956522,0.932271,weighted
5,blast,2,0.76,0.491071,0.956522,0.648968,micro
6,blast,2,0.76,0.990018,0.999897,0.990003,macro
7,blast,2,0.76,0.835633,0.95,0.862307,samples
8,blast,3,0.81,0.943844,0.964286,0.953178,weighted
9,blast,3,0.81,0.610169,0.964286,0.747405,micro

Unnamed: 0,baselineName,avgType,Metric,mean,std
0,blast,macro,mAccuracy,0.828,0.037357
1,blast,macro,mF1,0.994924,0.002831
2,blast,macro,mPrecision,0.994981,0.002829
3,blast,macro,mRecall,0.999917,8.6e-05
4,blast,micro,mAccuracy,0.828,0.037357
5,blast,micro,mF1,0.789212,0.080942
6,blast,micro,mPrecision,0.677126,0.110561
7,blast,micro,mRecall,0.958911,0.014166
8,blast,samples,mAccuracy,0.828,0.037357
9,blast,samples,mF1,0.898099,0.026283

Unnamed: 0,run_fold,test_size,no_prediction_count
0,1,50858,2424
1,2,50858,2386
2,3,50858,2379
3,4,50858,2301
4,5,50858,2418
5,6,50858,2391
6,7,50858,2369
7,8,50858,2381
8,9,50858,2449
9,10,50858,2431


### 4.2 Unirep

In [28]:
std_unirep, metrics_unirep, ec_no_rxn_unirep  = evTools.get_eval_results(baselineName='unirep', dict_rxn2id=dict_rxn2id, method_type='direct')
evTools.display_html_results(metrics = metrics_unirep, std_mean = std_unirep, no_pred=ec_no_rxn_unirep, eva_name ='Unirep')

Labeling ...
{'rxn_groundtruth': 'lb_rxn_groundtruth', 'rxn_unirep_euclidean': 'lb_rxn_unirep_euclidean'}


100%|██████████| 10/10 [00:15<00:00,  1.58s/it]


Labeling Done!
Calculating metrics ...


  0%|          | 0/10 [00:00<?, ?it/s]


KeyError: 'lb_rxn_unirep'

In [7]:
embd_methd = 'unirep'
file_res_unirep = [f'{cfg.RESULTS_DIR}simi/fold_{fold_num}_{embd_methd}_results.h5' for fold_num in range(1,11)]

In [10]:
read_h5_file(file_res_unirep[0])

Unnamed: 0,uniprot_id,reaction_id,ec_number,euclidean,cosine
0,Q9UYB6,-,-,"[(O58185, 1.067479), (Q8U3J1, 1.1344), (O57947...","[(O58185, 0.974287), (Q8U3J1, 0.970858), (O579..."
1,C1AQW9,RHEA:19669,3.6.5.-,"[(P65270, 0.0), (A1KL96, 0.0), (A5U598, 0.0), ...","[(P65270, 1.0), (A1KL96, 1.0), (A5U598, 1.0), ..."
2,P64647,-,-,"[(P64648, 0.0), (P64646, 0.0), (P64453, 1.3582...","[(P64648, 1.0), (P64646, 1.0), (P64453, 0.9711..."
3,Q9MTM3,RHEA:21248,2.7.7.6,"[(B0Z545, 0.018057), (B0Z5C9, 0.04094), (B0Z4W...","[(B0Z545, 0.999991), (B0Z5C9, 0.999955), (B0Z4..."
4,P45894,RHEA:17989;RHEA:46608,2.7.11.1,"[(Q09137, 0.777959), (P54646, 0.786766), (Q289...","[(Q09137, 0.983195), (P54646, 0.982973), (Q289..."
...,...,...,...,...,...
50853,B2A826,-,-,"[(B2V1Z8, 1.213546), (B2TLR7, 1.219303), (B9E3...","[(B2V1Z8, 0.963565), (B2TLR7, 0.963063), (B9E3..."
50854,Q9SCB9,-,-,"[(A9NAJ6, 1.075184), (Q83EV3, 1.084887), (B6J2...","[(A9NAJ6, 0.974213), (Q83EV3, 0.973735), (B6J2..."
50855,P38647,-,-,"[(O35501, 0.103155), (P38646, 0.104681), (Q3ZC...","[(O35501, 0.999855), (P38646, 0.99985), (Q3ZCH..."
50856,A3N2P1,RHEA:16585,2.8.1.8,"[(B3H2J0, 0.0), (B0BRR9, 0.068894), (Q7VKB1, 0...","[(B3H2J0, 1.0), (B0BRR9, 0.999911), (Q7VKB1, 0..."


In [None]:
embd_methd = 'unirep'
file_res_unirep = [f'{cfg.RESULTS_DIR}simi/fold_{fold_num}_{embd_methd}_results.h5' for fold_num in range(1,11)]
res_unirep = [read_h5_file(item)for item in tqdm(file_res_unirep)]

# 获取反应ID
for i in tqdm(range(10)):
    res_unirep[i]['rxn_euclidean'] = res_unirep[i].euclidean.apply(lambda x : get_simi_Pred(pred_list=x, uniprot_rxn_dict=uniprot_rxn_dict))
    res_unirep[i]['rxn_cosine'] = res_unirep[i].cosine.apply(lambda x : get_simi_Pred(pred_list=x, uniprot_rxn_dict=uniprot_rxn_dict))
    
    

# 将反应ID标签化    
res_unirep = apply_labels(res_unirep, 'reaction_id', 'rxn_euclidean', 'lb_rxn_groundtruth', 'lb_rxn_unirep_euclidean', dict_rxn2id)
for i in tqdm(range(10)):
    res_unirep[i]['lb_rxn_unirep_cosine'] = res_unirep[i].rxn_cosine.parallel_apply(lambda x :btools.make_label(reaction_id=x, rxn_label_dict=dict_rxn2id))
    

# 计算评价指标
res_unirep_euclidean_metrics = calculate_metrics_parallel(res_unirep=res_unirep, ground_truth_col='lb_rxn_groundtruth', pred_col='lb_rxn_unirep_euclidean', max_workers=15)
res_unirep_cosine_metrics = calculate_metrics_parallel(res_unirep=res_unirep, ground_truth_col='lb_rxn_groundtruth', pred_col='lb_rxn_unirep_cosine', max_workers=15)


res_unirep_euclidean_metrics['baselineName'] = 'unirep_eu'
res_unirep_cosine_metrics['baselineName'] = 'unirep_cos'
res_unirep_euclidean_metrics['runFold'] = res_unirep.index+1
res_unirep_cosine_metrics['runFold'] = res_unirep.index+1

res_unirep = pd.concat([res_unirep_euclidean_metrics, res_unirep_cosine_metrics], axis=0).reset_index(drop=True)
res_unirep.to_feather(f'{cfg.DIR_PROJECT_ROOT}/evaluation/data/res_unirep_metrics.feather')


# res_unirep_euclidean_metrics.to_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_unirep_euclidean_metrics.feather')
# res_unirep_cosine_metrics.to_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_unirep_cosine_metrics.feather')

res_unirep_euclidean_fold_std = res_unirep_euclidean_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1']].agg(['mean', 'std'])
res_unirep_cosine_fold_std = res_unirep_cosine_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1']].agg(['mean', 'std'])


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
HTML(f"""
         <div style="float:left; width:600px;">
              <h2 style='color:blue'>{'Unirep (Euclidean) Evaluation 10 Fold Details'} Evaluation 10 Fold Details</h2>
              {res_unirep_euclidean_metrics.to_html()}
         </div>
         <div  style="float:left; width:600px;" >
              <h2 style='color:blue' >{'Unirep (Euclidean) Evaluation 10 Fold Overview'} Evaluation 10 Fold Overview</h2>
                   {res_unirep_euclidean_fold_std.to_html()}
         </div>
         
        <div style="float:left; display:block; width:600px;">
              <h2 style='color:blue'>{'Unirep (Cosine) Evaluation 10 Fold Details'} Evaluation 10 Fold Details</h2>
              {res_unirep_cosine_metrics.to_html()}
         </div>
         <div  style="float:left; width:600px;" >
              <h2 style='color:blue' >{'Unirep (Cosine) Evaluation 10 Fold Overview'} Evaluation 10 Fold Overview</h2>
                   {res_unirep_cosine_fold_std.to_html()}
         </div>
         """)

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,fold1,0.889575,0.892769,0.963311,0.915861
0,fold2,0.889595,0.892241,0.960985,0.913871
0,fold3,0.888946,0.88975,0.961411,0.913392
0,fold4,0.891345,0.894308,0.96068,0.915687
0,fold5,0.887687,0.893174,0.960435,0.914768
0,fold6,0.889437,0.891764,0.962008,0.914212
0,fold7,0.891325,0.893932,0.961476,0.9151
0,fold8,0.891895,0.894992,0.963322,0.917196
0,fold9,0.887353,0.890655,0.962801,0.914369
0,fold10,0.88985,0.893834,0.961377,0.915201

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1
mean,0.889701,0.892742,0.961781,0.914966
std,0.001506,0.001661,0.001047,0.001106

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,fold1,0.890656,0.89393,0.964254,0.916958
0,fold2,0.890578,0.894111,0.961342,0.914934
0,fold3,0.890696,0.890728,0.961991,0.914388
0,fold4,0.892249,0.895324,0.961105,0.916438
0,fold5,0.888356,0.894634,0.961083,0.915965
0,fold6,0.890578,0.894673,0.962042,0.916023
0,fold7,0.892642,0.895804,0.961767,0.916512
0,fold8,0.893134,0.897074,0.963579,0.918411
0,fold9,0.888474,0.890809,0.962595,0.914526
0,fold10,0.890243,0.894445,0.962025,0.915833

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1
mean,0.890761,0.894153,0.962178,0.915999
std,0.00159,0.002005,0.00104,0.001208


### 4.3 ESM

In [None]:
embd_methd = 'esm'
file_res_esm = [f'{cfg.RESULTS_DIR}simi/fold_{fold_num}_{embd_methd}_results.h5' for fold_num in range(1,11)]
res_esm = [read_h5_file(item)for item in tqdm(file_res_esm)]

# 获取反应ID
for i in tqdm(range(10)):
    # res_esm[i]['rxn_euclidean'] = res_esm[i].euclidean.apply(lambda x : uniprot_rxn_dict.get(x[0][0]))
    res_esm[i]['rxn_euclidean'] = res_esm[i].euclidean.apply(lambda x : get_simi_Pred(pred_list=x, uniprot_rxn_dict=uniprot_rxn_dict, topk=6))
    res_esm[i]['rxn_cosine'] = res_esm[i].cosine.apply(lambda x : get_simi_Pred(pred_list=x, uniprot_rxn_dict=uniprot_rxn_dict, topk=6))

# 将反应ID标签化    
res_esm = apply_labels(res_esm, 'reaction_id', 'rxn_euclidean', 'lb_rxn_groundtruth', 'lb_rxn_esm_euclidean', dict_rxn2id)
for i in tqdm(range(10)):
    res_esm[i]['lb_rxn_esm_cosine'] = res_esm[i].rxn_cosine.parallel_apply(lambda x :btools.make_label(reaction_id=x, rxn_label_dict=dict_rxn2id))
    
# 计算评价指标
res_esm_euclidean_metrics = calculate_metrics_parallel(res_unirep=res_esm, ground_truth_col='lb_rxn_groundtruth', pred_col='lb_rxn_esm_euclidean', max_workers=15)
res_esm_cosine_metrics = calculate_metrics_parallel(res_unirep=res_esm, ground_truth_col='lb_rxn_groundtruth', pred_col='lb_rxn_esm_cosine', max_workers=15)


res_esm_euclidean_metrics['baselineName'] = 'esm_eu'
res_unirep_cosine_metrics['baselineName'] = 'esm_cos'

res_esm_euclidean_metrics['runFold'] = res_esm_euclidean_metrics.index+1
res_esm_cosine_metrics['runFold'] = res_esm_cosine_metrics.index+1

res_esm = pd.concat([res_esm_euclidean_metrics, res_esm_cosine_metrics], axis=0).reset_index(drop=True)
res_esm.to_feather(f'{cfg.DIR_PROJECT_ROOT}/evaluation/data/res_esm_metrics.feather')

# res_esm_euclidean_metrics.to_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_esm_euclidean_metrics.feather')
# res_esm_cosine_metrics.to_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_esm_cosine_metrics.feather')
res_esm_euclidean_fold_std = res_esm_euclidean_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1']].agg(['mean', 'std'])
res_esm_cosine_fold_std = res_esm_cosine_metrics[['mAccuracy', 'mPrecision', 'mRecall', 'mF1']].agg(['mean', 'std'])

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [10:12<00:00, 61.29s/it]
100%|██████████| 10/10 [00:15<00:00,  1.51s/it]
100%|██████████| 10/10 [03:09<00:00, 18.98s/it]
100%|██████████| 10/10 [23:22<00:00, 140.22s/it]


NameError: name 'res_unirep_cosine_metrics' is not defined

In [10]:
res_esm_euclidean_metrics

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,runFold
0,esm_eu,0.887491,0.882065,0.984269,0.918213,1
0,esm_eu,0.886095,0.881635,0.982224,0.915974,1
0,esm_eu,0.886508,0.880745,0.982181,0.916297,1
0,esm_eu,0.889654,0.883257,0.982297,0.917988,1
0,esm_eu,0.888454,0.883698,0.983287,0.918886,1
0,esm_eu,0.888081,0.882835,0.983223,0.917706,1
0,esm_eu,0.889083,0.884422,0.982791,0.918351,1
0,esm_eu,0.888061,0.882121,0.984369,0.917552,1
0,esm_eu,0.886252,0.880343,0.983591,0.91646,1
0,esm_eu,0.885819,0.880933,0.982683,0.916066,1


In [7]:
HTML(f"""
         <div style="float:left; width:600px;">
              <h2 style='color:blue'>{'ESM (Euclidean) Evaluation 10 Fold Details'} Evaluation 10 Fold Details</h2>
              {res_esm_euclidean_metrics.to_html()}
         </div>
         <div  style="float:left; width:600px;" >
              <h2 style='color:blue' >{'ESM (Euclidean) Evaluation 10 Fold Overview'} Evaluation 10 Fold Overview</h2>
                   {res_esm_euclidean_fold_std.to_html()}
         </div>
         
        <div style="float:left; display:block; width:600px;">
              <h2 style='color:blue'>{'ESM (Cosine) Evaluation 10 Fold Details'} Evaluation 10 Fold Details</h2>
              {res_esm_cosine_metrics.to_html()}
         </div>
         <div  style="float:left; width:600px;" >
              <h2 style='color:blue' >{'ESM (Cosine) Evaluation 10 Fold Overview'} Evaluation 10 Fold Overview</h2>
                   {res_esm_cosine_fold_std.to_html()}
         </div>
         """)

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,runFold
0,esm_eu,0.887491,0.882065,0.984269,0.918213,1
0,esm_eu,0.886095,0.881635,0.982224,0.915974,1
0,esm_eu,0.886508,0.880745,0.982181,0.916297,1
0,esm_eu,0.889654,0.883257,0.982297,0.917988,1
0,esm_eu,0.888454,0.883698,0.983287,0.918886,1
0,esm_eu,0.888081,0.882835,0.983223,0.917706,1
0,esm_eu,0.889083,0.884422,0.982791,0.918351,1
0,esm_eu,0.888061,0.882121,0.984369,0.917552,1
0,esm_eu,0.886252,0.880343,0.983591,0.91646,1
0,esm_eu,0.885819,0.880933,0.982683,0.916066,1

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1
mean,0.88755,0.882205,0.983091,0.917349
std,0.001334,0.001342,0.000802,0.00106

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,runFold
0,fold1,0.891109,0.88514,0.9848,0.921256,1
0,fold2,0.889909,0.884424,0.982394,0.918346,1
0,fold3,0.890381,0.883653,0.982539,0.918754,1
0,fold4,0.892839,0.886935,0.982893,0.921122,1
0,fold5,0.892819,0.886306,0.983253,0.921114,1
0,fold6,0.892485,0.88647,0.983377,0.920763,1
0,fold7,0.892721,0.887791,0.982808,0.921154,1
0,fold8,0.892131,0.885634,0.984454,0.920569,1
0,fold9,0.889378,0.882434,0.983916,0.918471,1
0,fold10,0.890774,0.885339,0.983143,0.919844,1

Unnamed: 0,mAccuracy,mPrecision,mRecall,mF1
mean,0.891455,0.885413,0.983358,0.920139
std,0.001305,0.001597,0.000801,0.001191


### 4.4 T5

In [None]:
def calculate_metrics(model_name, distance_metric, data):

    # Calculate metrics for a specific model using a given distance metric
    print(f"Calculating metrics for {model_name} ({distance_metric})...")
    # Your implementation here
    pass

# 5. 整合指标

In [19]:
res_metrics_blast  = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_blast_direct_metrics.feather')
res_metrics_unirep_eu = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_unirep_euclidean_metrics.feather')
res_metrics_unirep_cos = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_unirep_cosine_metrics.feather')
res_metrics_esm_eu = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_esm_euclidean_metrics.feather')
res_metrics_esm_cos = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_esm_cosine_metrics.feather')
res_metrics_t5_eu = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_t5_euclidean_metrics.feather')
res_metrics_t5_cos = pd.read_feather('/hpcfs/fhome/shizhenkun/codebase/RXNRECer/evaluation/data/res_t5_cosine_metrics.feather')

res_metrics = pd.concat([res_metrics_blast, res_metrics_unirep_eu, res_metrics_unirep_cos, res_metrics_esm_eu, res_metrics_esm_cos, res_metrics_t5_eu, res_metrics_t5_cos], axis=0).reset_index(drop=True)

In [27]:
res_metrics_blast

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1,test_size,no_prediction,runFold
0,Blast_direct,0.831649,0.879541,0.950236,0.895765,50858,2424,1
1,Blast_direct,0.832711,0.876034,0.949407,0.892882,50858,2386,2
2,Blast_direct,0.832416,0.877199,0.951146,0.894549,50858,2379,3
3,Blast_direct,0.834008,0.880373,0.951624,0.897268,50858,2301,4
4,Blast_direct,0.83391,0.879803,0.950015,0.896814,50858,2418,5
5,Blast_direct,0.835876,0.882204,0.950201,0.897997,50858,2391,6
6,Blast_direct,0.832593,0.877078,0.949912,0.894182,50858,2369,7
7,Blast_direct,0.832691,0.88234,0.950903,0.897967,50858,2381,8
8,Blast_direct,0.830076,0.877064,0.949523,0.894232,50858,2449,9
9,Blast_direct,0.831,0.877942,0.950009,0.894861,50858,2431,10


In [21]:
res_metrics_unirep_eu

Unnamed: 0,baselineName,mAccuracy,mPrecision,mRecall,mF1
0,fold1,0.889575,0.892769,0.963311,0.915861
0,fold2,0.889595,0.892241,0.960985,0.913871
0,fold3,0.888946,0.88975,0.961411,0.913392
0,fold4,0.891345,0.894308,0.96068,0.915687
0,fold5,0.887687,0.893174,0.960435,0.914768
0,fold6,0.889437,0.891764,0.962008,0.914212
0,fold7,0.891325,0.893932,0.961476,0.9151
0,fold8,0.891895,0.894992,0.963322,0.917196
0,fold9,0.887353,0.890655,0.962801,0.914369
0,fold10,0.88985,0.893834,0.961377,0.915201
