In [1]:
import pandas as pd

# Load the benchmark results and index them by the 'reference' column.
results_raw = pd.read_feather('../Figure1/saved_data_plot/results_benchmark_algos.feather').set_index('reference')

# Display labels for the dataset and model identifiers stored in the file.
dataset_labels = {
    'viral_fragments': 'Viral mRNA',
    'lncRNA_nonFiltered': 'Long ncRNA',
    'archiveII': 'ArchiveII',
}
model_labels = {'RNAstructure': 'RNAstructure Fold'}

# Drop the 'lncRNA' rows, then relabel on a fresh frame. Building the result with
# assign/replace (instead of .loc assignments on a boolean-filtered slice) avoids
# SettingWithCopyWarning and leaves results_raw untouched, so the cell is idempotent.
results = (
    results_raw[results_raw['dataset'] != 'lncRNA']
    .assign(
        dataset=lambda frame: frame['dataset'].replace(dataset_labels),
        model=lambda frame: frame['model'].replace(model_labels),
    )
)

In [2]:
# Average every numeric column for each (model, dataset) pair.
grouped = results.groupby(['model', 'dataset']).mean(numeric_only=True).reset_index()

# Row and column order of the final table.
new_order = ['RNAstructure Fold', 'EternaFold', 'MXFold2', 'UFold']
dataset_order = ['PDB', 'ArchiveII', 'Viral mRNA', 'Long ncRNA']
metric_order = ['Precision', 'Recall', 'F1']

# pivot_table puts the metric on the outer column level; swap the levels so the
# dataset is the top level and the metrics sit underneath it.
metrics_by_dataset = (
    pd.pivot_table(grouped, index='model', columns='dataset', values=metric_order)
    .swaplevel(i=0, j=1, axis=1)
    .sort_index(axis=1)
)

# Explicit (dataset, metric) column order. Selecting with a list of tuples raises
# KeyError if an expected dataset/metric column is missing, rather than silently
# producing NaN columns.
column_order = [(dataset, metric) for dataset in dataset_order for metric in metric_order]
metrics_table = metrics_by_dataset.reindex(new_order)[column_order]

# Style the table for display/export. pivot_df is deliberately the Styler (not the
# DataFrame): the export cell calls pivot_df.to_excel on it.
pivot_df = (
    metrics_table.style
    .format(precision=3)
    # Bold the best model in each column. The CSS property for text colour is
    # `color`; `font-color` is not a CSS property.
    .highlight_max(axis=0, props="font-weight:bold;color:black;")
    # NOTE(review): vmin=-0.1 presumably keeps the lowest scores off the darkest end
    # of viridis -- confirm. text_color_threshold=0 makes all cell text dark.
    .background_gradient(axis=1, vmin=-0.1, vmax=1, cmap="viridis", text_color_threshold=0)
    .set_properties(**{'text-align': 'center'})
    .set_table_styles([{"selector": "th", "props": [('text-align', 'center')]}])
)
pivot_df


dataset,PDB,PDB,PDB,ArchiveII,ArchiveII,ArchiveII,Viral mRNA,Viral mRNA,Viral mRNA,Long ncRNA,Long ncRNA,Long ncRNA
Unnamed: 0_level_1,Precision,Recall,F1,Precision,Recall,F1,Precision,Recall,F1,Precision,Recall,F1
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
RNAstructure Fold,0.899,0.911,0.89,0.546,0.6,0.569,0.687,0.745,0.713,0.46,0.52,0.487
EternaFold,0.883,0.909,0.88,0.572,0.634,0.598,0.75,0.805,0.774,0.45,0.467,0.457
MXFold2,0.914,0.928,0.904,0.729,0.762,0.741,0.703,0.72,0.71,0.41,0.429,0.419
UFold,0.809,0.963,0.868,0.824,0.884,0.85,0.578,0.587,0.579,0.219,0.144,0.163


In [4]:
# Write the styled benchmark table (pivot_df is the Styler built above) to an Excel file.
output_path = 'tables/T1_algo_benchmark.xlsx'
pivot_df.to_excel(output_path)