LaTeX table containing the accuracies of all models

In [49]:
import json
import pandas as pd
import numpy as np
from config import TLCPaths
from sklearn.metrics import f1_score, classification_report


In [3]:
# Load the accuracy data from the project data directory; it is indexed by
# model name further down in the notebook.
# The encoding is pinned to UTF-8 (the encoding JSON is specified to use) so the
# result does not depend on the platform's locale default.
with open(TLCPaths.project_data_path / 'all_accuracies.json', 'r', encoding='utf-8') as file:
    accuracies = json.load(file)

In [4]:
# One (key in `accuracies`, label shown in the table) pair per model, listed in
# the order the rows should appear. Keeping each key next to its label makes it
# impossible for the two lists below to drift out of step.
_model_labels = [
    ('SAPBERT_ENG_cls_token_on_cls_index', "SapBERT_ENG_cls"),

    ('SAPBERT_mention_cls_token_rep_cls_token_index RoBERTa Reranking', "SapBERT_cls + RoBERTa Reranking"),
    ('SAPBERT_mention_cls_token_rep_cls_token_index mBERT Reranking', "SapBERT_cls + mBERT Reranking"),

    ('SAPBERT_mention_cls_token_rep_cls_token_index', "SapBERT_cls"),
    ('SAPBERT_mention_mean_token_rep_mean_token_index', "SapBERT_all"),
    ('SAPBERT_mention_no_cls_token_rep_mean_no_cls_token_index', "SapBERT_nospec"),

    ('SAPBERT_sentence_cls_token_rep_cls_token_index', "SapBERT_sentence_cls"),
    ('SAPBERT_context_cls_token_rep_cls_token_index', "SapBERT_context_cls"),

    ('Solr + WUMLS', "Solr + WUMLS"),
]

# The two parallel lists, derived from the pairs above.
models_to_plot = [key for key, label in _model_labels]
new_names = [label for key, label in _model_labels]

# Re-key each model's accuracy list under its display label, rounding every
# value to two decimals.
renamed_accs = {
    label: [round(value, 2) for value in accuracies[key]]
    for key, label in _model_labels
}
    

In [5]:
# Build the accuracy table: pd.DataFrame(renamed_accs) puts one model per
# column, so transpose to get one row per model.
df = pd.DataFrame(renamed_accs).transpose()

# Cutoffs used to label the accuracy columns: k = 1..10, then 16, 32 and 64.
# Assigning these 13 labels makes pandas raise if the frame has a different
# number of columns.
top_k_values = list(range(1, 11)) + [16, 32, 64]
df.columns = [f"Top {k} Accuracy" for k in top_k_values]
print(df.columns)

# Keep only the required columns
df_selected = df[['Top 1 Accuracy', 'Top 2 Accuracy', 'Top 5 Accuracy', 'Top 10 Accuracy']]

# Generate the LaTeX code for the selected table.
# `escape` and `float_format` are passed explicitly instead of relying on
# defaults: newer pandas versions render to_latex through Styler, where special
# characters such as the "_" in the row labels are not escaped unless requested
# and floats are not limited to two decimals. Stating both keeps the emitted
# table valid LaTeX with two-decimal values regardless of the pandas version.
latex_table_selected = df_selected.to_latex(escape=True, float_format="%.2f")

print(latex_table_selected)

Index(['Top 1 Accuracy', 'Top 2 Accuracy', 'Top 3 Accuracy', 'Top 4 Accuracy',
       'Top 5 Accuracy', 'Top 6 Accuracy', 'Top 7 Accuracy', 'Top 8 Accuracy',
       'Top 9 Accuracy', 'Top 10 Accuracy', 'Top 16 Accuracy',
       'Top 32 Accuracy', 'Top 64 Accuracy'],
      dtype='object')
\begin{tabular}{lrrrr}
\toprule
{} &  Top 1 Accuracy &  Top 2 Accuracy &  Top 5 Accuracy &  Top 10 Accuracy \\
\midrule
SapBERT\_ENG\_cls                 &            0.36 &            0.47 &            0.53 &             0.55 \\
SapBERT\_cls + RoBERTa Reranking &            0.21 &            0.25 &            0.35 &             0.46 \\
SapBERT\_cls + mBERT Reranking   &            0.09 &            0.16 &            0.26 &             0.36 \\
SapBERT\_cls                     &            0.46 &            0.56 &            0.69 &             0.78 \\
SapBERT\_all                     &            0.51 &            0.61 &            0.67 &             0.71 \\
SapBERT\_nospec                  &           

  latex_table_selected = df_selected.to_latex()


In [58]:
# Load the contents of sapbert_predictions.json from the project data directory.
# UTF-8 is requested explicitly so decoding does not depend on the platform's
# locale default.
with open(TLCPaths.project_data_path / 'sapbert_predictions.json', 'r', encoding='utf-8') as file:
    predictions = json.load(file)

# Load the entries of TLC_UMLS.json; each entry is read below through its
# "mention" and "cui" keys. The mentions are text, so a wrong default encoding
# would corrupt or reject any non-ASCII characters.
with open(TLCPaths.project_data_path / "TLC_UMLS.json", "r", encoding='utf-8') as f:
    data = json.load(f)

# Parallel arrays in entry order: X[i] is the mention and Y[i] the "cui" value
# of data[i] (presumably the UMLS concept identifier — confirm against the data).
# Built after the file is closed, since only the parsed `data` is needed.
X = np.array([entry["mention"] for entry in data])
Y = np.array([entry["cui"] for entry in data])