In [1]:
# Importing required libraries
import pandas as pd

# Read the benchmark results from disk into a DataFrame
file_path = 'benchmark_reqa.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading ten rows to inspect the available columns
data.head(n=10)

Unnamed: 0,source,augment_col,augment_ratio,top_1,top_5,top_10,mrr_score
0,iapp,,0.0,162,183,237,0.024308
1,iapp,th_aug,0.0,126,137,170,0.018462
2,iapp,th_aug,0.1,163,184,239,0.024405
3,iapp,th_aug,0.2,162,180,234,0.024134
4,iapp,th_aug,0.3,162,180,237,0.024205
5,iapp,th_aug,0.4,160,176,229,0.023752
6,iapp,th_aug,0.5,155,170,211,0.022819
7,iapp,th_aug,0.6,156,170,211,0.022917
8,iapp,th_aug,0.7,157,169,207,0.022936
9,iapp,th_aug,0.8,156,174,200,0.022721


In [7]:
# List the distinct augmentation identifiers. Rows with a missing augment_col
# are labelled "original" on the fly (without modifying `data`), so this cell
# shows the same labels whether or not the fillna cell further down has
# already been executed.
data["augment_col"].fillna("original").unique()

array(['original', 'th_aug', 'th_fasttext_aug', 'th_llm_gec_aug',
       'th_llm_paraphrase_aug', 'th_qcpg_0.2_aug',
       'th_qcpg_0.2_llm_gec_aug', 'th_qcpg_0.5_aug',
       'th_qcpg_0.5_llm_gec_aug', 'th_qcpg_0.8_aug',
       'th_qcpg_0.8_llm_gec_aug', 'th_wordnet_aug'], dtype=object)

In [2]:
# Highest top_1 value observed within each source, taken over all rows
data.groupby("source")["top_1"].agg("max")

source
iapp      169
thaiqa     51
tydiqa     45
xquad      71
Name: top_1, dtype: int64

In [3]:
# Highest top_1 per source among the rows without an augmentation identifier.
# Those rows carry a missing augment_col until it is filled with "original",
# so both markers are accepted: the cell then gives the same answer whether it
# is run before or after that fill.
is_baseline = data["augment_col"].isna() | data["augment_col"].eq("original")
data[is_baseline].groupby(['source'])['top_1'].max()

source
iapp      162
thaiqa     46
tydiqa     38
xquad      67
Name: top_1, dtype: int64

In [5]:
# Label rows that have no augmentation identifier as "original"
data["augment_col"] = data["augment_col"].where(data["augment_col"].notna(), "original")

In [6]:
# Display labels keyed by the raw identifiers found in the `augment_col` column.
# Built from (identifier, label) pairs; insertion order is the order listed here.
pretty_names = dict(
    (
        ("th_qcpg_0.8_llm_gec_aug", "QCPG (0.8) + LLM GEC"),
        ("th_qcpg_0.5_llm_gec_aug", "QCPG (0.5) + LLM GEC"),
        ("th_qcpg_0.2_llm_gec_aug", "QCPG (0.2) + LLM GEC"),
        ("th_qcpg_0.8_aug", "QCPG (0.8)"),
        ("th_qcpg_0.5_aug", "QCPG (0.5)"),
        ("th_qcpg_0.2_aug", "QCPG (0.2)"),
        ("th_aug", "Backtranslation"),
        ("th_fasttext_aug", "FastText"),
        ("th_llm_gec_aug", "LLM GEC"),
        ("th_llm_paraphrase_aug", "LLM Paraphrase"),
        ("th_ltw2v_aug", "LTW2Vec"),
        ("th_thai2fit_aug", "Thai2Fit"),
        ("th_wordnet_aug", "WordNet"),
    )
)

In [14]:
def get_augment_ratio(name):
    """Return the ratio label to show for an augmentation name.

    The "original" entry has no augmentation ratio, so "N/A" is returned for
    it. Every other name is handed back unchanged.

    NOTE(review): no ratio is actually parsed out of ``name`` here -- confirm
    whether returning the name as-is is the intended behaviour.
    """
    return "N/A" if name == "original" else name

def get_pretty_name(name):
    """Map an augmentation identifier to its display label.

    "original" maps to "Original". Any other name is looked up in the
    module-level ``pretty_names`` dict, so an unknown name raises KeyError.
    """
    if name != "original":
        return pretty_names[name]
    return "Original"


In [15]:
unique_sources = data["source"].unique()
source_scores = {}

for source in unique_sources:
    # Restrict to the rows belonging to this source
    filtered_data = data[data["source"] == source]

    # For each augmentation type, the label of the row with the highest top_1
    # (idxmax returns the first such row when several rows tie).
    best_row_labels = filtered_data.groupby('augment_col')['top_1'].idxmax()

    # Chain the steps so a fresh frame is produced instead of mutating the
    # selection in place: renumber rows from 0 and append the display label.
    best_models = (
        filtered_data.loc[best_row_labels]
        .reset_index(drop=True)
        .assign(pretty_name=lambda frame: frame['augment_col'].apply(get_pretty_name))
    )

    source_scores[source] = best_models

source_scores

{'iapp':    source              augment_col  augment_ratio  top_1  top_5  top_10  \
 0    iapp                 original            0.0    162    183     237   
 1    iapp                   th_aug            0.1    163    184     239   
 2    iapp          th_fasttext_aug            0.1    162    183     237   
 3    iapp           th_llm_gec_aug            0.2    165    187     239   
 4    iapp    th_llm_paraphrase_aug            0.2    169    193     243   
 5    iapp          th_qcpg_0.2_aug            0.2    161    178     233   
 6    iapp  th_qcpg_0.2_llm_gec_aug            0.1    156    178     235   
 7    iapp          th_qcpg_0.5_aug            0.4    162    183     223   
 8    iapp  th_qcpg_0.5_llm_gec_aug            0.1    156    177     234   
 9    iapp          th_qcpg_0.8_aug            0.2    165    181     236   
 10   iapp  th_qcpg_0.8_llm_gec_aug            0.1    150    181     230   
 11   iapp           th_wordnet_aug            0.7    163    184     234   
 
  