In [8]:
import pandas as pd
import numpy as np

def plot_updos_detail(file_path, model_name):
    data = pd.read_csv(file_path)

    # Define the shortened method names dictionary including freeze methods
    shortened_method_names = {
        'baseline': 'FT Only',
        'before_fastalign': 'Before FA',
        'before_awesome': 'Before AA',
        'before_dico': 'Before BD',
        'during_fastalign': 'During FA',
        'during_awesome': 'During AA',
        'during_dico': 'During BD',
        'freeze_realign_unfreeze_fastalign': 'Front Freeze Before FA',
        'freeze_realign_unfreeze_awesome': 'Front Freeze Before AA',
        'freeze_realign_unfreeze_dico': 'Front Freeze Before BD',
        'freeze_realign_unfreeze_last_half_fastalign': 'Back Freeze Before FA',
        'freeze_realign_unfreeze_last_half_awesome': 'Back Freeze Before AA',
        'freeze_realign_unfreeze_last_half_dico': 'Back Freeze Before BD',
        'during_partial_freeze_front_fastalign': 'Front Freeze During FA',
        'during_partial_freeze_front_awesome': 'Front Freeze During AA',
        'during_partial_freeze_front_dico': 'Front Freeze During BD',
        'during_partial_freeze_back_fastalign': 'Back Freeze During FA',
        'during_partial_freeze_back_awesome': 'Back Freeze During AA',
        'during_partial_freeze_back_dico': 'Back Freeze During BD',
        'freeze_realign_unfreeze_last_6_fastalign': 'Back Freeze Before FA',
        'freeze_realign_unfreeze_last_6_awesome': 'Back Freeze Before AA',
        'freeze_realign_unfreeze_last_6_dico': 'Back Freeze Before BD',
    }

    # Define the language codes to full names mapping
    language_names = {
        'af': 'Afrikaans',
        'ar': 'Arabic',
        'bg': 'Bulgarian',
        'ca': 'Catalan',
        'zh': 'Chinese',
        'cs': 'Czech',
        'da': 'Danish',
        'fi': 'Finnish',
        'fr': 'French',
        'de': 'German',
        'el': 'Greek',
        'he': 'Hebrew',
        'hi': 'Hindi',
        'hu': 'Hungarian',
        'it': 'Italian',
        'ja': 'Japanese',
        'ko': 'Korean',
        'lv': 'Latvian',
        'lt': 'Lithuanian',
        'no': 'Norwegian',
        'fa': 'Persian',
        'pl': 'Polish',
        'pt': 'Portuguese',
        'ro': 'Romanian',
        'ru': 'Russian',
        'sk': 'Slovak',
        'sl': 'Slovenian',
        'es': 'Spanish',
        'sv': 'Swedish',
        'ta': 'Tamil',
        'th': 'Thai',
        'tr': 'Turkish',
        'uk': 'Ukrainian',
        'vi': 'Vietnamese',
        'avg': "Average"
    }

    # Extract accuracy columns
    accuracy_cols = [col for col in data.columns if 'final_eval_' in col and 'accuracy' in col]

    # Melting the dataframe to get a long format
    long_data = data.melt(id_vars=['method'], value_vars=accuracy_cols, var_name='language', value_name='accuracy')

    # Cleaning up the language column
    long_data['language'] = long_data['language'].str.replace('final_eval_', '').str.replace('_accuracy', '')
    long_data['language'] = long_data['language'].map(language_names)

    # Updating method names with shortened versions
    long_data['method'] = long_data['method'].map(shortened_method_names).fillna('FT Only')

    # Extracting method components
    long_data[['primary_group', 'timing', 'aligner']] = long_data['method'].str.extract(r'(FT Only|Front Freeze|Back Freeze)?\s*(Before|During)?\s*(AA|BD|FA)?')

    long_data["primary_group"] = long_data["primary_group"].fillna("Full realignment")

    # Ensure 'FT Only' has empty primary_group, timing, and aligner values
    long_data.loc[long_data['method'] == 'FT Only', ['primary_group', 'timing', 'aligner']] = ['FT Only', '', '']

    # Pivot the data to create the table
    pivot_data = long_data.pivot_table(index='language', columns=['primary_group', 'timing', 'aligner'], values='accuracy', aggfunc=lambda x: f'{np.mean(x)*100:.1f} {np.std(x)*100:.1f}')

    # Reorder the columns
    column_order = [
        ('FT Only', '', ''),
        ("Full realignment", 'Before', 'FA'),
        ("Full realignment", 'Before', 'AA'),
        ("Full realignment", 'Before', 'BD'),
        ('Front Freeze', 'Before', 'FA'),
        ('Front Freeze', 'Before', 'AA'),
        ('Front Freeze', 'Before', 'BD'),
        ('Back Freeze', 'Before', 'FA'),
        ('Back Freeze', 'Before', 'AA'),
        ('Back Freeze', 'Before', 'BD'),
    ]
    pivot_data = pivot_data.reindex(columns=column_order)

    # highlight the maximum in each row
    def format_and_highlight(formatted_data):
        for idx in formatted_data.index:
            max_value = formatted_data.loc[idx].max()
            if pd.notna(max_value):
                formatted_data.loc[idx] = formatted_data.loc[idx].apply(lambda x: f'\\textbf{{{x.split()[0]}}}$_{{\\pm {x.split()[1]}}}$' if x == max_value else f'{x.split()[0]}$_{{\\pm {x.split()[1]}}}$')
        return formatted_data

    highlighted_data = format_and_highlight(pivot_data.copy())

    # Create the LaTeX table using tabularx and centering "Aligner"
    table_header = r"""
        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccccccccc}
        \hline
        & \textbf{FT Only} & \multicolumn{3}{c}{\textbf{vanilla realignment}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & FA & AA & BD & FA & AA & BD & FA & AA & BD \\
        \hline
    """

    table_footer = r"""
        \hline
        \end{tabular}
        }
        \caption{PoS tagging average accuracy results across 5 seeds using""" + model_name + r"""by freezing strategy, language, and aligner. Aligner names: FA - FastAlign, AA - AWESOME-align, BD - Bilingual Dictionary. The highest average accuracy value for each language is highlighted in bold.}
        \label{table:results_mBERT_PoS-tagging_before}
        \end{table*}
    """

    # Combine the table data
    table_body = ""
    for index, row in highlighted_data.iterrows():
        row_data = " & ".join(row)
        table_body += f"{index} & {row_data} \\\\\n"

    latex_table = table_header + table_body + table_footer

    print(latex_table)

    alignment_methods = ["Full realignment", "Front Freeze", "Back Freeze"]
    aligners = ["FA", "AA", "BD"]

    baseline = list(pivot_data["FT Only"])
    baseline_values = [tuple(map(float, x.split())) for x in baseline]
    lower_bounds = [x[0] - x[1] for x in baseline_values]
    upper_bounds = [x[0] + x[1] for x in baseline_values]

    results = {}
    language_detail = {}
    for method in alignment_methods:
        for aligner in aligners:
            series = list(pivot_data[method]["Before"][aligner])
            languages = list(pivot_data[method]["Before"][aligner].index)
            values = [float(x.split()[0]) for x in series]

            lower, middle, upper = 0, 0, 0
            lowers, middles, uppers = [], [], []
            for lang, lower_bound, upper_bound, value in zip(languages, lower_bounds, upper_bounds, values):
                if lang == "Average":
                    continue
                if value < lower_bound:
                    lower += 1
                    lowers.append(lang)
                elif value > upper_bound:
                    upper += 1
                    uppers.append(lang)
                else:
                    middle += 1
                    middles.append(lang)

            results[(method, aligner)] = (
                lower,
                middle,
                upper
            )
            language_detail[(method, aligner)] = uppers
    for key in results:
        print(key, results[key])
    for key in language_detail:
        print(key, language_detail[key])


In [9]:
plot_updos_detail("./raw_results/bert-base-multilingual-cased__opus100.csv", "mBERT")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccccccccc}
        \hline
        & \textbf{FT Only} & \multicolumn{3}{c}{\textbf{vanilla realignment}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & FA & AA & BD & FA & AA & BD & FA & AA & BD \\
        \hline
    Afrikaans & 87.0$_{\pm 0.4}$ & \textbf{88.4}$_{\pm 0.3}$ & 88.2$_{\pm 0.2}$ & 88.2$_{\pm 0.3}$ & 87.3$_{\pm 0.4}$ & 87.7$_{\pm 0.3}$ & 87.7$_{\pm 0.3}$ & 88.0$_{\pm 0.6}$ & 88.0$_{\pm 0.2}$ & 87.5$_{\pm 0.5}$ \\
Arabic & 51.0$_{\pm 0.5}$ & 63.7$_{\pm 1.6}$ & 63.9$_{\pm 1.0}$ & \textbf{65.1}$_{\pm 1.4}$ & 63.6$_{\pm 0.9}$ & 63.1$_{\pm 1.4}$ & 63.7$_{\pm 1.2}$ & 63.1$_{\pm 0.9}$ & 63.4$_{\pm 1.3}$ & 64.1$_{\pm 1.2}$ \\
Average & 77.0$_{\pm 0.5}$ & 79.1$_{\pm 0.3}$ & 79.2$_{\pm 0.2}$ & \textbf{79.6}$_{\pm 0.4}$ & 78.9$_{\pm 0.4}$ &

In [10]:
plot_updos_detail("./raw_results/distilbert-base-multilingual-cased__opus100.csv", "distilMBERT")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccccccccc}
        \hline
        & \textbf{FT Only} & \multicolumn{3}{c}{\textbf{vanilla realignment}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & FA & AA & BD & FA & AA & BD & FA & AA & BD \\
        \hline
    Afrikaans & 85.5$_{\pm 0.2}$ & \textbf{86.4}$_{\pm 0.3}$ & 86.4$_{\pm 0.2}$ & 85.6$_{\pm 0.3}$ & 86.2$_{\pm 0.1}$ & 86.3$_{\pm 0.2}$ & 86.1$_{\pm 0.3}$ & 86.0$_{\pm 0.2}$ & 86.0$_{\pm 0.2}$ & 85.4$_{\pm 0.1}$ \\
Arabic & 51.7$_{\pm 1.5}$ & 63.9$_{\pm 0.4}$ & 63.6$_{\pm 0.3}$ & \textbf{66.6}$_{\pm 0.4}$ & 63.3$_{\pm 0.4}$ & 63.0$_{\pm 0.4}$ & 65.0$_{\pm 0.5}$ & 63.5$_{\pm 0.5}$ & 62.8$_{\pm 0.7}$ & 65.3$_{\pm 0.3}$ \\
Average & 73.8$_{\pm 0.6}$ & 77.2$_{\pm 0.2}$ & 77.3$_{\pm 0.2}$ & \textbf{77.7}$_{\pm 0.2}$ & 77.0$_{\pm 0.2}$ &

In [11]:
plot_updos_detail("./raw_results/xlm-roberta-base__opus100.csv", "XLM-R")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccccccccc}
        \hline
        & \textbf{FT Only} & \multicolumn{3}{c}{\textbf{vanilla realignment}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{3}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & FA & AA & BD & FA & AA & BD & FA & AA & BD \\
        \hline
    Afrikaans & 88.4$_{\pm 0.3}$ & 88.6$_{\pm 0.1}$ & 88.7$_{\pm 0.1}$ & \textbf{88.8}$_{\pm 0.1}$ & 88.6$_{\pm 0.2}$ & 88.6$_{\pm 0.2}$ & \textbf{88.8}$_{\pm 0.1}$ & 88.6$_{\pm 0.2}$ & 88.7$_{\pm 0.1}$ & 88.4$_{\pm 0.1}$ \\
Arabic & 63.2$_{\pm 0.7}$ & 65.5$_{\pm 0.8}$ & 65.3$_{\pm 1.0}$ & \textbf{67.6}$_{\pm 1.0}$ & 65.5$_{\pm 0.5}$ & 65.3$_{\pm 0.9}$ & 67.0$_{\pm 0.7}$ & 63.9$_{\pm 0.7}$ & 64.3$_{\pm 0.8}$ & 66.3$_{\pm 0.5}$ \\
Average & 80.9$_{\pm 0.1}$ & 80.8$_{\pm 0.2}$ & 81.0$_{\pm 0.2}$ & 81.3$_{\pm 0.1}$ & 81.2$_{\pm 0.1}$ &

In [12]:
import pandas as pd
import numpy as np

# Load the CSV file
file_path = './raw_results/bert-base-multilingual-cased__xnli__opus100.csv'
def plot_xnli_tables(file_path, model_name, aligners=["BD"]):
    data = pd.read_csv(file_path)

    # Define the shortened method names dictionary including freeze methods
    shortened_method_names = {
        'baseline': 'FT Only',
        'before_fastalign': 'Before FA',
        'before_awesome': 'Before AA',
        'before_dico': 'Before BD',
        'during_fastalign': 'During FA',
        'during_awesome': 'During AA',
        'during_dico': 'During BD',
        'freeze_realign_unfreeze_fastalign': 'Front Freeze Before FA',
        'freeze_realign_unfreeze_awesome': 'Front Freeze Before AA',
        'freeze_realign_unfreeze_dico': 'Front Freeze Before BD',
        'freeze_realign_unfreeze_last_half_fastalign': 'Back Freeze Before FA',
        'freeze_realign_unfreeze_last_half_awesome': 'Back Freeze Before AA',
        'freeze_realign_unfreeze_last_half_dico': 'Back Freeze Before BD',
        'during_partial_freeze_front_fastalign': 'Front Freeze During FA',
        'during_partial_freeze_front_awesome': 'Front Freeze During AA',
        'during_partial_freeze_front_dico': 'Front Freeze During BD',
        'during_partial_freeze_back_fastalign': 'Back Freeze During FA',
        'during_partial_freeze_back_awesome': 'Back Freeze During AA',
        'during_partial_freeze_back_dico': 'Back Freeze During BD'
    }

    # Define the language codes to full names mapping
    language_names = {
        'ar': 'Arabic',
        'bg': 'Bulgarian',
        'zh': 'Chinese',
        'fr': 'French',
        'de': 'German',
        'el': 'Greek',
        'hi': 'Hindi',
        'es': 'Spanish',
        'th': 'Thai',
        'tr': 'Turkish',
        'vi': 'Vietnamese',
        'avg': "Average"
    }

    # Extract accuracy columns
    accuracy_cols = [col for col in data.columns if 'final_eval_' in col and 'accuracy' in col]

    # Melting the dataframe to get a long format
    long_data = data.melt(id_vars=['method'], value_vars=accuracy_cols, var_name='language', value_name='accuracy')

    # Cleaning up the language column
    long_data['language'] = long_data['language'].str.replace('final_eval_', '').str.replace('_accuracy', '')
    long_data['language'] = long_data['language'].map(language_names)

    # Updating method names with shortened versions
    long_data['method'] = long_data['method'].map(shortened_method_names).fillna('FT Only')

    # Extracting method components
    long_data[['primary_group', 'timing', 'aligner']] = long_data['method'].str.extract(r'(FT Only|Front Freeze|Back Freeze)?\s*(Before|During)?\s*(AA|BD|FA)?')

    long_data["primary_group"] = long_data["primary_group"].fillna("Full realignment")

    # Ensure 'FT Only' has empty primary_group, timing, and aligner values
    long_data.loc[long_data['method'] == 'FT Only', ['primary_group', 'timing', 'aligner']] = ['FT Only', '', '']

    # Pivot the data to create the table
    pivot_data = long_data.pivot_table(index='language', columns=['primary_group', 'timing', 'aligner'], values='accuracy', aggfunc=lambda x: f'{np.mean(x)*100:.1f} {np.std(x)*100:.1f}')

    # Reorder the columns
    column_order = [
        ('FT Only', '', ''),
        *[
            ("Full realignment", "Before", aligner) for aligner in aligners
        ],
        *[
            ("Front Freeze", "Before", aligner) for aligner in aligners
        ],
        *[
            ("Back Freeze", "Before", aligner) for aligner in aligners
        ],
    ]
    pivot_data = pivot_data.reindex(columns=column_order)

    # highlight the maximum in each row
    def format_and_highlight(formatted_data):
        for idx in formatted_data.index:
            max_value = formatted_data.loc[idx].max()
            if pd.notna(max_value):
                formatted_data.loc[idx] = formatted_data.loc[idx].apply(lambda x: f'\\textbf{{{x.split()[0]}}}$_{{\\pm {x.split()[1]}}}$' if x == max_value else f'{x.split()[0]}$_{{\\pm {x.split()[1]}}}$')
        return formatted_data

    highlighted_data = format_and_highlight(pivot_data.copy())

    # Create the LaTeX table using tabularx and centering "Aligner"
    table_header = r"""
        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccc}
        \hline
        & \textbf{FT Only} & \multicolumn{1}{c}{\textbf{vanilla realignment}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & BD & BD & BD \\
        \hline
    """

    table_footer = r"""
        \hline
        \end{tabular}
        }
        \caption{XNLI average accuracy results across 5 seeds using """ + model_name + r""" by freezing strategy, language, and aligner. Aligner names: BD - Bilingual Dictionary. The highest average accuracy value for each language is highlighted in bold.}
        \label{table:results_mBERT_xnli_before}
        \end{table*}
    """

    # Combine the table data
    table_body = ""
    for index, row in highlighted_data.iterrows():
        row_data = " & ".join(row)
        table_body += f"{index} & {row_data} \\\\\n"

    latex_table = table_header + table_body + table_footer

    print(latex_table)

    alignment_methods = ["Full realignment", "Front Freeze", "Back Freeze"]

    baseline = list(pivot_data["FT Only"])
    baseline_values = [tuple(map(float, x.split())) for x in baseline]
    lower_bounds = [x[0] - x[1] for x in baseline_values]
    upper_bounds = [x[0] + x[1] for x in baseline_values]

    results = {}
    language_detail = {}
    for method in alignment_methods:
        for aligner in aligners:
            series = list(pivot_data[method]["Before"][aligner])
            languages = list(pivot_data[method]["Before"][aligner].index)
            values = [float(x.split()[0]) for x in series]

            lower, middle, upper = 0, 0, 0
            lowers, middles, uppers = [], [], []
            for lang, lower_bound, upper_bound, value in zip(languages, lower_bounds, upper_bounds, values):
                if lang == "Average":
                    continue
                if value < lower_bound:
                    lower += 1
                    lowers.append(lang)
                elif value > upper_bound:
                    upper += 1
                    uppers.append(lang)
                else:
                    middle += 1
                    middles.append(lang)

            results[(method, aligner)] = (
                lower,
                middle,
                upper
            )
            language_detail[(method, aligner)] = uppers
    for key in results:
        print(key, results[key])
    for key in language_detail:
        print(key, language_detail[key])

In [13]:
plot_xnli_tables("./raw_results/xlm-roberta-base__xnli__opus100__aggregated_additional.csv", "XLM-R")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccc}
        \hline
        & \textbf{FT Only} & \multicolumn{1}{c}{\textbf{vanilla realignment}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & BD & BD & BD \\
        \hline
    Arabic & \textbf{70.8}$_{\pm 0.3}$ & 70.0$_{\pm 0.4}$ & 70.2$_{\pm 0.3}$ & 70.0$_{\pm 0.4}$ \\
Average & \textbf{73.9}$_{\pm 0.2}$ & 73.2$_{\pm 0.2}$ & 73.6$_{\pm 0.2}$ & 72.9$_{\pm 0.3}$ \\
Bulgarian & \textbf{77.0}$_{\pm 0.2}$ & 75.5$_{\pm 0.3}$ & 76.5$_{\pm 0.3}$ & 75.8$_{\pm 0.4}$ \\
Chinese & \textbf{72.5}$_{\pm 0.3}$ & 72.4$_{\pm 0.3}$ & 72.3$_{\pm 0.2}$ & 72.1$_{\pm 0.4}$ \\
French & \textbf{77.1}$_{\pm 0.1}$ & 76.4$_{\pm 0.3}$ & 76.7$_{\pm 0.1}$ & 76.0$_{\pm 0.3}$ \\
German & \textbf{75.7}$_{\pm 0.4}$ & 75.0$_{\pm 0.3}$ & 75.2$_{\pm 0.4}$ & 74.6$_{\pm 0.4}

In [16]:
plot_xnli_tables("./raw_results/bert-base-multilingual-cased__xnli__opus100__aggregated_additional.csv", "mBERT")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccc}
        \hline
        & \textbf{FT Only} & \multicolumn{1}{c}{\textbf{vanilla realignment}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & BD & BD & BD \\
        \hline
    Arabic & 64.6$_{\pm 0.5}$ & 65.0$_{\pm 0.6}$ & \textbf{65.6}$_{\pm 0.2}$ & 65.0$_{\pm 0.8}$ \\
Average & 66.3$_{\pm 0.6}$ & 67.4$_{\pm 0.4}$ & \textbf{67.7}$_{\pm 0.2}$ & 67.5$_{\pm 0.3}$ \\
Bulgarian & 68.0$_{\pm 0.8}$ & 69.1$_{\pm 0.6}$ & \textbf{69.3}$_{\pm 0.2}$ & 69.1$_{\pm 0.7}$ \\
Chinese & 68.9$_{\pm 0.6}$ & 69.5$_{\pm 0.7}$ & 69.2$_{\pm 0.4}$ & \textbf{69.9}$_{\pm 0.6}$ \\
French & 72.8$_{\pm 0.6}$ & 73.6$_{\pm 0.3}$ & \textbf{74.2}$_{\pm 0.3}$ & 73.7$_{\pm 0.5}$ \\
German & 70.1$_{\pm 0.5}$ & 70.3$_{\pm 0.6}$ & \textbf{71.0}$_{\pm 0.3}$ & 70.9$_{\pm 0.6}

In [15]:
plot_xnli_tables("./raw_results/distilbert-base-multilingual-cased__xnli__opus100__aggregated_additional.csv", "distilMBERT")


        \begin{table*}[ht]
        \centering
        \adjustbox{max width=\linewidth}{
        \begin{tabular}{lcccc}
        \hline
        & \textbf{FT Only} & \multicolumn{1}{c}{\textbf{vanilla realignment}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with front-freezing}} & \multicolumn{1}{c}{\textbf{\textsc{AlignFreeze} with back-freezing}}\\ 
        \cline{2-11}
        & -  & BD & BD & BD \\
        \hline
    Arabic & 59.2$_{\pm 0.3}$ & 59.3$_{\pm 0.6}$ & \textbf{59.8}$_{\pm 0.4}$ & 59.2$_{\pm 0.5}$ \\
Average & 60.1$_{\pm 0.2}$ & 61.6$_{\pm 0.2}$ & 61.6$_{\pm 0.1}$ & \textbf{61.9}$_{\pm 0.2}$ \\
Bulgarian & 63.4$_{\pm 0.3}$ & 63.6$_{\pm 0.4}$ & \textbf{64.0}$_{\pm 0.2}$ & 63.8$_{\pm 0.5}$ \\
Chinese & 63.9$_{\pm 0.8}$ & 63.4$_{\pm 0.1}$ & \textbf{64.1}$_{\pm 0.5}$ & 63.4$_{\pm 0.5}$ \\
French & \textbf{70.1}$_{\pm 0.6}$ & 68.7$_{\pm 0.6}$ & 69.4$_{\pm 0.3}$ & 69.1$_{\pm 0.2}$ \\
German & 65.7$_{\pm 0.2}$ & 64.8$_{\pm 0.3}$ & \textbf{66.1}$_{\pm 0.5}$ & 65.5$_{\pm 0.6}