In [1]:
from itertools import chain
import pickle
import numpy as np
import os

In [2]:
def get_table(language_models, mode = 'symmetry', results_dir = '../_results/language-models'):
    """Collect median and quartile offsets of a score for every pickled entry.

    For each name in ``language_models`` the file ``<results_dir>/<name>.pkl``
    is unpickled; it is expected to hold a dict. For every key of that dict
    a flat score vector is derived from the stored value:

    * ``mode == 'symmetry'``:       ``2 * value[-3].flatten() - 1``
    * ``mode == 'directionality'``: ``-1 * value[-2].flatten()``

    Args:
        language_models: iterable of result-file stems (without ``.pkl``).
        mode: ``'symmetry'`` or ``'directionality'``.
        results_dir: directory containing the ``.pkl`` files. Defaults to the
            location the notebook has always used.

    Returns:
        dict mapping each key found in the pickles to
        ``[median, median - 25th percentile, 75th percentile - median]``.
        If the same key occurs in several files, the last one wins.

    Raises:
        ValueError: if ``mode`` is not one of the two supported values.
    """
    if mode not in ('symmetry', 'directionality'):
        raise ValueError(
            f"Unknown mode {mode!r}; expected 'symmetry' or 'directionality'."
        )

    table = {}

    for m in language_models:
        path = os.path.join(results_dir, f'{m}.pkl')

        # Skip models without a results file. Previously the loop below ran
        # anyway, using an unbound (first model) or stale (later model)
        # `models` variable.
        if not os.path.isfile(path):
            print(f"Skipping '{m}': no results file at '{path}'.")
            continue

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at result files you produced yourself.
        with open(path, 'rb') as file:
            models = pickle.load(file)

        for key, entry in models.items():
            # NOTE(review): entry[-3] / entry[-2] are presumably the stored
            # symmetry / directionality arrays - confirm against the producer.
            if mode == 'symmetry':
                scores = 2 * entry[-3].flatten() - 1
            else:
                scores = -1 * entry[-2].flatten()

            score_median = np.median(scores)
            lower, upper = np.percentile(scores, [25, 75])

            # Quartiles are stored as non-negative distances from the median.
            table[key] = [score_median, score_median - lower, upper - score_median]

    return table

def print_table(table, mode, output_filename = None):
    """Render ``table`` as a two-column-group LaTeX table and write it to disk.

    The entries are laid out side by side: the first half of the entries
    (rounded up) fills the left three columns, the remainder fills the right
    three columns. When the number of entries is odd, the last row has an
    empty right half.

    Args:
        table: dict mapping a model name to ``[median, q1, q2]`` where ``q1``
            and ``q2`` are printed as ``$\\pm$ [q1, q2]``. Insertion order
            determines row order.
        mode: score name; used in the caption, the label and the default
            output path.
        output_filename: where to write the LaTeX source. Defaults to
            ``../_results/table-<mode>-scores-language.txt``.

    Side effects:
        Writes the file and prints a confirmation line.
    """
    MAX_NAME_LENGTH = 3000

    def _cells(name):
        # One "name & median & +-[q1, q2]" triple; long names keep their tail.
        median, q1, q2 = table[name][0], table[name][1], table[name][2]
        shown = name[-MAX_NAME_LENGTH:] if len(name) > MAX_NAME_LENGTH else name
        return f"{shown} & {round(median, 2)} & $\\pm$ [{round(q1, 2)}, {round(q2, 2)}]"

    names = list(table.keys())

    # Round up so that an odd number of entries does not drop the last one.
    split_point = (len(names) + 1) // 2

    # Caption and label follow `mode`, so the two generated tables no longer
    # share the "symmetry" caption and label.
    caption = (
        mode.capitalize()
        + " score for open source pretrained language models. All models are"
        + " available on Huggingface \\citep{wolfHuggingFaceTransformersStateoftheart2020}."
    )
    label = "table:" + mode + "-score-models"

    # \label must come after \caption, otherwise \ref does not resolve to
    # the table number.
    latex_table = (
        "\\begin{table}\n"
        "    \\caption{" + caption + "}\n"
        "    \\label{" + label + "}\n"
        "    \\vspace{5pt}\n"
        "    \\centering\n"
        "    \\begin{tabular}{lcc|lcc}\n"
        "        \\toprule\n"
        "        \\textbf{Model} & \\textbf{Median} & \\textbf{Interquartile range}"
        " & \\textbf{Model} & \\textbf{Median} & \\textbf{Interquartile range} \\\\ \n"
        "        \\midrule\n"
    )

    for i in range(split_point):
        left = _cells(names[i])

        j = i + split_point
        # Three empty cells when there is no partner entry for this row.
        right = _cells(names[j]) if j < len(names) else "& &"

        latex_table += f"        {left} & {right} \\\\ \n"

    latex_table += """    \\bottomrule
    \\end{tabular}
    \\end{table}"""

    if output_filename is None:
        output_filename = f"../_results/table-{mode}-scores-language.txt"

    with open(output_filename, 'w') as file:
        file.write(latex_table)
    print(f"LaTeX table saved to '{output_filename}'.")

In [5]:
# Result-file stems; get_table looks each one up as "<stem>.pkl".
language_models = [
    'BERT', 'ALBERT', 'ROBERTA', 'ModernBERT',
    'GPT', 'GPT-neo', 'TinyGPT', 'LLAMA',
    'MISTRAL', 'MobileLLM', 'phi-1', 'phi-2', 'phi-3',
    'iGPT', 'GIT',
    'HUBERT', 'SpeechT5Encoder', 'MusicGenEncoder',
    'SpeechT5Decoder', 'MusicGenDecoder',
]
mode = 'symmetry'

# Gather the per-entry statistics, then write them out as a LaTeX table.
table = get_table(language_models, mode=mode)
print_table(table, mode)

LaTeX table saved to '../_results/table-symmetry-scores-language.txt'.


In [4]:
# Result-file stems; get_table looks each one up as "<stem>.pkl".
language_models = [
    'BERT', 'ALBERT', 'ROBERTA', 'ModernBERT',
    'GPT', 'GPT-neo', 'TinyGPT', 'LLAMA',
    'MISTRAL', 'MobileLLM', 'phi-1', 'phi-2', 'phi-3',
    'iGPT', 'GIT',
    'HUBERT', 'SpeechT5Encoder', 'MusicGenEncoder',
    'SpeechT5Decoder', 'MusicGenDecoder',
]
mode = 'directionality'

# Gather the per-entry statistics, then write them out as a LaTeX table.
table = get_table(language_models, mode=mode)
print_table(table, mode)

LaTeX table saved to '../_results/table-directionality-scores-language.txt'.
