# BertaQA Open Instruct Models

In [9]:
import os
import json
import pandas as pd

# Task names used to filter the 'Task' column when building comparison tables.
BASQUE_TASKS = ['bertaqa_eu']
ENGLISH_TASKS = ['bertaqa_en']

def extract_accuracies(results_dir):
    """Collect per-task accuracies from every ``.json`` file under *results_dir*.

    The directory tree is walked recursively. Each JSON file is expected to
    hold an object with a ``results`` mapping of ``task -> metrics``; the
    ``acc,none`` metric of each task is converted to a percentage rounded to
    two decimals. The model name is the part of the containing directory's
    name after the last ``__`` (the whole name when there is no ``__``).

    JSON files that do not have that shape (top level is not an object,
    ``results`` is not a mapping, a task entry is not a mapping, or the
    ``acc,none`` metric is absent) are skipped instead of raising.

    Args:
        results_dir: Root directory to search.

    Returns:
        A list of ``{'Model': str, 'Task': str, 'Accuracy': float}`` dicts,
        in sorted directory/file traversal order.

    Raises:
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    data = []
    for root, dirs, files in os.walk(results_dir):
        # Sorting in place makes os.walk descend in a reproducible order.
        dirs.sort()
        # The model name depends only on the directory, so compute it once.
        model_name = os.path.basename(root).split('__')[-1]
        for filename in sorted(files):
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(root, filename)
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = json.load(f)
            if not isinstance(content, dict):
                continue
            results = content.get('results')
            if not isinstance(results, dict):
                continue
            for task, metrics in results.items():
                if not isinstance(metrics, dict):
                    continue
                acc = metrics.get('acc,none')
                if acc is None:
                    continue
                data.append({
                    'Model': model_name,
                    'Task': task,
                    # Fraction -> percentage with two decimal places.
                    'Accuracy': round(acc * 100, 2),
                })
    return data

def create_comparison_table(data, tasks_filter=None):
    """Pivot accuracy records into a one-row-per-model comparison table.

    Args:
        data: Records with ``'Model'``, ``'Task'`` and ``'Accuracy'`` keys,
            as produced by ``extract_accuracies``.
        tasks_filter: Optional collection of task names to keep. ``None``
            keeps every task.

    Returns:
        A DataFrame with a ``'Model'`` column followed by one column per
        task. Several records for the same model/task pair are averaged
        (``pivot_table``'s default aggregation). When there is nothing to
        tabulate, an empty DataFrame with only the ``'Model'`` column is
        returned.
    """
    def _empty_table():
        # Same layout as a pivoted table that has no task columns.
        return pd.DataFrame(columns=['Model']).rename_axis(columns='Task')

    df = pd.DataFrame(data)
    # With no records the frame has no 'Task' column to filter or pivot on.
    if df.empty:
        return _empty_table()
    if tasks_filter is not None:
        df = df[df['Task'].isin(tasks_filter)]
        if df.empty:
            return _empty_table()
    comparison_table = df.pivot_table(index=['Model'], columns='Task', values='Accuracy')
    return comparison_table.reset_index()

def merge_all_tables(tables):
    """Stack several tables vertically into one with a fresh 0..n-1 index.

    Args:
        tables: Iterable of DataFrames to concatenate row-wise.

    Returns:
        The concatenated DataFrame, or an empty DataFrame when *tables*
        contains nothing (``pd.concat`` itself raises ``ValueError`` then).
    """
    # Materialize so a one-shot iterable can be checked for emptiness.
    tables = list(tables)
    if not tables:
        return pd.DataFrame()
    return pd.concat(tables, ignore_index=True)

def main(results_dir='../results'):
    """Build and show the accuracy comparison table for the BertaQA tasks.

    Args:
        results_dir: Directory tree holding the evaluation result JSON
            files. Defaults to ``'../results'``.
    """
    data = extract_accuracies(results_dir)

    # One table with both the English and the Basque task columns.
    all_table = create_comparison_table(data, tasks_filter=ENGLISH_TASKS + BASQUE_TASKS)

    # `display` is only defined inside IPython/Jupyter sessions; fall back to
    # print so the file also works when run as a plain script.
    try:
        show = display
    except NameError:
        show = print
    show(all_table)

# Build and show the table only when this file is executed directly.
if __name__ == "__main__":
    main()

Task,Model,bertaqa_en,bertaqa_eu
0,Llama-2-13b-hf,57.06,38.96
1,Llama-2-70b-hf,63.5,45.86
2,Llama-2-7b-hf,53.01,36.0
3,Meta-Llama-3-70B,72.22,69.85
4,Meta-Llama-3-70B-Instruct,74.56,70.21
5,Meta-Llama-3-8B,63.58,52.9
6,Meta-Llama-3-8B-Instruct,63.86,54.79
7,Mistral-7B-v0.1,60.91,44.22
8,Mixtral-8x7B-v0.1,70.16,52.4
9,Qwen1.5-14B,60.39,45.4
