In [None]:
import sys
sys.path.append("..")

from src.evaluation import evaluate_outputs
import pandas as pd
import re

In [None]:
# Model checkpoints to score. Each name is interpolated into the results path
# (../results/<model>/<technique>/...), and rows are produced in this order.
models = [
    "gemma-3-27b-it",
    "Qwen2.5-72B-Instruct",
    "Llama-3.3-70B-Instruct",
    "Qwen3-8B",
    "Qwen3-32B",
]

# Prompting techniques to score. The two zero-shot variants come first, then
# every few-shot family crossed with each shot count; the resulting order is
# the row order within each model.
techniques = ['zero-shot', 'translated-zero-shot'] + [
    f'{family}_{shots}'
    for family in (
        'fewshot',
        'filtered_fewshot',
        'translated-fewshot',
        'translated-filtered_fewshot',
    )
    for shots in (1, 2, 5, 10)
]


# Language codes of the dev splits to score (a mix of 3-letter and 2-letter
# codes, kept exactly as they appear in the result file names). Each code
# becomes one column of the results table, in this order.
languages = "ara deu eng fra hi mr msa pa pol por spa ta tha".split()

In [None]:
# Build the METEOR results table: one row per (model, technique) pair and one
# column per language.
#
# Rows are collected as plain dicts and turned into a DataFrame once at the
# end, instead of growing the frame with pd.concat inside the loop (which is
# quadratic and leaves every row with the same index label).
#
# A combination whose generated CSV is missing or empty is recorded as NaN so
# the score columns stay numeric. Any other failure (malformed CSV, missing
# column, an error inside evaluate_outputs) is allowed to propagate rather
# than being silently recorded as "no score".
result_rows = []

for model in models:
    for technique in techniques:
        row = {'model': model, 'technique': technique}
        for language in languages:
            try:
                df = pd.read_csv(f'../results/{model}/{technique}/dev-{language}_generated.csv')
            except (FileNotFoundError, pd.errors.EmptyDataError):
                # This run has not been produced (or produced nothing): leave a gap.
                row[language] = float('nan')
                continue

            generated_outputs = df['generated_output'].tolist()

            metrics = evaluate_outputs(
                generated_outputs,
                df['normalized claim'].tolist()
            )
            meteor = metrics['meteor']

            row[language] = meteor
        result_rows.append(row)

# Passing `columns` fixes the column order and keeps the expected columns even
# if no rows were collected.
results_df = pd.DataFrame(result_rows, columns=['model', 'technique'] + languages)

In [None]:
# Add an 'average' column: the row-wise mean over the per-language score
# columns for each (model, technique) row.
per_language_scores = results_df[languages]
results_df['average'] = per_language_scores.mean(axis=1)

In [None]:
# Display the results table (one row per model/technique pair) as the cell output.
results_df

In [None]:
# Print the table as LaTeX source: identifying columns first, then one column
# per language, then the average. The index is omitted and floats are
# rendered with three decimals.
latex_columns = ['model', 'technique'] + languages + ['average']
latex_table = results_df[latex_columns].to_latex(
    index=False,
    float_format="{:.3f}".format,
)
print(latex_table)