In [1]:
#!/usr/bin/env python
"""
BERTScore-only FLORES eval (fixed compute() args).
"""
import pandas as pd
from pathlib import Path
import evaluate
import numpy as np

# Directory layout of the translation outputs that get scored below.
output_dir = Path("translation_results")
target_root = output_dir.joinpath("TARGET_LANGUAGES")
priority_root = output_dir.joinpath("PRIORITY_LANGUAGES")

# CSV holding the reference sentences; align_and_clean reads its
# 'id', 'iso_639_3' and 'text' columns.
ref_path = "data/df.csv"
ref_df = pd.read_csv(ref_path)
print(f"Refs: {ref_df.shape[0]} rows")

# Metric handle shared by every compute_bertscore() call.
bertscore = evaluate.load("bertscore")

def compute_bertscore(hyps, refs, tgt_lang):
    """Return the mean BERTScore F1 of ``hyps`` scored against ``refs``.

    Callers in this notebook pass three-letter ISO 639-3 codes (for example
    "eng"), while bert_score selects its language-specific default models by
    two-letter code. Codes that have a dedicated model are translated first so
    that, e.g., English is not silently scored with the multilingual fallback.
    Every other code is forwarded unchanged, which keeps the previous behaviour
    for those languages.

    Args:
        hyps: list of hypothesis (system output) strings.
        refs: list of reference strings, aligned index-by-index with ``hyps``.
        tgt_lang: language code of the target side; case-insensitive for the
            codes that are translated.

    Returns:
        Arithmetic mean of the per-sentence F1 values returned by the metric.
    """
    # Only the languages for which bert_score ships a dedicated default model
    # need translating; "zho" and "cmn" are both mapped to Chinese.
    iso3_to_bertscore = {"eng": "en", "zho": "zh", "cmn": "zh", "tur": "tr"}
    lang = iso3_to_bertscore.get(tgt_lang.lower(), tgt_lang)

    results = bertscore.compute(predictions=hyps, references=refs, lang=lang)
    return np.mean(results['f1'])

# Accumulator for one summary record per scored translation direction.
evaluation_results = []

# Section banner: a 60-character rule above and below the title.
print("=" * 60, "BERTSCORE EVAL", "=" * 60, sep="\n")



  from .autonotebook import tqdm as notebook_tqdm
  ref_df = pd.read_csv(ref_path)


Refs: 223328 rows
BERTSCORE EVAL


In [None]:
# Smoke test: score one sentence against an identical reference.
compute_bertscore(hyps=["I am a boy"], refs=["I am a boy"], tgt_lang="eng")

In [None]:
def align_and_clean(ref_df, hyp_df, hyp_col, src_lang, tgt_lang):
    """Pair hypotheses with their target-language references by sentence id.

    Args:
        ref_df: reference frame with 'id', 'iso_639_3' and 'text' columns.
        hyp_df: hypothesis frame with an 'id' column and ``hyp_col``.
        hyp_col: name of the column in ``hyp_df`` holding the hypotheses.
        src_lang: source language code; only used in the progress message.
        tgt_lang: value of 'iso_639_3' selecting the reference rows.

    Returns:
        ``(hyps, refs)`` as two equally long lists of stripped, non-empty
        strings, or ``(None, None)`` when no usable pair remains.
    """
    # Keep a single reference per id for the requested language
    # (drop_duplicates retains the first occurrence).
    in_target_lang = ref_df['iso_639_3'] == tgt_lang
    references = ref_df.loc[in_target_lang, ['id', 'text']].drop_duplicates('id')

    # Inner join keeps only ids present on both sides; rows missing either
    # text are removed before any string conversion.
    pairs = (
        hyp_df[['id', hyp_col]]
        .merge(references, on='id', how='inner')
        .dropna(subset=['text', hyp_col])
    )

    stripped_hyp = pairs[hyp_col].astype(str).str.strip()
    stripped_ref = pairs['text'].astype(str).str.strip()

    # A pair is usable only when both sides are non-empty after stripping.
    non_blank = stripped_hyp.ne('') & stripped_ref.ne('')
    hyps = stripped_hyp[non_blank].tolist()
    refs = stripped_ref[non_blank].tolist()

    if not hyps:
        return None, None

    print(f"  {src_lang}→{tgt_lang}: {len(hyps)} pairs")
    return hyps, refs

def _score_directory(root, results, hyp_col="target_text"):
    """Score every ``<src>_to_<tgt>.csv`` file directly under ``root``.

    For each file, the hypotheses in ``hyp_col`` are aligned with the
    module-level ``ref_df`` through ``align_and_clean`` and scored with
    ``compute_bertscore``. A summary record is appended to ``results`` as soon
    as a file has been scored, so earlier scores stay in the list if a later
    file raises.

    Args:
        root: ``pathlib.Path`` of the directory holding the hypothesis CSVs.
        results: list receiving one dict per scored translation direction.
        hyp_col: name of the hypothesis column in each CSV.
    """
    # Sort so the processing order, and with it the row order of the summary,
    # does not depend on the order in which the filesystem lists the files.
    for csv_file in sorted(root.glob("*.csv")):
        # partition() never raises on a stem containing "_to_" more than once;
        # such a file ends up with an unknown target code and is skipped below.
        src_lang, separator, tgt_lang = csv_file.stem.partition("_to_")
        if not separator:
            continue

        hyp_df = pd.read_csv(csv_file)
        hyps, refs = align_and_clean(ref_df, hyp_df, hyp_col, src_lang, tgt_lang)
        if hyps is None:
            print(f"  Skip {csv_file.name}")
            continue

        bert_f1 = compute_bertscore(hyps, refs, tgt_lang)
        results.append({
            "direction": f"{src_lang}_to_{tgt_lang}",
            "srclang": src_lang, "tgtlang": tgt_lang,
            "num_sentences": len(hyps),
            "bertscore_f1": bert_f1
        })
        print(f"  ✓ {src_lang}_to_{tgt_lang}: F1={bert_f1:.4f}")


# Start from an empty list so re-running this cell does not duplicate rows.
evaluation_results.clear()

# TARGET_LANGUAGES
print("TARGET_LANGUAGES:")
_score_directory(target_root, evaluation_results)

# PRIORITY_LANGUAGES
print("\nPRIORITY_LANGUAGES:")
_score_directory(priority_root, evaluation_results)

# Explicit columns keep the header row even when nothing was scored; the order
# matches the key order of the records built above.
summary_columns = ["direction", "srclang", "tgtlang", "num_sentences", "bertscore_f1"]
pd.DataFrame(evaluation_results, columns=summary_columns).to_csv(
    output_dir / "bertscore_summary.csv", index=False
)
print("\nSaved bertscore_summary.csv")

