<a href="https://colab.research.google.com/github/lapatradaa/M-MMT4NL/blob/main/LLMs_evaluation2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sacrebleu bert-score
!pip install sacrebleu bert-score pythainlp
!pip -q install pandas openpyxl jellyfish bert-score tqdm openai --upgrade



In [2]:
import os, random, math
from pathlib import Path
import pandas as pd
import random
import os
from tqdm import tqdm
import jellyfish
from bert_score import score as bertscore


In [3]:
OPENAI_MODEL_TRANSLATE = "gpt-4o-mini"
OPENAI_MODEL_PERTURB  = "gpt-4o-mini"
TEMPERATURE_TRANSLATE  = 0.2
TEMPERATURE_PERTURB    = 0.7


In [4]:
# multilingual BERTScore model (works well for Thai)
BERT_MODEL = "xlm-roberta-large"
W_BERT = 0.8
W_JARO = 0.2

RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [5]:
from openai import OpenAI

_client = OpenAI(api_key= "<input-openai-apikey>")
def _chat_once(system_prompt: str, user_prompt: str, *, model: str, temperature: float) -> str:
    out = _client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user",   "content": user_prompt.strip()},
        ],
    )
    return (out.choices[0].message.content or "").strip()

In [6]:
# (A) Thai -> English (step 2)
SYSTEM_THAI_TO_EN = (
    "You are a professional translator and language expert. "
    "Translate the following Thai sentence into English. "
    "Use English at a C1 proficiency level and output only the final English translation."
)
USER_THAI_TO_EN = (
    "Given this Thai text, translate it into English. Preserve the semantics as much as possible.\n\n{thai_text}"
)


In [7]:
# (B) English -> Thai (your exact prompts, used for back-translation)
SYSTEM_EN_TO_THAI = (
    "You are a professional translator and language expert. \n"
    "Please translate the following English sentence into Thai. \n"
    "Use English at a C1 proficiency level when framing any instructions or explanations—and output only the final Thai translation."
)
USER_EN_TO_THAI = (
    "Given this English text, translate it into Thai. Preserve the semantic of the original English text in the translate version as much as possible.\n\n{english_text}"
)

In [8]:
# (C) English perturbation (step 3)
SYSTEM_PERTURB_EN = "You are an expert paraphraser. You lightly perturb English text while preserving meaning."
USER_PERTURB_EN = (
    "Paraphrase the following English sentence with very small changes "
    "(synonyms, slight reordering, optional mild intensifiers). "
    "Do NOT change the meaning. Output only the sentence.\n\n{english_text}"
)

In [9]:
# ---------- LLM wrappers ----------
def thai_to_english(thai: str) -> str:
    return _chat_once(SYSTEM_THAI_TO_EN, USER_THAI_TO_EN.format(thai_text=thai),
                      model=OPENAI_MODEL_TRANSLATE, temperature=TEMPERATURE_TRANSLATE)

def english_to_thai(eng: str) -> str:
    return _chat_once(SYSTEM_EN_TO_THAI, USER_EN_TO_THAI.format(english_text=eng),
                      model=OPENAI_MODEL_TRANSLATE, temperature=TEMPERATURE_TRANSLATE)

def perturb_english(eng: str) -> str:
    return _chat_once(SYSTEM_PERTURB_EN, USER_PERTURB_EN.format(english_text=eng),
                      model=OPENAI_MODEL_PERTURB, temperature=TEMPERATURE_PERTURB)

In [10]:
def jaro(a: str, b: str) -> float:
    try:
        return float(jellyfish.jaro_similarity(a or "", b or ""))
    except Exception:
        return 0.0

def bert_f1(ref: str, cand: str) -> float:
    """
    Compute multilingual BERTScore F1 (Thai ok).
    We compare THAI vs THAI (original vs back-translation).
    """
    try:
        P, R, F1 = bertscore(
            cands=[cand],
            refs=[ref],
            model_type=BERT_MODEL,
            rescale_with_baseline=True,
            verbose=False,
        )
        return float(F1[0].item())
    except Exception:
        return 0.0

def fitness(ref_thai: str, cand_thai: str):
    b = bert_f1(ref_thai, cand_thai)
    j = jaro(ref_thai, cand_thai)
    f = W_BERT * b + W_JARO * j
    return round(f, 4), round(b, 4), round(j, 4)


In [11]:
def evaluate_sentence(thai_text: str, rounds: int = 5):
    """
    thai_text: original TH sentence
    rounds: number of perturbation rounds

    Returns: dict with base_en, per-round outputs/scores, and best result.
    """
    out = {"original": thai_text}

    # Step 2: initial Thai->English
    base_en = thai_to_english(thai_text)
    out["base_en"] = base_en

    best = {"score": -1.0, "round": None, "en": "", "th_back": "", "bert": 0.0, "jaro": 0.0}
    cur_en = base_en
    for r in range(1, rounds + 1):
        cand_en = perturb_english(cur_en)        # Step 3: perturb
        cand_th = english_to_thai(cand_en)       # back-translate for scoring on Thai
        f, b, j = fitness(thai_text, cand_th)    # Step 4

        out[f"round{r}_en"]       = cand_en
        out[f"round{r}_back_th"]  = cand_th
        out[f"round{r}_bert"]     = b
        out[f"round{r}_jaro"]     = j
        out[f"round{r}_fitness"]  = f

        if f > best["score"]:
            best = {"score": f, "round": r, "en": cand_en, "th_back": cand_th, "bert": b, "jaro": j}

        cur_en = cand_en  # chain next round from last output

    out["best_round"]   = best["round"]
    out["best_fitness"] = best["score"]
    out["best_bert"]    = best["bert"]
    out["best_jaro"]    = best["jaro"]
    out["best_en"]      = best["en"]
    out["best_th_back"] = best["th_back"]
    return out


In [12]:
def load_table(path: str) -> pd.DataFrame:
    ext = Path(path).suffix.lower()
    if ext in (".xlsx", ".xls"):
        return pd.read_excel(path)
    if ext == ".csv":
        return pd.read_csv(path)
    raise ValueError("Use .xlsx/.xls or .csv")

def save_table(df: pd.DataFrame, path: str):
    ext = Path(path).suffix.lower()
    if ext in (".xlsx", ".xls"):
        df.to_excel(path, index=False)
    elif ext == ".csv":
        df.to_csv(path, index=False)
    else:
        raise ValueError("Use .xlsx/.xls or .csv")

def run_file(infile: str, outfile: str, rounds: int = 5):
    df = load_table(infile)
    if "original" not in df.columns:
        raise ValueError("Input must have a column named 'original' (Thai).")

    rows = []
    for i in tqdm(range(len(df)), desc="Evaluating"):
        thai = str(df.at[i, "original"]).strip()
        if not thai:
            continue
        try:
            rows.append(evaluate_sentence(thai, rounds=rounds))
        except Exception as e:
            rows.append({"original": thai, "error": f"{type(e).__name__}: {e}"})

    out_df = pd.DataFrame(rows)

    # pretty column ordering
    cols = ["original", "base_en"]
    for r in range(1, rounds + 1):
        cols += [f"round{r}_en", f"round{r}_back_th", f"round{r}_bert", f"round{r}_jaro", f"round{r}_fitness"]
    cols += ["best_round", "best_fitness", "best_bert", "best_jaro", "best_en", "best_th_back", "error"]
    cols = [c for c in cols if c in out_df.columns]
    out_df = out_df.reindex(columns=cols)

    save_table(out_df, outfile)
    display(out_df.head())
    print(f"Saved → {outfile}")


In [18]:
df = load_table("/content/MMT4NL_result (2).xlsx")

# clean headers
df = df.rename(columns=lambda x: x.strip())
if "Sentiment" in df.columns:
    df = df.rename(columns={"Sentiment": "original"})

# overwrite the same file
df.to_excel("/content/MMT4NL_result (2).xlsx", index=False)

# now run with the cleaned file
infile  = "/content/MMT4NL_result (2).xlsx"
outfile = "/content/scored.xlsx"

run_file(infile, outfile, rounds=6)


Evaluating: 100%|██████████| 51/51 [09:48<00:00, 11.54s/it]


Unnamed: 0,original,base_en,round1_en,round1_back_th,round1_bert,round1_jaro,round1_fitness,round2_en,round2_back_th,round2_bert,...,round6_back_th,round6_bert,round6_jaro,round6_fitness,best_round,best_fitness,best_bert,best_jaro,best_en,best_th_back
0,ฉันมีความสุขมากที่ได้พบเพื่อนใหม่,I am very happy to have met new friends.,I am truly delighted to have made new friends.,ฉันรู้สึกดีใจมากที่ได้ทำความรู้จักกับเพื่อนใหม่,0.0,0.7595,0.1519,I am genuinely thrilled to have formed new fri...,ฉันรู้สึกตื่นเต้นอย่างแท้จริงที่ได้สร้างมิตรภา...,0.0,...,ฉันรู้สึกตื่นเต้นอย่างแท้จริงที่ได้สร้างมิตรภา...,0.0,0.5496,0.1099,1,0.1519,0.0,0.7595,I am truly delighted to have made new friends.,ฉันรู้สึกดีใจมากที่ได้ทำความรู้จักกับเพื่อนใหม่
1,ฉันตื่นเต้นกับการเดินทางไปต่างประเทศ,I am excited about the trip abroad.,I am thrilled about the journey overseas.,ฉันรู้สึกตื่นเต้นเกี่ยวกับการเดินทางไปต่างประเทศ,0.0,0.7973,0.1595,I am excited about the trip abroad.,ฉันรู้สึกตื่นเต้นเกี่ยวกับการเดินทางไปต่างประเทศ,0.0,...,ฉันตื่นเต้นเกี่ยวกับการเดินทางไปต่างประเทศ,0.0,0.8447,0.1689,4,0.1689,0.0,0.8447,I am excited about the trip abroad.,ฉันตื่นเต้นเกี่ยวกับการเดินทางไปต่างประเทศ
2,ฉันไม่พอใจกับบริการที่ได้รับ,I am not satisfied with the service I received.,I am not pleased with the service I got.,ฉันไม่พอใจกับบริการที่ได้รับ,0.0,1.0,0.2,I am not satisfied with the service I received.,ฉันไม่พอใจกับบริการที่ฉันได้รับ,0.0,...,ฉันไม่พอใจกับบริการที่ฉันได้รับ,0.0,0.9697,0.1939,1,0.2,0.0,1.0,I am not pleased with the service I got.,ฉันไม่พอใจกับบริการที่ได้รับ
3,ฉันรู้สึกเหนื่อยและหมดแรง,I feel tired and drained.,I feel fatigued and exhausted.,ฉันรู้สึกเหนื่อยล้าและอ่อนเพลีย,0.0,0.7584,0.1517,I feel tired and drained.,ฉันรู้สึกเหนื่อยและหมดแรง,0.0,...,ฉันรู้สึกเหนื่อยและอ่อนล้า,0.0,0.8021,0.1604,2,0.2,0.0,1.0,I feel tired and drained.,ฉันรู้สึกเหนื่อยและหมดแรง
4,เพลงนี้ทำให้ฉันรู้สึกสงบ,This song makes me feel at peace.,This song brings me a sense of peace.,เพลงนี้ทำให้ฉันรู้สึกสงบสุข,0.0,0.963,0.1926,This song provides me with a feeling of tranqu...,เพลงนี้มอบความรู้สึกสงบให้กับฉัน,0.0,...,เพลงนี้ทำให้ฉันรู้สึกสงบสุข,0.0,0.963,0.1926,1,0.1926,0.0,0.963,This song brings me a sense of peace.,เพลงนี้ทำให้ฉันรู้สึกสงบสุข


Saved → scored.xlsx
