In [None]:
!pip install torch transformers sacrebleu tqdm

In [None]:
# Inline evaluation examples. Each record pairs a dialogue-style "prompt"
# (a user turn followed by the agent cue) with the reference "target" reply.
data = [
    {"prompt": "ব্যবহারকারী: তুমি কি আমাকে আবহাওয়ার খবর বলতে পারো?\nএজেন্ট:",
     "target": "অবশ্যই! দয়া করে আপনার অবস্থান জানান, আমি আবহাওয়ার খবর জানাব।"},
    {"prompt": "ব্যবহারকারী: আজ শুক্রবার, আমি ছুটিতে আছি!\nএজেন্ট:",
     "target": "দারুণ! ছুটির দিনটি উপভোগ করুন।"},
]

In [None]:
import json
import sacrebleu
from tqdm import tqdm

# ==== Load your LLM ====
# Replace this with however you load your model.
# Example for HuggingFace:
#
# google/mt5-base is an encoder-decoder (seq2seq) checkpoint, so it must be
# loaded with AutoModelForSeq2SeqLM; AutoModelForCausalLM does not accept an
# mT5 config and raises at load time. If you swap in a decoder-only
# checkpoint, switch the class back to AutoModelForCausalLM.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM

model_name = "google/mt5-base"
# Fall back to CPU so the notebook still runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

def generate_text(prompt, max_new_tokens=64, stop_sequence=None):
    """Generate text for ``prompt`` using the module-level ``tokenizer``/``model``.

    Args:
        prompt: Input text passed to the tokenizer.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        stop_sequence: Optional string. When it occurs in the generated text,
            everything from its first occurrence onward is discarded.

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped.
    """
    # Place the inputs on whatever device the model lives on instead of
    # assuming a GPU is available.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)

    if model.config.is_encoder_decoder:
        # Seq2seq output holds only the decoder side; keep the original
        # safeguard of removing a verbatim echo of the prompt.
        text = tokenizer.decode(output[0], skip_special_tokens=True).replace(prompt, "")
    else:
        # Decoder-only output starts with the prompt tokens. Drop them by
        # position: decoding does not always reproduce the prompt string
        # exactly, so a string replace can silently leave it in place.
        prompt_length = inputs["input_ids"].shape[-1]
        text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    if stop_sequence:
        text = text.split(stop_sequence, 1)[0]
    return text.strip()


# ==== Load Data ====
def load_data(json_file):
    """Read the UTF-8 encoded JSON file at ``json_file`` and return its parsed content."""
    with open(json_file, encoding="utf-8") as handle:
        records = json.loads(handle.read())
    return records

# ==== Evaluate BLEU ====
def evaluate_bleu(data, stop_sequence=None):
    """Generate a reply for every example and score the set with corpus BLEU.

    Args:
        data: Iterable of dicts, each with a "prompt" and a "target" string.
        stop_sequence: Optional string. When it occurs in a generated reply,
            the reply is cut at its first occurrence before scoring.

    Returns:
        A tuple ``(score, pairs)`` where ``score`` is the corpus-level BLEU
        score and ``pairs`` is a list of ``(generated, reference)`` tuples in
        input order. Empty input yields ``(0.0, [])``.
    """
    references = []
    predictions = []

    for item in tqdm(data, desc="Evaluating"):
        # Truncation is applied here, so generate_text only needs the prompt.
        generated = generate_text(item["prompt"])
        if stop_sequence:
            generated = generated.split(stop_sequence, 1)[0].strip()

        references.append(item["target"])
        predictions.append(generated)

    if not predictions:
        # Nothing to score; avoid handing sacrebleu an empty corpus.
        return 0.0, []

    # sacrebleu expects a list of reference *streams*, each stream holding one
    # reference per hypothesis. With a single reference per example that is
    # one stream containing every target, i.e. [references].
    bleu = sacrebleu.corpus_bleu(predictions, [references])
    return bleu.score, list(zip(predictions, references))

# ==== Main ====
if __name__ == "__main__":
    import os

    # Example data file with [{"prompt": "...", "target": "..."}]
    DATA_FILE = "data.json"

    # Prefer the JSON file when it exists. Otherwise keep the examples already
    # bound to `data` earlier in the notebook, so a fresh Restart & Run All
    # does not stop with FileNotFoundError.
    if os.path.exists(DATA_FILE):
        data = load_data(DATA_FILE)

    bleu_score, results = evaluate_bleu(data)

    print(f"\n🔍 Average BLEU Score: {bleu_score:.2f}\n")

    # Optional: print sample outputs (at most the first five examples).
    # `results` is in the same order as `data`, so index i lines up with both.
    for i, (gen, ref) in enumerate(results[:5]):
        print(f"Example {i+1}")
        print(f"🔹 Prompt     : {data[i]['prompt']}")
        print(f"✅ Reference  : {ref}")
        print(f"🤖 Generated  : {gen}")
        print("-" * 60)
