In [None]:
!pip install torch transformers sacrebleu tqdm

In [None]:
# Inline sample dataset: Bengali chat prompts with the expected agent reply.
# The text had been stored as mojibake (UTF-8 bytes decoded as Mac Roman); it is
# restored to real Bengali here so the model and the BLEU reference see valid
# input. English glosses:
#   1. "User: Can you tell me the weather news?\nAgent:"
#      -> "Of course! Please share your location and I will give you the weather news."
#   2. "User: Today is Friday, I am on holiday!\nAgent:"
#      -> "Great! Enjoy your day off."
data = [
    {
        "prompt": "ব্যবহারকারী: তুমি কি আমাকে আবহাওয়ার খবর বলতে পারো?\nএজেন্ট:",
        "target": "অবশ্যই! দয়া করে আপনার অবস্থান জানান, আমি আবহাওয়ার খবর জানাব।",
    },
    {
        "prompt": "ব্যবহারকারী: আজ শুক্রবার, আমি ছুটিতে আছি!\nএজেন্ট:",
        "target": "দারুণ! ছুটির দিনটি উপভোগ করুন।",
    },
]

In [None]:
import json
import sacrebleu
from tqdm import tqdm

# ==== Load your LLM ====
# Replace this with however you load your model
# Example for HuggingFace:
from transformers import AutoTokenizer, AutoModelForCausalLM

# mT5 is an encoder-decoder (text-to-text) model, so it must be loaded through
# the seq2seq auto class: AutoModelForCausalLM has no mapping for MT5Config and
# raises ValueError. Use AutoModelForCausalLM only for decoder-only checkpoints.
import torch
from transformers import AutoModelForSeq2SeqLM

model_name = "google/mt5-base"
# Fall back to CPU so the notebook still runs on hosts without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

def generate_text(prompt, max_new_tokens=64, stop_sequence=None):
    """Generate a completion for *prompt* with the module-level model.

    Args:
        prompt: Text fed to the tokenizer and then to ``model.generate``.
        max_new_tokens: Upper bound on the number of generated tokens.
        stop_sequence: Optional string; when given, the returned text is cut
            just before its first occurrence.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not part of the result.
    """
    # Follow the model's own device instead of assuming a GPU is present.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)

    generated_ids = output[0]
    if not model.config.is_encoder_decoder:
        # Decoder-only models return prompt + continuation. Drop the prompt by
        # token position: a textual str.replace removes *every* occurrence and
        # silently fails when decoding does not reproduce the prompt exactly.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = generated_ids[prompt_length:]

    text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    if stop_sequence:
        text = text.split(stop_sequence, 1)[0]
    return text.strip()


# ==== Load Data ====
def load_data(json_file):
    """Parse the UTF-8 encoded JSON document at *json_file* and return it.

    Args:
        json_file: Path of the file to read.

    Returns:
        Whatever Python object the JSON document decodes to.
    """
    with open(json_file, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# ==== Evaluate BLEU ====
def evaluate_bleu(data, stop_sequence=None):
    """Generate a reply for every prompt and score the set with corpus BLEU.

    Args:
        data: Iterable of dicts, each with a ``"prompt"`` and a ``"target"``
            string.
        stop_sequence: Optional string; each generation is cut just before its
            first occurrence prior to scoring.

    Returns:
        A ``(score, pairs)`` tuple: ``score`` is the sacreBLEU corpus score and
        ``pairs`` is a list of ``(generated, reference)`` tuples in input
        order. An empty dataset yields ``(0.0, [])``.
    """
    references = []
    predictions = []

    for item in tqdm(data, desc="Evaluating"):
        generated = generate_text(item["prompt"])
        # Truncate here rather than relying on generate_text to accept a stop
        # argument, so this function works with any prompt -> text generator.
        if stop_sequence:
            generated = generated.split(stop_sequence, 1)[0].strip()

        references.append(item["target"])
        predictions.append(generated)

    if not predictions:
        # sacreBLEU cannot score an empty corpus; report a neutral result.
        return 0.0, []

    # sacreBLEU expects a list of reference *streams*, each one aligned
    # element-by-element with the hypotheses. There is a single reference per
    # example here, hence exactly one stream.
    bleu = sacrebleu.corpus_bleu(predictions, [references])
    return bleu.score, list(zip(predictions, references))

# ==== Main ====
if __name__ == "__main__":
    # Example data file with [{"prompt": "...", "target": "..."}]
    DATA_FILE = "data.json"

    data = load_data(DATA_FILE)
    bleu_score, results = evaluate_bleu(data)

    # The score comes from sacrebleu.corpus_bleu, i.e. it is a corpus-level
    # BLEU and not a mean of per-sentence scores, so label it accordingly.
    # The emoji below replace mojibake left by a wrong-encoding round trip.
    print(f"\n🔍 Corpus BLEU Score: {bleu_score:.2f}\n")

    # Optional: print sample outputs
    for i, (gen, ref) in enumerate(results[:5]):
        print(f"Example {i+1}")
        print(f"🔹 Prompt     : {data[i]['prompt']}")
        print(f"✅ Reference  : {ref}")
        print(f"🤖 Generated  : {gen}")
        print("-" * 60)
