# BLEU Score for Unigrams

In [1]:
# German source sentence. Kept for context only: nothing in the visible code
# reads this variable.
original = "Der schnelle braune Fuchs sprang ueber den faulen Hund"

# One reference translation and three candidate translations scored against it.
# The extra spaces in `reference =` and inside candidate_1 only line the words
# up column-wise; wherever the strings are tokenized with str.split() runs of
# whitespace collapse, so the padding does not create extra tokens.
#   candidate_1: two words swapped for synonyms and "lazy" dropped (8 words).
#   candidate_2: a single word differs from the reference ("swift" vs "quick").
#   candidate_3: heavy paraphrase; note the trailing period stays attached to
#                "canine." under whitespace tokenization.
reference =   "The quick brown fox jumped over the lazy dog"
candidate_1 = "The fast  brown fox leaped over the      dog"
candidate_2 = "The swift brown fox jumped over the lazy dog"
candidate_3 = "The swift tawny fox leaped over the indolent canine."

### NLTK

In [2]:
#pip install nltk

In [3]:
from nltk.translate.bleu_score import sentence_bleu

# Score each candidate against the single reference with NLTK. Both sides are
# whitespace-tokenized, and the one-element weight list restricts the score to
# unigram precision.
bleu_nltk_1, bleu_nltk_2, bleu_nltk_3 = (
    sentence_bleu([reference.split()], sentence.split(), weights=[1.])
    for sentence in (candidate_1, candidate_2, candidate_3)
)

# One print call, one line per example.
print(
    f"BLEU score for example 1: {bleu_nltk_1:.2f}",
    f"BLEU score for example 2: {bleu_nltk_2:.2f}",
    f"BLEU score for example 3: {bleu_nltk_3:.2f}",
    sep="\n",
)

BLEU score for example 1: 0.66
BLEU score for example 2: 0.89
BLEU score for example 3: 0.44


### TorchMetrics

In [6]:
from torchmetrics import BLEUScore

# Unigram-only BLEU metric object; it is called once per candidate below.
bleu = BLEUScore(n_gram=1)

# The metric receives raw sentences: `preds` is a list of candidate strings and
# `target` holds, for each candidate, a list of reference strings.
bleu_tm_1, bleu_tm_2, bleu_tm_3 = (
    bleu(target=[[reference]], preds=[sentence])
    for sentence in (candidate_1, candidate_2, candidate_3)
)

# One print call, one line per example.
print(
    f"BLEU score for example 1: {bleu_tm_1:.2f}",
    f"BLEU score for example 2: {bleu_tm_2:.2f}",
    f"BLEU score for example 3: {bleu_tm_3:.2f}",
    sep="\n",
)

BLEU score for example 1: 0.66
BLEU score for example 2: 0.89
BLEU score for example 3: 0.44


### From Scratch

In [7]:
import math
from collections import Counter

def ngrams(sentence, n):
    """Return every contiguous n-gram of *sentence*, in order, as tuples.

    Args:
        sentence: A sliceable sequence of tokens (for example a list of words).
        n: Number of tokens per n-gram; must be at least 1.

    Returns:
        A list of ``len(sentence) - n + 1`` tuples of length ``n``, or an
        empty list when the sentence has fewer than ``n`` tokens.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # n == 0 would produce a run of empty tuples and a negative n yields
        # arbitrary slices; reject both rather than let them skew the counts.
        raise ValueError(f"n must be a positive integer, got {n!r}")

    window_count = len(sentence) - n + 1  # <= 0 when the sentence is too short
    return [tuple(sentence[start:start + n]) for start in range(window_count)]

def modified_precision(reference, candidate, n):
    """Return the clipped (modified) n-gram precision of *candidate*.

    Each candidate n-gram is credited at most as many times as it occurs in
    the reference, so repeating a matching word cannot inflate the score.

    Args:
        reference: Token sequence of the reference translation.
        candidate: Token sequence of the candidate translation.
        n: N-gram order passed through to ``ngrams``.

    Returns:
        A float: clipped matching n-grams divided by the number of candidate
        n-grams, or ``0.0`` when the candidate contains no n-grams.
    """
    ref_counts = Counter(ngrams(reference, n))
    cand_counts = Counter(ngrams(candidate, n))

    total = sum(cand_counts.values())
    if total == 0:
        # Nothing to score; return a float so the result type is uniform.
        return 0.0

    # Counter intersection keeps min(candidate count, reference count) for
    # every n-gram, which is exactly the clipping step.
    clipped = sum((cand_counts & ref_counts).values())
    return clipped / total

def brevity_penalty(reference, candidate):
    """Return the brevity penalty for *candidate* relative to *reference*.

    Args:
        reference: Token sequence of the reference translation.
        candidate: Token sequence of the candidate translation.

    Returns:
        ``0`` for an empty candidate, ``1`` when the candidate is longer than
        the reference, otherwise ``exp(1 - ref_len / cand_len)`` (which equals
        1.0 when both lengths match).
    """
    r, c = len(reference), len(candidate)

    # An empty candidate can never be longer than the reference, so testing
    # it first is safe and keeps the division below away from zero.
    if c == 0:
        return 0
    if c > r:
        return 1
    return math.exp(1 - r / c)

def bleu_score_unigram(reference, candidate):
    """Return the unigram BLEU score of *candidate* against one *reference*.

    The score is the brevity penalty multiplied by the modified unigram
    precision.

    Args:
        reference: Token sequence of the reference translation.
        candidate: Token sequence of the candidate translation.
    """
    unigram_precision = modified_precision(reference, candidate, n=1)
    length_penalty = brevity_penalty(reference, candidate)
    return length_penalty * unigram_precision


# Score the three candidates with the from-scratch implementation; both the
# reference and each candidate are whitespace-tokenized first.
bleu_scratch_1, bleu_scratch_2, bleu_scratch_3 = (
    bleu_score_unigram(reference=reference.split(), candidate=sentence.split())
    for sentence in (candidate_1, candidate_2, candidate_3)
)

# One print call, one line per example.
print(
    f"BLEU score for example 1: {bleu_scratch_1:.2f}",
    f"BLEU score for example 2: {bleu_scratch_2:.2f}",
    f"BLEU score for example 3: {bleu_scratch_3:.2f}",
    sep="\n",
)

BLEU score for example 1: 0.66
BLEU score for example 2: 0.89
BLEU score for example 3: 0.44
