In [1]:
import numpy as np
from collections import Counter
import math

In [2]:
class BLEU:
    @classmethod
    def unigram_naive(cls, can_text, refs_text, modified=False):
        """can_sen: candidate text/sentence
        ref_text: reference text/sentence, or ground truth
        """
        def count_words(text):
            text = text.strip()
            if text[-1] == ".":
                text = text[:-1]
            words = text.lower().split()
            words = sorted(words)
            
            return Counter(words)
        
        can_cnt = count_words(can_text)
        refs_cnt = [count_words(ref_text) for ref_text in refs_text]
        
        can_len = sum(can_cnt.values())
        print(can_cnt, can_len)
        
        n_shares = 0
        for k in can_cnt:
            n_hits = sum(ref_cnt.get(k, 0) for ref_cnt in refs_cnt)
            if n_hits > 0:
                cur_shares = can_cnt[k]
                cur_max_ref = 0
                if modified:
                    cur_max_ref = max(ref_cnt[k] for ref_cnt in refs_cnt)
                    cur_shares = min(cur_shares, cur_max_ref)
                n_shares += cur_shares
                print(k, cur_shares, n_shares, cur_max_ref)
        
        return n_shares, can_len, n_shares/can_len
    
    @classmethod
    def count_ngrams(cls, text, ngrams):
        text = text.strip()
        if text[-1] == ".":
            text = text[:-1]
        words = text.lower().split()
        ngram_segments = [tuple(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)]

        return Counter(ngram_segments)
        
    @classmethod
    def calc_ngram(cls, can_text, refs_text, ngrams=1, modified=False):
        """can_sen: candidate text/sentence
        ref_text: reference text/sentence, or ground truth
        """
        
        can_cnt = cls.count_ngrams(can_text, ngrams=ngrams)
        refs_cnt = [cls.count_ngrams(ref_text, ngrams=ngrams) for ref_text in refs_text]
        
        can_len = sum(can_cnt.values())
        
        n_shares = 0
        for k in can_cnt:
            n_hits = sum(ref_cnt.get(k, 0) for ref_cnt in refs_cnt)
            if n_hits > 0:
                cur_shares = can_cnt[k]
                cur_max_ref = 0
                if modified:
                    cur_max_ref = max(ref_cnt[k] for ref_cnt in refs_cnt)
                    cur_shares = min(cur_shares, cur_max_ref)
                n_shares += cur_shares
                print(k, cur_shares, n_shares, cur_max_ref)
        
        return n_shares, can_len, n_shares/can_len
    
    @classmethod
    def BP(cls, can_len, refs_len):
        """calculate brevity penalty
        """
        refs_len = list(set(refs_len))  #de-duplicate
        sorted_refs_len = sorted(refs_len, key=lambda x: (abs(can_len-x), x))
        r = sorted_refs_len[0]
        print(sorted_refs_len, r)
        
        c = can_len
        bp = 1 if c >= r else math.exp(1-r/c)
        
        return bp
    
    @classmethod
    def bleu4(cls, can_text, refs_text, n_max=4):
        ps = [cls.calc_ngram(can_text, refs_text, ngrams=i, modified=True)[2] for i in range(1,n_max+1)]
        print("ps: ", ps)
        can_len = sum(cls.count_ngrams(can_text, ngrams=1).values())
        refs_len = [sum(cls.count_ngrams(ref_text, ngrams=1).values()) for ref_text in refs_text]
        bp = cls.BP(can_len, refs_len)
        print("bp: ", bp)
        
        wsum_log_ps = sum([math.log(p)*1/n_max if p > 0 else 0 for p in ps])
        print("wsum_log_ps: ", wsum_log_ps)
        bleu = bp*math.exp(wsum_log_ps)

        return bleu

In [3]:
example_1 = {
    "Candidate": 
        [
            "It is a guide to action which ensures that the military always obeys the commands of the party.",
            "It is to insure the troops forever hearing the activity guidebook that party direct."
        ],
    "Reference": 
        [   "It is a guide to action that ensures that the military will forever heed Party commands.",
            "It is the guiding principle which guarantees the military forces always being under the command of the Party."
        ]
}
example_2 = {
    "Candidate":
        [
            "the the the the the the the."
        ],
    "Reference":
        [
            "The cat is on the mat.",
            "There is a cat on the mat."      
        ]
}

example_3 = {
    "Candidate":
        [
            "of the"
        ],
    "Reference":
        [
            "It is a guide to action that ensures that the military will forever heed Party commands.",
            "It is the guiding principle which guarantees the military forces always being under the command of the Party.",
            "It is the practical guide for the army always to heed the directions of the party"
        ]   
}


In [67]:
for can in example_1['Candidate']:
    print(BLEU.unigram_naive(can, example_1['Reference'], modified=False))
    

Counter({'the': 3, 'a': 1, 'action': 1, 'always': 1, 'commands': 1, 'ensures': 1, 'guide': 1, 'is': 1, 'it': 1, 'military': 1, 'obeys': 1, 'of': 1, 'party': 1, 'that': 1, 'to': 1, 'which': 1}) 18
a 1 1 0
action 1 2 0
always 1 3 0
commands 1 4 0
ensures 1 5 0
guide 1 6 0
is 1 7 0
it 1 8 0
military 1 9 0
of 1 10 0
party 1 11 0
that 1 12 0
the 3 15 0
to 1 16 0
which 1 17 0
(17, 18, 0.9444444444444444)
Counter({'the': 2, 'activity': 1, 'direct': 1, 'forever': 1, 'guidebook': 1, 'hearing': 1, 'insure': 1, 'is': 1, 'it': 1, 'party': 1, 'that': 1, 'to': 1, 'troops': 1}) 14
forever 1 1 0
is 1 2 0
it 1 3 0
party 1 4 0
that 1 5 0
the 2 7 0
to 1 8 0
(8, 14, 0.5714285714285714)


In [94]:
for can in example_1['Candidate']:
    print(BLEU.calc_ngram(can, example_1['Reference'], ngrams=2, modified=True))

[('a', 'guide'), ('action', 'which'), ('always', 'obeys'), ('commands', 'of'), ('ensures', 'that'), ('guide', 'to'), ('is', 'a'), ('it', 'is'), ('military', 'always'), ('obeys', 'the'), ('of', 'the'), ('that', 'the'), ('the', 'commands'), ('the', 'military'), ('the', 'party'), ('to', 'action'), ('which', 'ensures')]
[('a', 'guide'), ('action', 'that'), ('ensures', 'that'), ('forever', 'heed'), ('guide', 'to'), ('heed', 'party'), ('is', 'a'), ('it', 'is'), ('military', 'will'), ('party', 'commands'), ('that', 'ensures'), ('that', 'the'), ('the', 'military'), ('to', 'action'), ('will', 'forever')]
[('always', 'being'), ('being', 'under'), ('command', 'of'), ('forces', 'always'), ('guarantees', 'the'), ('guiding', 'principle'), ('is', 'the'), ('it', 'is'), ('military', 'forces'), ('of', 'the'), ('principle', 'which'), ('the', 'command'), ('the', 'guiding'), ('the', 'military'), ('the', 'party'), ('under', 'the'), ('which', 'guarantees')]
Counter({('it', 'is'): 1, ('is', 'a'): 1, ('a', 'gu

In [100]:
for can in example_3['Candidate']:
    print(BLEU.calc_ngram(can, example_3['Reference'], ngrams=3, modified=True))

[]
[('a', 'guide', 'to'), ('action', 'that', 'ensures'), ('ensures', 'that', 'the'), ('forever', 'heed', 'party'), ('guide', 'to', 'action'), ('heed', 'party', 'commands'), ('is', 'a', 'guide'), ('it', 'is', 'a'), ('military', 'will', 'forever'), ('that', 'ensures', 'that'), ('that', 'the', 'military'), ('the', 'military', 'will'), ('to', 'action', 'that'), ('will', 'forever', 'heed')]
[('always', 'being', 'under'), ('being', 'under', 'the'), ('command', 'of', 'the'), ('forces', 'always', 'being'), ('guarantees', 'the', 'military'), ('guiding', 'principle', 'which'), ('is', 'the', 'guiding'), ('it', 'is', 'the'), ('military', 'forces', 'always'), ('of', 'the', 'party'), ('principle', 'which', 'guarantees'), ('the', 'command', 'of'), ('the', 'guiding', 'principle'), ('the', 'military', 'forces'), ('under', 'the', 'command'), ('which', 'guarantees', 'the')]
[('always', 'to', 'heed'), ('army', 'always', 'to'), ('directions', 'of', 'the'), ('for', 'the', 'army'), ('guide', 'for', 'the'), (

ZeroDivisionError: division by zero

In [56]:
for can in example_2['Candidate']:
    print(BLEU.unigram_naive(can, example_2['Reference'], modified=True))
    

Counter({'the': 7}) 7
the 2 2 2
(2, 7, 0.2857142857142857)


In [60]:
sorted(['xy', 'abc', 'cd'])

['abc', 'cd', 'xy']

In [78]:
BLEU.count_ngrams(example_1["Candidate"][0], 2)

[('a', 'guide'), ('action', 'which'), ('always', 'obeys'), ('commands', 'of'), ('ensures', 'that'), ('guide', 'to'), ('is', 'a'), ('it', 'is'), ('military', 'always'), ('obeys', 'the'), ('of', 'the'), ('that', 'the'), ('the', 'commands'), ('the', 'military'), ('the', 'party'), ('to', 'action'), ('which', 'ensures')]


Counter({('it', 'is'): 1,
         ('is', 'a'): 1,
         ('a', 'guide'): 1,
         ('guide', 'to'): 1,
         ('to', 'action'): 1,
         ('action', 'which'): 1,
         ('which', 'ensures'): 1,
         ('ensures', 'that'): 1,
         ('that', 'the'): 1,
         ('the', 'military'): 1,
         ('military', 'always'): 1,
         ('always', 'obeys'): 1,
         ('obeys', 'the'): 1,
         ('the', 'commands'): 1,
         ('commands', 'of'): 1,
         ('of', 'the'): 1,
         ('the', 'party'): 1})

In [97]:
(example_3["Candidate"][0], 1)

[('of',), ('the',)]


Counter({('of',): 1, ('the',): 1})

In [114]:
BLEU.BP(9, [7,10,12,15])

[10, 7, 12, 15] 10


0.8948393168143697

In [116]:
math.exp(1-10/9)

0.8948393168143697

In [127]:
for can in example_1['Candidate']:
    print(BLEU.bleu4(can, example_1['Reference']))

('it',) 1 1 1
('is',) 1 2 1
('a',) 1 3 1
('guide',) 1 4 1
('to',) 1 5 1
('action',) 1 6 1
('which',) 1 7 1
('ensures',) 1 8 1
('that',) 1 9 2
('the',) 3 12 4
('military',) 1 13 1
('always',) 1 14 1
('commands',) 1 15 1
('of',) 1 16 1
('party',) 1 17 1
('it', 'is') 1 1 1
('is', 'a') 1 2 1
('a', 'guide') 1 3 1
('guide', 'to') 1 4 1
('to', 'action') 1 5 1
('ensures', 'that') 1 6 1
('that', 'the') 1 7 1
('the', 'military') 1 8 1
('of', 'the') 1 9 1
('the', 'party') 1 10 1
('it', 'is', 'a') 1 1 1
('is', 'a', 'guide') 1 2 1
('a', 'guide', 'to') 1 3 1
('guide', 'to', 'action') 1 4 1
('ensures', 'that', 'the') 1 5 1
('that', 'the', 'military') 1 6 1
('of', 'the', 'party') 1 7 1
('it', 'is', 'a', 'guide') 1 1 1
('is', 'a', 'guide', 'to') 1 2 1
('a', 'guide', 'to', 'action') 1 3 1
('ensures', 'that', 'the', 'military') 1 4 1
ps:  [0.9444444444444444, 0.5882352941176471, 0.4375, 0.26666666666666666]
[18, 16] 18
bp:  1
wsum_log_ps:  -0.6840552695172266
0.5045666840058485
('it',) 1 1 1
('is',) 1 2 

In [125]:
np.prod([0.9444444444444444, 0.5882352941176471, 0.4375, 0.26666666666666666])**(1/4)

0.5045666840058485

### Test ScareBlue

In [4]:
from sacrebleu.metrics import BLEU, CHRF, TER

In [5]:
refs = [ # First set of references
         ['The dog bit the man.', 'It was not unexpected.', 'The man bit him first.'],
         # Second set of references
         ['The dog had bit the man.', 'No one was surprised.', 'The man had bitten the dog.'],
       ]
sys = ['The dog bit the man.', "It wasn't surprising.", 'The man had just bitten him.']

In [6]:
bleu = BLEU(effective_order=True)
bleu.corpus_score(sys, refs)

ref_lens, ref_ngrams: 
 [6, 7] 
 Counter({('The',): 1, ('dog',): 1, ('bit',): 1, ('the',): 1, ('man',): 1, ('.',): 1, ('The', 'dog'): 1, ('dog', 'bit'): 1, ('bit', 'the'): 1, ('the', 'man'): 1, ('man', '.'): 1, ('The', 'dog', 'bit'): 1, ('dog', 'bit', 'the'): 1, ('bit', 'the', 'man'): 1, ('the', 'man', '.'): 1, ('The', 'dog', 'bit', 'the'): 1, ('dog', 'bit', 'the', 'man'): 1, ('bit', 'the', 'man', '.'): 1, ('had',): 1, ('dog', 'had'): 1, ('had', 'bit'): 1, ('The', 'dog', 'had'): 1, ('dog', 'had', 'bit'): 1, ('had', 'bit', 'the'): 1, ('The', 'dog', 'had', 'bit'): 1, ('dog', 'had', 'bit', 'the'): 1, ('had', 'bit', 'the', 'man'): 1})
hyp_len, hyp_ngrams: 
 6 
 Counter({('The',): 1, ('dog',): 1, ('bit',): 1, ('the',): 1, ('man',): 1, ('.',): 1, ('The', 'dog'): 1, ('dog', 'bit'): 1, ('bit', 'the'): 1, ('the', 'man'): 1, ('man', '.'): 1, ('The', 'dog', 'bit'): 1, ('dog', 'bit', 'the'): 1, ('bit', 'the', 'man'): 1, ('the', 'man', '.'): 1, ('The', 'dog', 'bit', 'the'): 1, ('dog', 'bit', 'the',

BLEU = 48.53 82.4/50.0/45.5/37.5 (BP = 0.943 ratio = 0.944 hyp_len = 17 ref_len = 18)

In [149]:
bleu.sentence_score(sys[0], [refs[0][0], refs[1][0]])

BLEU = 100.00 100.0/100.0/100.0/100.0 (BP = 1.000 ratio = 1.000 hyp_len = 6 ref_len = 6)

In [140]:
?bleu.sentence_score

In [150]:
for can in example_1['Candidate']:
    print(can)
    print(example_1['Reference'])
    print(bleu.sentence_score(can, example_1['Reference']))

It is a guide to action which ensures that the military always obeys the commands of the party.
['It is a guide to action that ensures that the military will forever heed Party commands.', 'It is the guiding principle which guarantees the military forces always being under the command of the Party.']
BLEU = 44.57 89.5/50.0/35.3/25.0 (BP = 1.000 ratio = 1.000 hyp_len = 19 ref_len = 19)
It is to insure the troops forever hearing the activity guidebook that party direct.
['It is a guide to action that ensures that the military will forever heed Party commands.', 'It is the guiding principle which guarantees the military forces always being under the command of the Party.']
BLEU = 6.51 53.3/7.1/3.8/2.1 (BP = 0.875 ratio = 0.882 hyp_len = 15 ref_len = 17)


In [146]:
"It is a guide to action that ensures that the military will forever heed Party commands .".split().__len__()

17

In [147]:
'It is the guiding principle which guarantees the military forces always being under the command of the Party .'.split().__len__()

19

In [152]:
import sacrebleu

In [154]:
tokenizer = sacrebleu.tokenizers.tokenizer_13a.Tokenizer13a()

In [155]:
tokenizer(example_1["Candidate"][0])

'It is a guide to action which ensures that the military always obeys the commands of the party .'

In [156]:
import re

In [158]:
re.sub(r'([^0-9])([\.,])', r'\1 \2 ', "abc.")

'abc . '

In [159]:
class TestTokenizer13a:
    def __call__(self, line):
        """Tokenizes an input line using a relatively minimal tokenization
        that is however equivalent to mteval-v13a, used by WMT.

        :param line: a segment to tokenize
        :return: the tokenized line
        """

        # language-independent part:
        line = line.replace('<skipped>', '')
        line = line.replace('-\n', '')
        line = line.replace('\n', ' ')

        if '&' in line:
            line = line.replace('&quot;', '"')
            line = line.replace('&amp;', '&')
            line = line.replace('&lt;', '<')
            line = line.replace('&gt;', '>')
        
        return f' {line} '


In [161]:
TestTokenizer13a()(example_1["Candidate"][0])

' It is a guide to action which ensures that the military always obeys the commands of the party. '

In [162]:
post_tokenizer = sacrebleu.tokenizers.tokenizer_re.TokenizerRegexp()

In [164]:
post_tokenizer(TestTokenizer13a()(example_1["Candidate"][0])).split()

['It',
 'is',
 'a',
 'guide',
 'to',
 'action',
 'which',
 'ensures',
 'that',
 'the',
 'military',
 'always',
 'obeys',
 'the',
 'commands',
 'of',
 'the',
 'party',
 '.']

In [165]:
" a b c ".split()

['a', 'b', 'c']

In [46]:
import sacrebleu

class BLEUEx:
    default_tokenizer = sacrebleu.tokenizers.tokenizer_13a.Tokenizer13a()
    
    @classmethod
    def tokenize(cls, line):
        return cls.default_tokenizer(line)
    
    @classmethod
    def count_ngrams(cls, text, ngrams):
        text = cls.tokenize(text)
        words = text.lower().split()
        # \print(' '.join(words))
        ngram_segments = [tuple(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)]

        return Counter(ngram_segments)
        
    @classmethod
    def match_ngram(cls, hyp_text, refs_text, ngrams=1):
        """can_sen: candidate text/sentence
        ref_text: reference text/sentence, or ground truth
        """
        
        hyp_cnt = cls.count_ngrams(hyp_text, ngrams=ngrams)
        #print(hyp_cnt)
        refs_cnt = [cls.count_ngrams(ref_text, ngrams=ngrams) for ref_text in refs_text]
        #print(refs_cnt)
        
        hyp_len = sum(hyp_cnt.values())
        
        n_shares = 0
        for k in hyp_cnt:
            n_hits = sum(ref_cnt.get(k, 0) for ref_cnt in refs_cnt)
            cur_shares = min(hyp_cnt[k], n_hits)
            n_shares += cur_shares
            #print(k, hyp_cnt[k], n_hits, cur_shares)
        
        print(n_shares, hyp_len, n_shares/hyp_len)
        return n_shares, hyp_len, n_shares/hyp_len
    
    @classmethod
    def BP(cls, can_len, refs_len):
        """calculate brevity penalty
        """
        refs_len = list(set(refs_len))  #de-duplicate
        sorted_refs_len = sorted(refs_len, key=lambda x: (abs(can_len-x), x))
        r = sorted_refs_len[0]
        # print(sorted_refs_len, r)
        
        c = can_len
        bp = 1 if c >= r else math.exp(1-r/c)
        
        return bp
    
    @classmethod
    def sentence_score(cls, can_text, refs_text, n_max=4):
        ps = [cls.match_ngram(can_text, refs_text, ngrams=i)[2] for i in range(1,n_max+1)]
        print("ps: ", ps)
        can_len = sum(cls.count_ngrams(can_text, ngrams=1).values())
        refs_len = [sum(cls.count_ngrams(ref_text, ngrams=1).values()) for ref_text in refs_text]
        bp = cls.BP(can_len, refs_len)
        print("bp: ", bp)
        
        wsum_log_ps = sum([math.log(p)*1/n_max if p > 0 else 0 for p in ps])
        print("wsum_log_ps: ", wsum_log_ps)
        bleu = bp*math.exp(wsum_log_ps)*100

        return bleu

In [47]:
for can in example_1['Candidate']:
    print(BLEUEx.sentence_score(can, example_1['Reference']))

18 19 0.9473684210526315
11 18 0.6111111111111112
8 17 0.47058823529411764
5 16 0.3125
ps:  [0.9473684210526315, 0.6111111111111112, 0.47058823529411764, 0.3125]
bp:  1
wsum_log_ps:  -0.6158665796375327
54.01725898595141
9 15 0.6
1 14 0.07142857142857142
0 13 0.0
0 12 0.0
ps:  [0.6, 0.07142857142857142, 0.0, 0.0]
bp:  0.8751733190429475
wsum_log_ps:  -0.7874707383453123
39.81987320031649


In [17]:
BLEUEx.count_ngrams(example_1['Candidate'][0], 1)
BLEUEx.match_ngram(example_1['Candidate'][0], example_1['Reference'], 1)

it is a guide to action which ensures that the military always obeys the commands of the party .
it is a guide to action which ensures that the military always obeys the commands of the party .
Counter({('the',): 3, ('it',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party',): 1, ('.',): 1})
it is a guide to action that ensures that the military will forever heed party commands .
it is the guiding principle which guarantees the military forces always being under the command of the party .
[Counter({('that',): 2, ('it',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('ensures',): 1, ('the',): 1, ('military',): 1, ('will',): 1, ('forever',): 1, ('heed',): 1, ('party',): 1, ('commands',): 1, ('.',): 1}), Counter({('the',): 4, ('it',): 1, ('is',): 1, ('guiding',): 1, ('principle',): 1, ('which',): 1, ('guarantees',)

(18, 19, 0.9473684210526315)

In [49]:
s = bleu.sentence_score(example_1['Candidate'][0].lower(), [x.lower() for x in example_1['Reference']])

ref_lens, ref_ngrams: 
 [17, 19] 
 Counter({('the',): 4, ('that',): 2, ('it',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('ensures',): 1, ('military',): 1, ('will',): 1, ('forever',): 1, ('heed',): 1, ('party',): 1, ('commands',): 1, ('.',): 1, ('it', 'is'): 1, ('is', 'a'): 1, ('a', 'guide'): 1, ('guide', 'to'): 1, ('to', 'action'): 1, ('action', 'that'): 1, ('that', 'ensures'): 1, ('ensures', 'that'): 1, ('that', 'the'): 1, ('the', 'military'): 1, ('military', 'will'): 1, ('will', 'forever'): 1, ('forever', 'heed'): 1, ('heed', 'party'): 1, ('party', 'commands'): 1, ('commands', '.'): 1, ('it', 'is', 'a'): 1, ('is', 'a', 'guide'): 1, ('a', 'guide', 'to'): 1, ('guide', 'to', 'action'): 1, ('to', 'action', 'that'): 1, ('action', 'that', 'ensures'): 1, ('that', 'ensures', 'that'): 1, ('ensures', 'that', 'the'): 1, ('that', 'the', 'military'): 1, ('the', 'military', 'will'): 1, ('military', 'will', 'forever'): 1, ('will', 'forever', 'heed'): 1, ('forever', 'hee

In [50]:
s.counts, s.totals, s.sys_len, s.ref_len, s

([18, 11, 8, 5],
 [19, 18, 17, 16],
 19,
 19,
 BLEU = 54.02 94.7/61.1/47.1/31.2 (BP = 1.000 ratio = 1.000 hyp_len = 19 ref_len = 19))

In [18]:
ref_cnt_sacre = Counter({('the',): 4, ('that',): 2, ('It',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('ensures',): 1, ('military',): 1, ('will',): 1, ('forever',): 1, ('heed',): 1, ('Party',): 1, ('commands',): 1, ('.',): 1, ('It', 'is'): 1, ('is', 'a'): 1, ('a', 'guide'): 1, ('guide', 'to'): 1, ('to', 'action'): 1, ('action', 'that'): 1, ('that', 'ensures'): 1, ('ensures', 'that'): 1, ('that', 'the'): 1, ('the', 'military'): 1, ('military', 'will'): 1, ('will', 'forever'): 1, ('forever', 'heed'): 1, ('heed', 'Party'): 1, ('Party', 'commands'): 1, ('commands', '.'): 1, ('It', 'is', 'a'): 1, ('is', 'a', 'guide'): 1, ('a', 'guide', 'to'): 1, ('guide', 'to', 'action'): 1, ('to', 'action', 'that'): 1, ('action', 'that', 'ensures'): 1, ('that', 'ensures', 'that'): 1, ('ensures', 'that', 'the'): 1, ('that', 'the', 'military'): 1, ('the', 'military', 'will'): 1, ('military', 'will', 'forever'): 1, ('will', 'forever', 'heed'): 1, ('forever', 'heed', 'Party'): 1, ('heed', 'Party', 'commands'): 1, ('Party', 'commands', '.'): 1, ('It', 'is', 'a', 'guide'): 1, ('is', 'a', 'guide', 'to'): 1, ('a', 'guide', 'to', 'action'): 1, ('guide', 'to', 'action', 'that'): 1, ('to', 'action', 'that', 'ensures'): 1, ('action', 'that', 'ensures', 'that'): 1, ('that', 'ensures', 'that', 'the'): 1, ('ensures', 'that', 'the', 'military'): 1, ('that', 'the', 'military', 'will'): 1, ('the', 'military', 'will', 'forever'): 1, ('military', 'will', 'forever', 'heed'): 1, ('will', 'forever', 'heed', 'Party'): 1, ('forever', 'heed', 'Party', 'commands'): 1, ('heed', 'Party', 'commands', '.'): 1, ('guiding',): 1, ('principle',): 1, ('which',): 1, ('guarantees',): 1, ('forces',): 1, ('always',): 1, ('being',): 1, ('under',): 1, ('command',): 1, ('of',): 1, ('is', 'the'): 1, ('the', 'guiding'): 1, ('guiding', 'principle'): 1, ('principle', 'which'): 1, ('which', 'guarantees'): 1, ('guarantees', 'the'): 1, ('military', 'forces'): 1, ('forces', 'always'): 1, ('always', 'being'): 1, ('being', 'under'): 1, ('under', 'the'): 1, ('the', 'command'): 1, ('command', 'of'): 1, ('of', 'the'): 1, ('the', 'Party'): 1, ('Party', '.'): 1, ('It', 'is', 'the'): 1, ('is', 'the', 'guiding'): 1, ('the', 'guiding', 'principle'): 1, ('guiding', 'principle', 'which'): 1, ('principle', 'which', 'guarantees'): 1, ('which', 'guarantees', 'the'): 1, ('guarantees', 'the', 'military'): 1, ('the', 'military', 'forces'): 1, ('military', 'forces', 'always'): 1, ('forces', 'always', 'being'): 1, ('always', 'being', 'under'): 1, ('being', 'under', 'the'): 1, ('under', 'the', 'command'): 1, ('the', 'command', 'of'): 1, ('command', 'of', 'the'): 1, ('of', 'the', 'Party'): 1, ('the', 'Party', '.'): 1, ('It', 'is', 'the', 'guiding'): 1, ('is', 'the', 'guiding', 'principle'): 1, ('the', 'guiding', 'principle', 'which'): 1, ('guiding', 'principle', 'which', 'guarantees'): 1, ('principle', 'which', 'guarantees', 'the'): 1, ('which', 'guarantees', 'the', 'military'): 1, ('guarantees', 'the', 'military', 'forces'): 1, ('the', 'military', 'forces', 'always'): 1, ('military', 'forces', 'always', 'being'): 1, ('forces', 'always', 'being', 'under'): 1, ('always', 'being', 'under', 'the'): 1, ('being', 'under', 'the', 'command'): 1, ('under', 'the', 'command', 'of'): 1, ('the', 'command', 'of', 'the'): 1, ('command', 'of', 'the', 'Party'): 1, ('of', 'the', 'Party', '.'): 1})
hyp_cnt_sacre =  Counter({('the',): 3, ('It',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party',): 1, ('.',): 1, ('It', 'is'): 1, ('is', 'a'): 1, ('a', 'guide'): 1, ('guide', 'to'): 1, ('to', 'action'): 1, ('action', 'which'): 1, ('which', 'ensures'): 1, ('ensures', 'that'): 1, ('that', 'the'): 1, ('the', 'military'): 1, ('military', 'always'): 1, ('always', 'obeys'): 1, ('obeys', 'the'): 1, ('the', 'commands'): 1, ('commands', 'of'): 1, ('of', 'the'): 1, ('the', 'party'): 1, ('party', '.'): 1, ('It', 'is', 'a'): 1, ('is', 'a', 'guide'): 1, ('a', 'guide', 'to'): 1, ('guide', 'to', 'action'): 1, ('to', 'action', 'which'): 1, ('action', 'which', 'ensures'): 1, ('which', 'ensures', 'that'): 1, ('ensures', 'that', 'the'): 1, ('that', 'the', 'military'): 1, ('the', 'military', 'always'): 1, ('military', 'always', 'obeys'): 1, ('always', 'obeys', 'the'): 1, ('obeys', 'the', 'commands'): 1, ('the', 'commands', 'of'): 1, ('commands', 'of', 'the'): 1, ('of', 'the', 'party'): 1, ('the', 'party', '.'): 1, ('It', 'is', 'a', 'guide'): 1, ('is', 'a', 'guide', 'to'): 1, ('a', 'guide', 'to', 'action'): 1, ('guide', 'to', 'action', 'which'): 1, ('to', 'action', 'which', 'ensures'): 1, ('action', 'which', 'ensures', 'that'): 1, ('which', 'ensures', 'that', 'the'): 1, ('ensures', 'that', 'the', 'military'): 1, ('that', 'the', 'military', 'always'): 1, ('the', 'military', 'always', 'obeys'): 1, ('military', 'always', 'obeys', 'the'): 1, ('always', 'obeys', 'the', 'commands'): 1, ('obeys', 'the', 'commands', 'of'): 1, ('the', 'commands', 'of', 'the'): 1, ('commands', 'of', 'the', 'party'): 1, ('of', 'the', 'party', '.'): 1})

In [19]:
ref_cnt_sacre, hyp_cnt_sacre

(Counter({('the',): 4,
          ('that',): 2,
          ('It',): 1,
          ('is',): 1,
          ('a',): 1,
          ('guide',): 1,
          ('to',): 1,
          ('action',): 1,
          ('ensures',): 1,
          ('military',): 1,
          ('will',): 1,
          ('forever',): 1,
          ('heed',): 1,
          ('Party',): 1,
          ('commands',): 1,
          ('.',): 1,
          ('It', 'is'): 1,
          ('is', 'a'): 1,
          ('a', 'guide'): 1,
          ('guide', 'to'): 1,
          ('to', 'action'): 1,
          ('action', 'that'): 1,
          ('that', 'ensures'): 1,
          ('ensures', 'that'): 1,
          ('that', 'the'): 1,
          ('the', 'military'): 1,
          ('military', 'will'): 1,
          ('will', 'forever'): 1,
          ('forever', 'heed'): 1,
          ('heed', 'Party'): 1,
          ('Party', 'commands'): 1,
          ('commands', '.'): 1,
          ('It', 'is', 'a'): 1,
          ('is', 'a', 'guide'): 1,
          ('a', 'guide', 'to'): 1

In [21]:
hyp_cnt_debug = Counter({('the',): 3, ('it',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party',): 1, ('.',): 1})
refs_cnt_debug = [Counter({('that',): 2, ('it',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('ensures',): 1, ('the',): 1, ('military',): 1, ('will',): 1, ('forever',): 1, ('heed',): 1, ('party',): 1, ('commands',): 1, ('.',): 1}), Counter({('the',): 4, ('it',): 1, ('is',): 1, ('guiding',): 1, ('principle',): 1, ('which',): 1, ('guarantees',): 1, ('military',): 1, ('forces',): 1, ('always',): 1, ('being',): 1, ('under',): 1, ('command',): 1, ('of',): 1, ('party',): 1, ('.',): 1})]

In [24]:
[(k, v) for k, v in hyp_cnt_sacre.items() if len(k) ==1] , hyp_cnt_debug

([(('the',), 3),
  (('It',), 1),
  (('is',), 1),
  (('a',), 1),
  (('guide',), 1),
  (('to',), 1),
  (('action',), 1),
  (('which',), 1),
  (('ensures',), 1),
  (('that',), 1),
  (('military',), 1),
  (('always',), 1),
  (('obeys',), 1),
  (('commands',), 1),
  (('of',), 1),
  (('party',), 1),
  (('.',), 1)],
 Counter({('the',): 3,
          ('it',): 1,
          ('is',): 1,
          ('a',): 1,
          ('guide',): 1,
          ('to',): 1,
          ('action',): 1,
          ('which',): 1,
          ('ensures',): 1,
          ('that',): 1,
          ('military',): 1,
          ('always',): 1,
          ('obeys',): 1,
          ('commands',): 1,
          ('of',): 1,
          ('party',): 1,
          ('.',): 1}))

In [25]:
ref_cnt_debug = refs_cnt_debug[0] + refs_cnt_debug[1]

In [37]:
ref_cnt_sacre_uni = dict([(k, v) for k, v in ref_cnt_sacre.items() if len(k) ==1])

for k in hyp_cnt_sacre:
    v1 = ref_cnt_sacre_uni.get(k,0); v2 = ref_cnt_debug.get(k, 0)
    c_sacre = min(v1, hyp_cnt_sacre.get(k, 0))
    c_debug = min(v2, hyp_cnt_debug.get(k, 0))
    print(k, v1, v2, v1-v2, "|", hyp_cnt_sacre.get(k, 0), hyp_cnt_debug.get(k,0), " | ",
         c_sacre, c_debug, c_sacre-c_debug )


('the',) 4 5 -1 | 3 3  |  3 3 0
('It',) 1 0 1 | 1 0  |  1 0 1
('is',) 1 2 -1 | 1 1  |  1 1 0
('a',) 1 1 0 | 1 1  |  1 1 0
('guide',) 1 1 0 | 1 1  |  1 1 0
('to',) 1 1 0 | 1 1  |  1 1 0
('action',) 1 1 0 | 1 1  |  1 1 0
('which',) 1 1 0 | 1 1  |  1 1 0
('ensures',) 1 1 0 | 1 1  |  1 1 0
('that',) 2 2 0 | 1 1  |  1 1 0
('military',) 1 2 -1 | 1 1  |  1 1 0
('always',) 1 1 0 | 1 1  |  1 1 0
('obeys',) 0 0 0 | 1 1  |  0 0 0
('commands',) 1 1 0 | 1 1  |  1 1 0
('of',) 1 1 0 | 1 1  |  1 1 0
('party',) 0 2 -2 | 1 1  |  0 1 -1
('.',) 1 2 -1 | 1 1  |  1 1 0
('It', 'is') 0 0 0 | 1 0  |  0 0 0
('is', 'a') 0 0 0 | 1 0  |  0 0 0
('a', 'guide') 0 0 0 | 1 0  |  0 0 0
('guide', 'to') 0 0 0 | 1 0  |  0 0 0
('to', 'action') 0 0 0 | 1 0  |  0 0 0
('action', 'which') 0 0 0 | 1 0  |  0 0 0
('which', 'ensures') 0 0 0 | 1 0  |  0 0 0
('ensures', 'that') 0 0 0 | 1 0  |  0 0 0
('that', 'the') 0 0 0 | 1 0  |  0 0 0
('the', 'military') 0 0 0 | 1 0  |  0 0 0
('military', 'always') 0 0 0 | 1 0  |  0 0 0
('always', 

## Smoothing

In [55]:
import sacrebleu

class BLEUV2:
    default_tokenizer = sacrebleu.tokenizers.tokenizer_13a.Tokenizer13a()
    
    @classmethod
    def tokenize(cls, line):
        return cls.default_tokenizer(line)
    
    @classmethod
    def count_ngrams(cls, text, ngrams):
        text = cls.tokenize(text)
        words = text.lower().split()
        ngram_segments = [tuple(words[i:i+ngrams]) for i in range(len(words)-ngrams+1)]

        return Counter(ngram_segments)
        
    @classmethod
    def match_ngram(cls, hyp_text, refs_text, ngrams=1):
        """can_sen: candidate text/sentence
        ref_text: reference text/sentence, or ground truth
        """
        
        hyp_cnt = cls.count_ngrams(hyp_text, ngrams=ngrams)
        #print(hyp_cnt)
        refs_cnt = [cls.count_ngrams(ref_text, ngrams=ngrams) for ref_text in refs_text]
        #print(refs_cnt)
        
        hyp_len = sum(hyp_cnt.values())
        
        n_shares = 0
        for k in hyp_cnt:
            n_hits = max(ref_cnt.get(k, 0) for ref_cnt in refs_cnt)
            cur_shares = min(hyp_cnt[k], n_hits)
            n_shares += cur_shares
            #print(k, hyp_cnt[k], n_hits, cur_shares)
        
        print(n_shares, hyp_len, n_shares/hyp_len)
        return n_shares, hyp_len, n_shares/hyp_len
    
    @classmethod
    def BP(cls, can_len, refs_len):
        """calculate brevity penalty
        """
        refs_len = list(set(refs_len))  #de-duplicate
        sorted_refs_len = sorted(refs_len, key=lambda x: (abs(can_len-x), x))
        r = sorted_refs_len[0]
        # print(sorted_refs_len, r)
        
        c = can_len
        bp = 1 if c >= r else math.exp(1-r/c)
        
        return bp
    
    @classmethod
    def sentence_score(cls, can_text, refs_text, max_ngram_order=4):
        ps = [cls.match_ngram(can_text, refs_text, ngrams=i)[2] for i in range(1, max_ngram_order+1)]
        print("ps: ", ps)
        can_len = sum(cls.count_ngrams(can_text, ngrams=1).values())
        refs_len = [sum(cls.count_ngrams(ref_text, ngrams=1).values()) for ref_text in refs_text]
        bp = cls.BP(can_len, refs_len)
        print("bp: ", bp)
        
        wsum_log_ps = sum([math.log(p)*1/max_ngram_order if p > 0 else 0 for p in ps])
        print("wsum_log_ps: ", wsum_log_ps)
        bleu = bp*math.exp(wsum_log_ps)*100

        return bleu

In [56]:
for can in example_2['Candidate']:
    print(BLEUV2.sentence_score(can, example_2['Reference']))

3 8 0.375
0 7 0.0
0 6 0.0
0 5 0.0
ps:  [0.375, 0.0, 0.0, 0.0]
bp:  1
wsum_log_ps:  -0.24520731325293155
78.25422900366436


In [57]:
bleu.sentence_score(example_2['Candidate'][0].lower(), [x.lower() for x in example_2['Reference']])

ref_lens, ref_ngrams: 
 [7, 8] 
 Counter({('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('.',): 1, ('the', 'cat'): 1, ('cat', 'is'): 1, ('is', 'on'): 1, ('on', 'the'): 1, ('the', 'mat'): 1, ('mat', '.'): 1, ('the', 'cat', 'is'): 1, ('cat', 'is', 'on'): 1, ('is', 'on', 'the'): 1, ('on', 'the', 'mat'): 1, ('the', 'mat', '.'): 1, ('the', 'cat', 'is', 'on'): 1, ('cat', 'is', 'on', 'the'): 1, ('is', 'on', 'the', 'mat'): 1, ('on', 'the', 'mat', '.'): 1, ('there',): 1, ('a',): 1, ('there', 'is'): 1, ('is', 'a'): 1, ('a', 'cat'): 1, ('cat', 'on'): 1, ('there', 'is', 'a'): 1, ('is', 'a', 'cat'): 1, ('a', 'cat', 'on'): 1, ('cat', 'on', 'the'): 1, ('there', 'is', 'a', 'cat'): 1, ('is', 'a', 'cat', 'on'): 1, ('a', 'cat', 'on', 'the'): 1, ('cat', 'on', 'the', 'mat'): 1})
hyp_len, hyp_ngrams: 
 8 
 Counter({('the',): 7, ('the', 'the'): 6, ('the', 'the', 'the'): 5, ('the', 'the', 'the', 'the'): 4, ('.',): 1, ('the', '.'): 1, ('the', 'the', '.'): 1, ('the', 'the', 'the', '.'): 1})
[De

BLEU = 7.27 37.5/7.1/4.2/2.5 (BP = 1.000 ratio = 1.000 hyp_len = 8 ref_len = 8)

In [58]:
bleu.smooth_method

'exp'

In [61]:
bleu.smooth_value

In [65]:
1/40

0.025

In [69]:
list(zip(*[['a,', 1], ['b', 2], ['c', 3]]))

[('a,', 'b', 'c'), (1, 2, 3)]

In [70]:
from collections import Counter

In [71]:
c = Counter()

In [72]:
c.update(['a', 'b'])

In [73]:
c

Counter({'a': 1, 'b': 1})

In [74]:
c.update(['a', 'b'])

In [75]:
c

Counter({'a': 2, 'b': 2})

In [76]:
c['a']= 3

In [77]:
c

Counter({'a': 3, 'b': 2})

In [78]:
c['f']

0

In [None]:
### Test for transfomerLM blue