In [92]:
import sys
import os
import codecs
import math

In [93]:
'''
Loading file
'''

def load_data(file):
    """Read a one-token-per-line file and rebuild its sentences.

    A line that is exactly ``<s>`` starts a new (empty) sentence, a line that
    is exactly ``</s>`` stores the sentence built so far, and every other
    line is appended to the current sentence as a token.

    Args:
        file: Path of the file to read.

    Returns:
        A list with one string per ``</s>`` marker. Each token in a sentence
        is followed by a single space, so a non-empty sentence ends with a
        trailing space.
    """
    sentence = ''
    references = []
    # The context manager closes the handle even if reading fails part-way.
    with open(file, 'r') as data:
        for line in data:
            token = line.rstrip("\n")
            if token == '<s>':
                sentence = ''
            elif token == '</s>':
                references.append(sentence)
            else:
                sentence += token + ' '
    return references
    

In [94]:
'''
The brevity penalty (BP) decays exponentially as the candidate gets shorter
than the reference. BP is 1.0 when the candidate translation length is
greater than or equal to the reference translation length.
'''

def brevity_penalty(candidate, reference):
    """Return the BLEU brevity penalty for the given corpus lengths.

    Args:
        candidate: Total number of tokens in the candidate translations.
        reference: Total number of tokens in the reference translations.

    Returns:
        1 when the candidate is longer than the reference, otherwise
        ``exp(1 - reference / candidate)`` (which is 1.0 when the lengths
        are equal). An empty candidate (length 0 or less) yields 0.0, the
        limit of the penalty as the candidate length approaches zero.
    """
    # Guard first: an empty candidate would otherwise divide by zero below.
    if candidate <= 0:
        return 0.0
    if candidate > reference:
        return 1
    return math.exp(1 - float(reference) / float(candidate))

In [95]:
"""
Sum of the clipped n-gram counts for all the candidate sentences
in the corpus divide by the number of candidate n-grams

return: natural logarithm of the precision value
"""
def modified_precision(clipped_count, candidate_length):
    """Return the natural log of the modified n-gram precision.

    Args:
        clipped_count: Sum of the clipped n-gram counts over the corpus.
        candidate_length: Total number of candidate n-grams in the corpus.

    Returns:
        ``log(clipped_count / candidate_length)``. When either argument is
        zero (or negative) the precision is zero or undefined, so
        ``float('-inf')`` is returned; ``math.exp`` of that value is 0.0.
    """
    # log(0) raises ValueError and x/0 raises ZeroDivisionError, so both
    # degenerate cases are mapped to -inf up front.
    if clipped_count <= 0 or candidate_length <= 0:
        return float('-inf')
    precision = float(clipped_count) / float(candidate_length)
    return math.log(precision)

In [96]:
def get_clipped_dic(candidate_dic, reference_dic):
    """Clip each candidate n-gram count by its count in the reference.

    Args:
        candidate_dic: Mapping of candidate n-gram to its count.
        reference_dic: Mapping of reference n-gram to its count.

    Returns:
        A new dict holding, for every n-gram present in both mappings, the
        smaller of the two counts. N-grams missing from the reference are
        left out.
    """
    return {
        ngram: min(count, reference_dic[ngram])
        for ngram, count in candidate_dic.items()
        if ngram in reference_dic
    }

def get_clipped_dic_count(clipped_dic):
    """Return the sum of all counts stored in ``clipped_dic``.

    Args:
        clipped_dic: Mapping of n-gram to its clipped count.

    Returns:
        The total of the mapping's values; 0 for an empty mapping.
    """
    return sum(clipped_dic.values())

In [97]:

"""
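    Generate n-gram counts for a candidate/reference sentence pair.
    @candidate: candidate sentence, words separated by single spaces
    @reference: reference sentence, words separated by single spaces
    @n: order of the n-grams to generate
    return: candidate n-gram counts, reference n-gram counts, and the
            number of candidate n-grams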
"""

def _count_n_grams(tokens, n):
    """Count every contiguous window of ``n`` tokens in ``tokens``."""
    counts = {}
    for start in range(len(tokens) - n + 1):
        # Key format: each token followed by "/", e.g. "the/cat/".
        key = "".join(token + "/" for token in tokens[start:start + n])
        counts[key] = counts.get(key, 0) + 1
    return counts


def generate_n_gram(candidate, reference, n):
    """Build n-gram count tables for a candidate/reference sentence pair.

    Both sentences are split on single spaces. Empty tokens (produced by an
    empty sentence or by consecutive spaces) are discarded so they are not
    counted as words.

    Args:
        candidate: Candidate sentence as a space-separated string.
        reference: Reference sentence as a space-separated string.
        n: Order of the n-grams to generate.

    Returns:
        A tuple ``(can_dic, ref_dic, can_len)`` where the two dicts map an
        n-gram key (each token followed by ``"/"``) to its number of
        occurrences, and ``can_len`` is the total number of n-grams found in
        the candidate.
    """
    can_list = [token for token in candidate.split(" ") if token]
    ref_list = [token for token in reference.split(" ") if token]

    can_dic = _count_n_grams(can_list, n)
    ref_dic = _count_n_grams(ref_list, n)
    # Every candidate window contributes exactly one count.
    can_len = sum(can_dic.values())
    return can_dic, ref_dic, can_len

In [98]:
"""
    Generating n-gram for the calculation of BLEU score.
"""

def get_count(candidate, reference, n):
    """Return the clipped n-gram match count and the candidate n-gram total.

    Args:
        candidate: Candidate sentence as a space-separated string.
        reference: Reference sentence as a space-separated string.
        n: Order of the n-grams to compare.

    Returns:
        A tuple ``(clipped_count, candidate_total)``: the sum of the
        candidate n-gram counts after clipping against the reference, and
        the number of candidate n-grams reported by ``generate_n_gram``.
    """
    candidate_counts, reference_counts, candidate_total = generate_n_gram(
        candidate, reference, n)
    matched = get_clipped_dic_count(
        get_clipped_dic(candidate_counts, reference_counts))
    return matched, candidate_total


In [111]:
"""
    Individually calculating scores for all 4 grams
"""



def main(source, target):
    """Compute brevity-penalised n-gram precisions for orders 1 to 4.

    The candidate sentences are read from ``source`` and the reference
    sentences from ``target`` (both via ``load_data``); they are paired up
    in order. For each order the clipped match counts and candidate n-gram
    totals are accumulated over the whole corpus, and the score is
    ``brevity_penalty * precision``. Each order is scored on its own; the
    four values are not combined into a single geometric-mean score.

    Args:
        source: Path of the candidate (system output) token file.
        target: Path of the reference token file.

    Returns:
        A 4-tuple ``(unigram, bigram, trigram, fourgram)`` of scores. An
        order with no candidate n-grams or no matches scores 0.0, and an
        empty candidate corpus gives 0.0 for every order.
    """
    import warnings  # local import: only used for the mismatch notice below

    max_order = 4

    source_data = load_data(source)
    target_data = load_data(target)

    # zip() below stops at the shorter corpus; make that visible instead of
    # silently scoring a truncated set of sentence pairs.
    if len(source_data) != len(target_data):
        warnings.warn(
            "candidate and reference files contain a different number of "
            "sentences (%d vs %d); extra sentences are ignored"
            % (len(source_data), len(target_data))
        )

    can_len, ref_len = 0, 0
    clipped_counts = [0] * max_order  # index 0 -> unigrams ... 3 -> 4-grams
    total_counts = [0] * max_order

    for c_line, r_line in zip(source_data, target_data):
        c_line = c_line.strip()
        r_line = r_line.strip()
        # Ignore empty tokens so an empty sentence contributes length 0
        # ("".split(" ") would otherwise count one empty word).
        can_len += len([token for token in c_line.split(" ") if token])
        ref_len += len([token for token in r_line.split(" ") if token])

        for order in range(1, max_order + 1):
            c_c, t_c = get_count(c_line, r_line, order)
            clipped_counts[order - 1] += c_c
            total_counts[order - 1] += t_c

    # An empty candidate corpus would divide by zero inside the penalty.
    bp = brevity_penalty(can_len, ref_len) if can_len > 0 else 0.0

    scores = []
    for clipped, total in zip(clipped_counts, total_counts):
        if clipped > 0 and total > 0:
            scores.append(bp * math.exp(modified_precision(clipped, total)))
        else:
            # Zero matches (log of 0) or no n-grams at all: precision is 0.
            scores.append(0.0)

    return tuple(scores)


In [117]:
def evaluate():
    """Score the train and test file pairs and display one table for each.

    Both calls to ``main`` are made before anything is printed. Each table
    is a list of ``[label, score]`` rows passed to ``display_table``.
    """
    # NOTE(review): display_table is not defined in the visible cells --
    # confirm it is defined before this cell runs on a fresh kernel.
    labels = ['Unigram :', 'Bigram : ', 'Trigram :', '4gram :']

    train_scores = main('train-source.txt', 'train-target.txt')
    test_scores = main('test-source.txt', 'test-target.txt')

    reports = [
        ('----------------------BLEU Score Train------------------------',
         train_scores),
        ('----------------------BLEU Score Test------------------------',
         test_scores),
    ]
    for banner, scores in reports:
        print(banner)
        display_table([[label, score] for label, score in zip(labels, scores)])
    



In [118]:
# Run the evaluation: scores the train and test file pairs and shows both tables.
evaluate()

----------------------BLEU Score Train------------------------


0,1,2,3
Unigram :,,0.7517325357522582,
Bigram :,,0.5636116001979694,
Trigram :,,0.4276082523222578,
4gram :,,0.3239967134640803,


----------------------BLEU Score Test------------------------


0,1,2,3
Unigram :,,0.7743372494213215,
Bigram :,,0.5926382609489884,
Trigram :,,0.4572905201703593,
4gram :,,0.3525154545911444,
