# BLEU Score

## Setup

### Imports

In [11]:
import numpy as np
import os
import pandas as pd

In [111]:
from nltk.translate.bleu_score import sentence_bleu
# Two reference sentences and one candidate sentence to score against them.
# NOTE(review): these are raw strings rather than token lists, so nltk
# presumably treats each character as one token - confirm.
reference = [
    '私は犬が好きです',
    'あなたは猫が好きですか',
]
candidate = '私は猫が好きです'
# Score the candidate against both references and show the result.
score = sentence_bleu(reference, candidate)
print('BLEU score:', score)

BLEU score: 0.9036020036098449


### Data

In [12]:
# Work from the raw-data directory so the relative file name used in the next cell resolves.
os.chdir("/home/andi/code/mochiyam/simply-japanese/data/2_RawData") # Change user name to run on your machine

In [13]:
# Read the Excel workbook into a DataFrame. Later cells pass column 0 as the
# source sentences and column 1 as the reference sentences.
data150 = "SNOW_T15_150.xlsx"
df150 = pd.read_excel(data150)

## Code

In [112]:
def bleu_score_sentence(reference, target):
    """
    Score one translated sentence against one reference sentence with BLEU.

    Input:
    reference (string) = the "correct" translation used as reference
    target (string) = the translated text (MT) to evaluate
    Output:
    bleu = the value returned by sentence_bleu (a float; the notebook outputs
           show a plain 0 when nothing matches)

    NOTE(review): both arguments are passed as raw strings, so nltk presumably
    treats each character as one token - confirm this is intended.
    """
    # sentence_bleu takes a list of references, so wrap the single reference.
    references = [reference]
    bleu = sentence_bleu(references, target)
    return bleu

In [84]:
def bleu_score_series(source, reference, target):
    """
    Compute the sentence-level BLEU score for every reference/translation pair.

    Input:
    source (series) = original sentences; only its length is used, to check
                      that all three inputs are aligned
    reference (series) = "correct" translations as reference
    target (series) = translated texts (MT)
    Output:
    bleu (series) = one BLEU score per pair, in input order, with a fresh
                    0..n-1 index
    Raises:
    ValueError if source, reference and target do not all have the same length
    """
    # The original comparison was a bare expression with no effect; enforce it.
    if not (len(source) == len(reference) == len(target)):
        raise ValueError(
            "source, reference and target must have the same length "
            f"(got {len(source)}, {len(reference)} and {len(target)})"
        )

    # Pair the values by position. Indexing with reference[i] / target[i] is
    # label-based on a Series and fails (or pairs the wrong rows) when the
    # index is not the default 0..n-1, e.g. after filtering or slicing.
    bleu_list = [
        bleu_score_sentence(ref, hyp) for ref, hyp in zip(reference, target)
    ]
    return pd.Series(bleu_list)

## Tests

### Sentence test

Compare it to something in English => Spits out 0 as expected

In [123]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "This is not even Japanese")

0

Give it some Japanese => Still a zero

In [124]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "ポッキーが好き")

0

Give it some better Japanese with some words from the source => Effectively zero (about 2e-78).

In [126]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "月曜日に仕事にいきます。")

2.1817887831637873e-78

Give it even closer Japanese => It recognizes a resemblance.

In [127]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "月曜日までに仕事にいきます。")

0.2523623444520119

Give it the reference sentence with a typo (one character missing) => High score, even though the sentence is technically incorrect (verb missing)

In [129]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "月曜日までにこの仕事をわらせてください。")

0.8680538146126793

Give it exactly the source sentence => OK

In [130]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "月曜日までにこの仕事を終えて下さい。")

0.6159692777152226

Give it the identical reference sentence => 1, as expected.

In [131]:
# Row 0, column 1 is the reference sentence. Both positions go in a single
# .iloc call: the chained iloc[0][1] relies on positional fallback for integer
# keys on a label-indexed row, which pandas has deprecated.
bleu_score_sentence(df150.iloc[0, 1], "月曜日までにこの仕事を終わらせてください。")

1.0

### List test

In [72]:
# Stand-in "translations": 150 random integers in [0, 100), each rendered as a string.
target = pd.Series(np.random.randint(0, 100, size=150)).map(str)

In [95]:
# Score every row: column 0 is passed as the source, column 1 as the reference,
# and the random number strings as the translations.
bleu_score_series(df150.iloc[:, 0], df150.iloc[:, 1], target)

0      0
1      0
2      0
3      0
4      0
      ..
145    0
146    0
147    0
148    0
149    0
Length: 150, dtype: int64