In [1]:
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm
import pandas as pd

from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
)
import torch

# Eval Test

In [164]:
df = pd.read_csv('data/filtered/test.csv')

In [3]:
def clean_train(text):
    text = text.lower()
    text = text.replace("[^ a-z.?!,¿]", "")
    text = text.replace("[.?!,¿]", r" \0 ")
    text = text.strip()
    text = f"translate Indonesian to English: {text} </s>"
    return text

def clean_test(text):
    text = text.lower()
    text = text.replace("[^ a-z.?!,¿]", "")
    text = text.replace("[.?!,¿]", r" \0 ")
    text = text.strip()
    return text

In [166]:
df['id'] = df['id'].apply(clean_train)
df['en'] = df['en'].apply(clean_test)

In [11]:
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("model-nmt")

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = model.to(device)

cuda:0


In [46]:
input_seq = list(df['id'].sample(2).values)

In [43]:
input_seq = df['id'].sample(2).values

In [118]:
input_ = []

for a in tqdm(df['id'].values):
    input_.append(tokenizer.encode(a, return_tensors='pt'))

100%|████████████████████████████████████████████████████████████████████████| 100000/100000 [00:40<00:00, 2489.52it/s]


In [119]:
input_tensor = tokenizer(list(df['id'].values), truncation=True, padding='max_length', max_length=64, return_tensors='pt')

In [127]:
import math

length = len(df['id'])
batch_size = 64

input_tensor = []

for i in tqdm(range(math.floor(length / batch_size) + 1)):
    t = tokenizer(list(df['id'].values[i * batch_size : (i+1) * batch_size]),
                  truncation=True, 
                  padding='max_length', 
                  max_length=64,
                  return_tensors='pt')
    input_tensor.append(t)

100%|██████████████████████████████████████████████████████████████████████████████| 1563/1563 [00:33<00:00, 46.08it/s]


In [142]:
output = []
for aa in tqdm(input_tensor):
    a = model.generate(aa['input_ids'].to(device))
    output.append(a)

100%|██████████████████████████████████████████████████████████████████████████████| 1563/1563 [49:57<00:00,  1.92s/it]


In [157]:
decoded = []
for batch in tqdm(output):
    for element in batch:
        decoded.append(tokenizer.decode(element[element > 1]))

100%|██████████████████████████████████████████████████████████████████████████████| 1563/1563 [01:01<00:00, 25.54it/s]


In [172]:
from nltk.translate.bleu_score import sentence_bleu

In [173]:
preds = decoded
reference = df['en'].values

In [174]:
scores = []
for i in tqdm(range(len(preds))):
    score = sentence_bleu([reference[i]], preds[i])
    scores.append(score)

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|████████████████████████████████████████████████████████████████████████| 100000/100000 [00:19<00:00, 5090.11it/s]


In [175]:
df_scores = pd.DataFrame({
    'id': df['id'],
    'en': df['en'],
    'preds': preds,
    'scores': scores
})

In [179]:
df_scores

Unnamed: 0,id,en,preds,scores
0,translate Indonesian to English: hal ini pun d...,this is also true of families .,this can also be true in family environments.,4.044662e-01
1,translate Indonesian to English: rugerius dari...,roger of lauria (c.,rugerius of lauria (ca.,6.141798e-01
2,translate Indonesian to English: waranggi yang...,a crazy man cannot defend himself.,the savages are unable to protect themselves.,8.856455e-02
3,translate Indonesian to English: paper prepare...,"""symposium austronesia, pascasarjana linguisti...",paper prepared for symposium austronesia pasca...,4.438764e-01
4,translate Indonesian to English: akhirnya selu...,the installation is no longer in use.,the entire facility was eventually abandoned.,1.181154e-01
...,...,...,...,...
99995,translate Indonesian to English: tak ada seora...,there can be no change to god’s words.,no one can change god's promises.,4.273039e-01
99996,translate Indonesian to English: sepertinya ad...,it looks like there's an elevator over there.,there is a lift around there.,2.634675e-01
99997,translate Indonesian to English: kamandanu mur...,mithuna is heartbroken a second time.,samantha samantha for both occasions.,2.806589e-78
99998,translate Indonesian to English: danau dibangu...,construction took four years and was completed...,the lake was built for four years and complete...,6.195234e-01


In [178]:
sum(scores) / len(scores) * 100

47.115279489618594

# Eval Train

In [2]:
df = pd.read_csv('data/train.csv')

In [4]:
df['id'] = df['id'].apply(clean_train)
df['en'] = df['en'].apply(clean_test)

In [5]:
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("model")

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = model.to(device)

cuda:0


In [7]:
input_ = []

for a in tqdm(df['id'].values):
    input_.append(tokenizer.encode(a, return_tensors='pt'))

  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."
100%|████████████████████████████████████████████████████████████████████████| 361998/361998 [02:22<00:00, 2532.67it/s]


In [8]:
input_tensor = tokenizer(list(df['id'].values), truncation=True, padding='max_length', max_length=64, return_tensors='pt')

In [9]:
import math

length = len(df['id'])
batch_size = 64

input_tensor = []

for i in tqdm(range(math.floor(length / batch_size) + 1)):
    t = tokenizer(list(df['id'].values[i * batch_size : (i+1) * batch_size]),
                  truncation=True, 
                  padding='max_length', 
                  max_length=64,
                  return_tensors='pt')
    input_tensor.append(t)

100%|██████████████████████████████████████████████████████████████████████████████| 5657/5657 [02:06<00:00, 44.85it/s]


In [10]:
output = []
for aa in tqdm(input_tensor):
    a = model.generate(aa['input_ids'].to(device))
    output.append(a)

 23%|█████████████████▋                                                          | 1317/5657 [41:38<2:17:14,  1.90s/it]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\user\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-8f0d41751d63>", line 3, in <module>
    a = model.generate(aa['input_ids'].to(device))
  File "C:\Users\user\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "C:\Users\user\anaconda3\envs\pytorch\lib\site-packages\transformers\generation_utils.py", line 998, in generate
    **model_kwargs,
  File "C:\Users\user\anaconda3\envs\pytorch\lib\site-packages\transformers\generation_utils.py", line 1295, in greedy_search
    output_hidden_states=output_hidden_states,
  File "C:\Users\user\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\user\anaconda3\envs\pytorch\

TypeError: object of type 'NoneType' has no len()

In [None]:
decoded = []
for batch in tqdm(output):
    for element in batch:
        decoded.append(tokenizer.decode(element[element > 1]))

In [None]:
from nltk.translate.bleu_score import sentence_bleu

In [None]:
preds = decoded
reference = df['en'].values

In [None]:
scores = []
for i in tqdm(range(len(preds))):
    score = sentence_bleu([reference[i]], preds[i])
    scores.append(score)

In [None]:
df_scores = pd.DataFrame({
    'id': df['id'],
    'en': df['en'],
    'preds': preds,
    'scores': scores
})

In [None]:
df_scores

In [None]:
sum(scores) / len(scores) * 100