## Transformer Pipeline

<br><br>

### Development Environment

In [3]:
import os

# CUDA-related process settings. They are applied before torch is imported
# further down so they are already in place when CUDA is initialised.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # enumerate GPUs by PCI bus position
    "CUDA_LAUNCH_BLOCKING": "0",         # keep kernel launches asynchronous
    "CUDA_VISIBLE_DEVICES": "1",         # expose only the GPU at index 1
    "TORCH_USE_CUDA_DSA": "0",           # device-side assertions off
})

import glob
import torch
import evaluate
from model import TransformerForTranslation
from prepare_data import SentencePieceTokenizer, TrainingDataset
from nltk.translate.bleu_score import sentence_bleu
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import sentencepiece as spm

### BLEU Score

In [17]:
# Worked example: score one hand-written prediction against one reference
# with the "bleu" metric loaded through the `evaluate` package.
prediction_tokens = ['And', 'at', 'the', 'end', 'of', 'that', 'remarkable', 'conversations', 'with', 'kids', 'and', 'their', 'best', 'friends', 'all', 'the', 'United', 'States', ',', 'after', 'two', 'years', ',', 'we', 'ran', 'together', ',', 'studies', 'data', 'from', 'another', '10,000', 'children', ',', 'put', ',', 'to', 'look', 'up', 'of', 'what', 'we', 'thought', 'were', 'the', 'most', 'results', 'of', 'our', 'research']
label_tokens = ['And', 'at', 'the', 'end', 'of', 'those', 'remarkable', 'conversations', 'with', 'kids', 'and', 'their', 'best', 'friends', 'across', 'the', 'United', 'States', ',', 'after', 'two', 'years', ',', 'we', 'pulled', 'together', 'some', 'survey', 'data', 'from', 'another', '10,000', 'children', ',', 'drew', 'up', 'a', 'set', 'up', 'of', 'what', 'we', 'thought', 'were', 'the', 'key', 'findings', 'of', 'our', 'research']

# The metric is fed plain strings, so the token lists are joined with single spaces.
prediction = ' '.join(prediction_tokens)
label = ' '.join(label_tokens)

metric = evaluate.load("bleu")
# One prediction, paired with a list holding its single reference.
result = metric.compute(predictions=[prediction], references=[[label]])
sentence_bleu_score = result['bleu']

# Print each value under its heading, followed by a blank line.
for template, value in (
    ("Prediction:\n{}\n", prediction),
    ("Label:\n{}\n", label),
    ("Bleu:\n{}\n", sentence_bleu_score),
):
    print(template.format(value))

Prediction:
And at the end of that remarkable conversations with kids and their best friends all the United States , after two years , we ran together , studies data from another 10,000 children , put , to look up of what we thought were the most results of our research

Label:
And at the end of those remarkable conversations with kids and their best friends across the United States , after two years , we pulled together some survey data from another 10,000 children , drew up a set up of what we thought were the key findings of our research

Bleu:
0.5853669350898144



### Inference

#### BLEU

In [None]:
# Expose only the GPU at index 1 (PCI-bus ordering) to the process launched
# below; the `!` shell command inherits these variables from os.environ.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Run the project's `inference` module with --evaluation_metric 'bleu'.
# NOTE(review): `--gpu 0` presumably indexes into the visible devices (i.e. the
# single GPU exposed above) -- confirm against the inference module.
# NOTE(review): training-style flags (--epoch, --warmup_steps, --learning_rate,
# ...) are passed as well; presumably the module shares its argument parser
# with training -- verify whether inference actually reads them.
# Keep comments out of the command itself: a `#` inside the backslash-continued
# lines would cut the command short.
!python -m inference \
    --mode 'run' \
    --test_dataset 'F' \
    --inference_dataset 'inference/data/translation_pair' \
    --model_name 'demo_transformer_base' \
    --config './config/demo_transformer_base.json' \
    --load_model_path './custom_output/demo_transformer_base_51.pt' \
    --data_dir './data/iwslt17.de.en/' \
    --txt_dir 'txt/' \
    --tokenizer_dir 'data/iwslt17.de.en/tokenizer' \
    --tokenizer_model_name 'transformer-sp-bpe-iwslt' \
    --encoding_type 'bpe' \
    --src_lang 'de' \
    --tgt_lang 'en' \
    --max_seq_length 50 \
    --pad_id 0 \
    --unk_id 1 \
    --bos_id 2 \
    --eos_id 3 \
    --evaluation_metric 'bleu' \
    --seed 42 \
    --epoch 100 \
    --logging_step 100 \
    --gpu 0 \
    --batch_size 128 \
    --sinusoidal_wave 10000 \
    --embedding_dim 512 \
    --num_attention_heads 8 \
    --num_sub_layer 6 \
    --feed_forward_size 2048 \
    --attention_dropout_prob 0.1 \
    --label_smoothing 0.1 \
    --optimizer_coefficient 0.1 \
    --warmup_steps 4000 \
    --learning_rate 1e-4 \
    --adam_beta1 0.9 \
    --adam_beta2 0.98 \
    --adam_epsilon 1e-9

#### SacreBLEU

In [None]:
# Expose only the GPU at index 1 (PCI-bus ordering) to the process launched
# below; the `!` shell command inherits these variables from os.environ.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Run the project's `inference` module with --evaluation_metric 'sacrebleu';
# every other argument matches the BLEU run in the previous cell.
# NOTE(review): `--gpu 0` presumably indexes into the visible devices (i.e. the
# single GPU exposed above) -- confirm against the inference module.
# Keep comments out of the command itself: a `#` inside the backslash-continued
# lines would cut the command short.
!python -m inference \
    --mode 'run' \
    --test_dataset 'F' \
    --inference_dataset 'inference/data/translation_pair' \
    --model_name 'demo_transformer_base' \
    --config './config/demo_transformer_base.json' \
    --load_model_path './custom_output/demo_transformer_base_51.pt' \
    --data_dir './data/iwslt17.de.en/' \
    --txt_dir 'txt/' \
    --tokenizer_dir 'data/iwslt17.de.en/tokenizer' \
    --tokenizer_model_name 'transformer-sp-bpe-iwslt' \
    --encoding_type 'bpe' \
    --src_lang 'de' \
    --tgt_lang 'en' \
    --max_seq_length 50 \
    --pad_id 0 \
    --unk_id 1 \
    --bos_id 2 \
    --eos_id 3 \
    --evaluation_metric 'sacrebleu' \
    --seed 42 \
    --epoch 100 \
    --logging_step 100 \
    --gpu 0 \
    --batch_size 128 \
    --sinusoidal_wave 10000 \
    --embedding_dim 512 \
    --num_attention_heads 8 \
    --num_sub_layer 6 \
    --feed_forward_size 2048 \
    --attention_dropout_prob 0.1 \
    --label_smoothing 0.1 \
    --optimizer_coefficient 0.1 \
    --warmup_steps 4000 \
    --learning_rate 1e-4 \
    --adam_beta1 0.9 \
    --adam_beta2 0.98 \
    --adam_epsilon 1e-9

### Reference

<b>Paper</b>
<br>[Attention Is All You Need](https://arxiv.org/abs/1706.03762)

<br><b>Data</b>
<br>[IWSLT 2017-01](https://wit3.fbk.eu/2017-01)
<br>[IWSLT 2017-01-B](https://wit3.fbk.eu/2017-01-b)