# `quickmt` Benchmark

In [1]:
from time import time

import datasets
from fire import Fire
from quickmt import M2m100Translator, NllbTranslator, OpusmtTranslator, Translator
from sacrebleu import BLEU, CHRF, TER

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
bleu = BLEU()
chrf = CHRF()
ter = TER()

In [9]:
src_lang = "it"
tgt_lang = "en"
src_lang_flores = "ita_Latn"
tgt_lang_flores = "eng_Latn"

quickmt_model_path = "../../quickmt-models/quickmt-en-ja"

opusmt_model_path = "../../ct2-models/opus-mt-it-en-ct2"
opusmt_model_string = "Helsinki-NLP/opus-mt-it-en"

nllb600_model_path = "../../ct2-models/nllb-200-distilled-600M-ct2"
nllb1b_model_path = "../../ct2-models/nllb-200-distilled-1.3B-ct2"
m2m100_418m_model_path = "../../ct2-models/m2m100_418-ct2/"
m2m100_1b_model_path = "../../ct2-models/m2m100_1.2B-ct2/"

compute_device = "cpu"

In [10]:
try:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{src_lang_flores}-{tgt_lang_flores}",
    )
except:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{tgt_lang_flores}-{src_lang_flores}",
    )

src = []
ref = []
for i in flores["devtest"]:
    src.append(i[f"sentence_{src_lang_flores}"])
    ref.append(i[f"sentence_{tgt_lang_flores}"])

In [11]:
src[1]

'Lo studio è ancora in fase iniziale, come dichiarato cautelativamente dal dottor Ehud Ur, professore di medicina alla Dalhousie University di Halifax, Nuova Scozia, e direttore del dipartimento clinico e scientifico della Canadian Diabetes Association.'

In [12]:
with open(f"flores.{src_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in src]))
with open(f"flores.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in ref]))

## Quickmt

In [58]:
translator = Translator(model_path=quickmt_model_path, device=compute_device)

In [59]:
translator(src[1], beam_size=5)

Translation time: 0.036295413970947266


'ノバスコシア州ハリファックスのダルハウジー大学の医学教授で、カナダ糖尿病協会の臨床および科学部門の議長であるEhud Ur博士は、研究はまだ初期段階にあると警告しました。'

In [60]:
translator(src[1], sampling_temperature=1.2, beam_size=1, sampling_topk=50, sampling_topp=0.9)

Translation time: 0.05721259117126465


'Ehud Ur博士は、ノバスコシア州ハリファックスのダルハウジー大学医学部の教授で、カナダ糖尿病協会の臨床および科学的部門の責任者は、研究はまだその初期の段階であると言いました。'

In [17]:
%%time
mt = translator(src, max_batch_size=64)

Translation time: 1.2255754470825195
CPU times: user 4.3 s, sys: 1.51 ms, total: 4.3 s
Wall time: 1.29 s


In [18]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 3.63 3.7/3.9/3.8/3.2 (BP = 1.000 ratio = 1.800 hyp_len = 2099 ref_len = 1166)
chrF2 = 42.04
TER = 136.91


In [19]:
with open(f"quickmt.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [20]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t quickmt.{tgt_lang} --batch_size 32 --only_system

Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 48998.88it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:04<00:00,  7.74it/s]
quickmt.ja	score: 0.8908


## OpusMT

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model Helsinki-NLP/opus-mt-en-de --output_dir ./ct2-models/opus-mt-en-de-ct2
```


In [15]:
translator = OpusmtTranslator(
    model_path=opusmt_model_path,
    model_string=opusmt_model_string,
    device=compute_device,
)

In [16]:
translator(src[0])

Translation time: 0.5747060775756836


'"We have four-month-old mice who were diabetics and now they are no longer diabetics," he added.'

In [17]:
%%time
mt = translator(src)

Translation time: 114.44406247138977
CPU times: user 7min 33s, sys: 1.9 s, total: 7min 35s
Wall time: 1min 55s


In [18]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 29.48 62.1/36.1/22.8/14.8 (BP = 1.000 ratio = 1.069 hyp_len = 26430 ref_len = 24721)
chrF2 = 59.96
TER = 59.48


In [52]:
with open(f"opusmt.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [53]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t opusmt.{tgt_lang} --batch_size 32 --only_system

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 36095.56it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:05<00:00,  5.37it/s]
opusmt.ja	score: 0.6291


## NLLB-600M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-600M --output_dir ../ct2-models/nllb-200-distilled-600M-ct2
```

In [67]:
translator = NllbTranslator(model_path=nllb600_model_path, device=compute_device)

In [68]:
%%time
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 22.75724220275879
CPU times: user 40.5 s, sys: 58.5 ms, total: 40.5 s
Wall time: 23 s


In [69]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 33.49 65.1/40.0/26.7/18.1 (BP = 1.000 ratio = 1.037 hyp_len = 25636 ref_len = 24721)
chrF2 = 61.97
TER = 54.36


In [70]:
with open(f"nllb-600m.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [71]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-600m.{tgt_lang} --batch_size 32 --only_system

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 67216.41it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.86it/s]
nllb-600m.en	score: 0.8739


## NLLB-1.3B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-1.3B --output_dir ../ct2-models/nllb-200-distilled-1.3B-ct2
```

In [72]:
translator = NllbTranslator(model_path=nllb1b_model_path, device=compute_device)

In [73]:
%%time
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 40.09129095077515
CPU times: user 58.4 s, sys: 111 ms, total: 58.5 s
Wall time: 40.4 s


In [74]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 34.97 66.4/41.7/28.1/19.3 (BP = 1.000 ratio = 1.035 hyp_len = 25598 ref_len = 24721)
chrF2 = 63.23
TER = 52.60


In [75]:
with open(f"nllb-1.3b.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [76]:
del mt

In [77]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-1.3b.{tgt_lang} --batch_size 16 --only_system

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 99864.38it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 64/64 [00:03<00:00, 16.40it/s]
nllb-1.3b.en	score: 0.8814


## M2M100-418M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_418M --output_dir ../ct2-models/m2m100_418-ct2/
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [78]:
translator = M2m100Translator(model_path=m2m100_418m_model_path, device=compute_device)

In [79]:
%%time
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 17.606944799423218
CPU times: user 38.7 s, sys: 57.8 ms, total: 38.8 s
Wall time: 20 s


In [80]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 25.92 58.9/32.2/19.6/12.1 (BP = 1.000 ratio = 1.083 hyp_len = 26777 ref_len = 24721)
chrF2 = 56.94
TER = 63.19


In [81]:
with open(f"m2m100-418m.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [82]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-418m.{tgt_lang} --batch_size 32 --only_system

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|███████████████████████| 5/5 [00:00<00:00, 108660.73it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.83it/s]
m2m100-418m.en	score: 0.8314


## M2M100-1.2B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [83]:
translator = M2m100Translator(model_path=m2m100_1b_model_path, device=compute_device)

In [84]:
%%time
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 35.73472595214844
CPU times: user 57.7 s, sys: 173 ms, total: 57.9 s
Wall time: 38.1 s


In [85]:
print(bleu.corpus_score(mt, [ref]))
print(chrf.corpus_score(mt, [ref]))
print(ter.corpus_score(mt, [ref]))

BLEU = 30.81 63.0/37.3/24.1/15.9 (BP = 1.000 ratio = 1.057 hyp_len = 26122 ref_len = 24721)
chrF2 = 60.43
TER = 57.38


In [86]:
with open(f"m2m100-1.2B.{tgt_lang}", "wt") as myfile:
    myfile.write("".join([i + "\n" for i in mt]))

In [87]:
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-1.2B.{tgt_lang} --batch_size 32 --only_system

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|███████████████████████| 5/5 [00:00<00:00, 121222.66it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.84it/s]
m2m100-1.2B.en	score: 0.8643


## Results

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-de-en            | 68.83 | 44.20   | 88.88   | 0.90     |
| Helsinki-NLP/opus-mt-de-en       | 66.16 | 40.04   | 87.68   | 3.51     |
| facebook/m2m100_418M             | 61.86 | 34.27   | 84.52   | 18.1     |
| facebook/m2m100_1.2B             | 65.99 | 40.34   | 87.67   | 35.2     |
| facebook/nllb-200-distilled-600M | 67.07 | 42.46   | 88.14   | 21.5     |
| facebook/nllb-200-distilled-1.3B | 68.75 | 44.44   | 89.08   | 37.4     |

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-en-de            | 66.24 | 40.17   | 86.83   | 0.99     |
| Helsinki-NLP/opus-mt-en-de       | 63.53 | 36.06   | 84.63   | 3.66     |
| facebook/nllb-200-distilled-600M | 63.41 | 35.72   | 86.65   | 26.9     |
| facebook/nllb-200-distilled-1.3B | 65.01 | 38.61   | 87.99   | 46.0     |
| facebook/m2m100_418M             | 57.76 | 28.57   | 79.75   | 21.4     |
| facebook/m2m100_1.2B             | 63.37 | 36.24   | 85.82   | 41.0     |


| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-ko-en            | 36.96 | 14.97   | 87.10   | 1.25     |
| facebook/nllb-200-distilled-600M | 33.66 | 12.15   | 87.39   | 25.3     |
| facebook/nllb-200-distilled-1.3B | 35.62 | 13.23   | 88.39   | 40.2     |
| facebook/m2m100_418M             | 30.69 | 9.91    | 83.20   | 22.1     |
| facebook/m2m100_1.2B             | 33.26 | 11.35   | 85.65   | 41.0     |

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-ko-en            | 56.25 | 27.03   | 86.11   | 0.91     |
| Helsinki-NLP/opus-mt-ko-en       | 50.39 | 20.78   | 83.06   | 3.81     |
| facebook/nllb-200-distilled-600M | 55.04 | 26.53   | 85.83   | 21.0     |
| facebook/nllb-200-distilled-1.3B | 57.56 | 29.61   | 87.24   | 37.2     |
| facebook/m2m100_418M             | 50.65 | 20.75   | 82.07   | 18.2     |
| facebook/m2m100_1.2B             | 62.35 | 24.59   | 85.15   | 34.7     |

| Model                            | chrf2 | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | -------- |
| quickmt/quickmt-en-ja            | 42.04 | 89.08   | 1.29     |
| Helsinki-NLP/opus-mt-en-jap      | 6.41  | 62.91   | 7.35     |
| facebook/nllb-200-distilled-600M | 30.00 | 86.64   | 28.4     |
| facebook/nllb-200-distilled-1.3B | 32.38 | 88.02   | 37.2     |
| facebook/m2m100_418M             | 32.73 | 85.09   | 24.7     |
| facebook/m2m100_1.2B             | 35.83 | 87.78   | 45.7     |

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-it-en            |  |    |    |      |
| Helsinki-NLP/opus-mt-it-en       |  |    |    |      |
| facebook/nllb-200-distilled-600M | 61.97 | 33.49   | 87.39   | 23.0     |
| facebook/nllb-200-distilled-1.3B | 63.23 | 34.97   | 88.14   | 40.4     |
| facebook/m2m100_418M             | 56.94 | 25.92   | 83.14   | 20.0     |
| facebook/m2m100_1.2B             | 60.43 | 30.81   | 86.43   | 38.1     |

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-en-it            |  |    |    |      |
| Helsinki-NLP/opus-mt-en-it       |  |    |    |      |
| facebook/nllb-200-distilled-600M |  |    |    |      |
| facebook/nllb-200-distilled-1.3B |  |    |    |      |
| facebook/m2m100_418M             |  |    |    |      |
| facebook/m2m100_1.2B             |  |    |    |      |