# `quickmt` Benchmark

In [1]:
from time import time

import datasets
from fire import Fire
from sacrebleu import BLEU, CHRF, TER

from quickmt import Translator, OpusmtTranslator, NllbTranslator, M2m100Translator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# sacreBLEU scorers built with their default arguments; the same three
# objects are reused to score every system in this notebook.
bleu, chrf, ter = BLEU(), CHRF(), TER()

In [3]:
# Language pair under test. The short codes are used in output file names
# and passed to the M2M100 wrapper; the *_flores codes select the FLORES
# config/columns and are passed to the NLLB wrapper.
src_lang, tgt_lang = "fr", "en"
src_lang_flores, tgt_lang_flores = "fra_Latn", "eng_Latn"

In [4]:
# Load the FLORES pair config. The config name is tried as "<src>-<tgt>"
# first and, if that fails, as "<tgt>-<src>".
try:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{src_lang_flores}-{tgt_lang_flores}",
    )
except Exception:
    # Narrowed from a bare `except:` so that KeyboardInterrupt / SystemExit
    # are no longer swallowed and silently turned into a second download.
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{tgt_lang_flores}-{src_lang_flores}",
    )

# Parallel lists of devtest sentences: `src` is the translation input,
# `ref` is the reference used by every metric below.
src = [row[f"sentence_{src_lang_flores}"] for row in flores["devtest"]]
ref = [row[f"sentence_{tgt_lang_flores}"] for row in flores["devtest"]]


In [5]:
# Write source and reference sentences, one per line, for the comet-score
# CLI cells below. The encoding is pinned to UTF-8: without it open() uses
# the locale's preferred encoding, which can fail or corrupt non-ASCII text.
with open(f"flores.{src_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in src)
with open(f"flores.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in ref)

## Quickmt

In [6]:
# quickmt fr->en model loaded from a local directory, running on the GPU.
translator = Translator(model_path="../quickmt-models/quickmt-fr-en", device="cuda")

In [7]:
%%time
# Translate the whole FLORES devtest source list with quickmt; %%time reports the cost.
# NOTE(review): this is the only timed run that passes max_batch_size; the other
# systems are called without it — confirm the timings are comparable.
mt = translator(src, max_batch_size=32)

Translation time: 1.1925811767578125
CPU times: user 3.39 s, sys: 550 ms, total: 3.94 s
Wall time: 1.26 s


In [8]:
# Corpus-level BLEU, chrF and TER for the quickmt output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 36.73 67.2/43.2/29.8/21.0 (BP = 1.000 ratio = 1.030 hyp_len = 25462 ref_len = 24721)
chrF2 = 63.17
TER = 47.98


In [9]:
# Save the quickmt hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"quickmt.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [10]:
# Score the quickmt output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and quickmt hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t quickmt.{tgt_lang} --batch_size 32 --only_system 

Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 86659.17it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.53it/s]
quickmt.en	score: 0.8408


## OpusMT

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model Helsinki-NLP/opus-mt-fr-en --output_dir ../ct2-models/opus-mt-fr-en-ct2
```


In [11]:
# OPUS-MT fr->en, using the CTranslate2 export produced by the converter command above.
# model_string presumably names the Hugging Face checkpoint used for tokenization — TODO confirm.
translator = OpusmtTranslator(
    device="cuda",
    model_string="Helsinki-NLP/opus-mt-fr-en",
    model_path="../ct2-models/opus-mt-fr-en-ct2",
)

In [12]:
%%time
# Translate the FLORES devtest sources with OPUS-MT using the wrapper's default call arguments.
mt = translator(src)

Translation time: 3.182037115097046
CPU times: user 9.62 s, sys: 1.69 s, total: 11.3 s
Wall time: 3.61 s


In [13]:
# Corpus-level BLEU, chrF and TER for the OPUS-MT output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 41.71 70.7/48.0/34.7/25.7 (BP = 1.000 ratio = 1.029 hyp_len = 25433 ref_len = 24721)
chrF2 = 66.85
TER = 43.52


In [14]:
# Save the OPUS-MT hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"opusmt.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [15]:
# Score the OPUS-MT output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and OPUS-MT hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t opusmt.{tgt_lang} --batch_size 32 --only_system 

Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 63167.23it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.50it/s]
opusmt.en	score: 0.8831


## NLLB-600M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-600M --output_dir ../ct2-models/nllb-200-distilled-600M-ct2
```

In [16]:
# NLLB-200 distilled 600M, using the CTranslate2 export produced by the converter command above.
translator = NllbTranslator(model_path="../ct2-models/nllb-200-distilled-600M-ct2", device="cuda")

In [17]:
%%time
# Translate the FLORES devtest sources with NLLB-600M; the wrapper is given the
# FLORES-style language codes (fra_Latn -> eng_Latn) rather than the two-letter ones.
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 21.47916865348816
CPU times: user 23.9 s, sys: 16.1 s, total: 39.9 s
Wall time: 21.7 s


In [18]:
# Corpus-level BLEU, chrF and TER for the NLLB-600M output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 44.04 72.1/50.2/37.1/28.0 (BP = 1.000 ratio = 1.006 hyp_len = 24862 ref_len = 24721)
chrF2 = 67.82
TER = 41.93


In [19]:
# Save the NLLB-600M hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"nllb-600m.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [20]:
# Score the NLLB-600M output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and NLLB-600M hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-600m.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|███████████████████████| 5/5 [00:00<00:00, 104335.92it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.69it/s]
nllb-600m.en	score: 0.8847


## NLLB-1.3B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-1.3B --output_dir ../ct2-models/nllb-200-distilled-1.3B-ct2
```

In [21]:
# NLLB-200 distilled 1.3B, using the CTranslate2 export produced by the converter command above.
translator = NllbTranslator(model_path="../ct2-models/nllb-200-distilled-1.3B-ct2", device="cuda")

In [22]:
%%time
# Translate the FLORES devtest sources with NLLB-1.3B; the wrapper is given the
# FLORES-style language codes (fra_Latn -> eng_Latn) rather than the two-letter ones.
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 36.86673855781555
CPU times: user 28.3 s, sys: 27.2 s, total: 55.5 s
Wall time: 37.1 s


In [23]:
# Corpus-level BLEU, chrF and TER for the NLLB-1.3B output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 46.22 73.7/52.5/39.3/30.0 (BP = 1.000 ratio = 1.001 hyp_len = 24744 ref_len = 24721)
chrF2 = 69.30
TER = 39.82


In [24]:
# Save the NLLB-1.3B hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"nllb-1.3b.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [25]:
# Score the NLLB-1.3B output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and NLLB-1.3B hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-1.3b.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 84904.94it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.86it/s]
nllb-1.3b.en	score: 0.8924


## M2M100-418M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_418M --output_dir ../ct2-models/m2m100_418-ct2/
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [26]:
# M2M100 418M, using the CTranslate2 export produced by the converter command above.
translator = M2m100Translator(model_path="../ct2-models/m2m100_418-ct2/", device="cuda")

In [27]:
%%time
# Translate the FLORES devtest sources with M2M100-418M; this wrapper is given the
# two-letter language codes ("fr" -> "en"), not the FLORES-style ones.
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 15.773196697235107
CPU times: user 25.1 s, sys: 11.1 s, total: 36.2 s
Wall time: 18 s


In [28]:
# Corpus-level BLEU, chrF and TER for the M2M100-418M output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 36.49 66.8/43.0/29.6/20.9 (BP = 1.000 ratio = 1.042 hyp_len = 25771 ref_len = 24721)
chrF2 = 63.29
TER = 48.28


In [29]:
# Save the M2M100-418M hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"m2m100-418m.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [30]:
# Score the M2M100-418M output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and M2M100-418M hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-418m.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|███████████████████████| 5/5 [00:00<00:00, 103307.98it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.87it/s]
m2m100-418m.en	score: 0.8587


## M2M100-1.2B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [31]:
# M2M100 1.2B, using the CTranslate2 export produced by the converter command above.
translator = M2m100Translator(model_path="../ct2-models/m2m100_1.2B-ct2/", device="cuda")

In [32]:
%%time
# Translate the FLORES devtest sources with M2M100-1.2B; this wrapper is given the
# two-letter language codes ("fr" -> "en"), not the FLORES-style ones.
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 32.46005296707153
CPU times: user 31.3 s, sys: 23.1 s, total: 54.5 s
Wall time: 34.6 s


In [33]:
# Corpus-level BLEU, chrF and TER for the M2M100-1.2B output against the single reference set.
for scorer in (bleu, chrf, ter):
    print(scorer.corpus_score(mt, [ref]))

BLEU = 41.69 70.5/47.9/34.7/25.8 (BP = 1.000 ratio = 1.022 hyp_len = 25259 ref_len = 24721)
chrF2 = 66.51
TER = 43.40


In [34]:
# Save the M2M100-1.2B hypotheses, one per line, for the comet-score cell that follows.
# UTF-8 is set explicitly so the file does not depend on the locale's default encoding.
with open(f"m2m100-1.2B.{tgt_lang}", "w", encoding="utf-8") as myfile:
    myfile.writelines(f"{line}\n" for line in mt)

In [35]:
# Score the M2M100-1.2B output with the comet-score CLI installed in a separate "comet" conda env.
# -s / -r / -t are the FLORES source, FLORES reference and M2M100-1.2B hypothesis files written above.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-1.2B.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 85948.85it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  9.00it/s]
m2m100-1.2B.en	score: 0.8800


# fr->en Results

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-fr-en            | 63.17 | 36.73   | 84.08   |  1.3     |
| Helsinki-NLP/opus-mt-fr-en       | 66.85 | 41.71   | 88.31   |  3.6     |
| facebook/m2m100_418M             | 63.29 | 36.49   | 85.87   | 18.0     |
| facebook/m2m100_1.2B             | 66.51 | 41.69   | 88.00   | 34.6     |
| facebook/nllb-200-distilled-600M | 67.82 | 44.04   | 88.47   | 21.7     |
| facebook/nllb-200-distilled-1.3B | 69.30 | 46.22   | 89.24   | 37.1     |
