# `quickmt` Benchmark

In [36]:
from time import time

import datasets
from fire import Fire
from sacrebleu import BLEU, CHRF, TER

from quickmt import Translator, OpusmtTranslator, NllbTranslator, M2m100Translator

In [37]:
# Corpus-level scorers, created once with default settings and reused for
# every system evaluated below.
bleu, chrf, ter = BLEU(), CHRF(), TER()

In [38]:
# Language pair under test. Each language has a short code (used in output
# file names and passed to the M2M100 translator) and a FLORES-style code
# (used for the dataset config/columns and passed to the NLLB translator).
src_lang, src_lang_flores = "en", "eng_Latn"
tgt_lang, tgt_lang_flores = "fr", "fra_Latn"

In [39]:
# Load the FLORES pair config. The config name is tried as "src-tgt" first
# and as "tgt-src" on failure (presumably the dataset only exposes one
# ordering per pair -- TODO confirm). `except Exception` is used instead of
# a bare `except:` so Ctrl-C / kernel interrupts are not swallowed and
# silently turned into a second download attempt.
try:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{src_lang_flores}-{tgt_lang_flores}",
    )
except Exception:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{tgt_lang_flores}-{src_lang_flores}",
    )

# Parallel lists built from the "devtest" split: `src` holds the source
# sentences and `ref` the reference translations, aligned by position.
flores_devtest = flores["devtest"]
src = [row[f"sentence_{src_lang_flores}"] for row in flores_devtest]
ref = [row[f"sentence_{tgt_lang_flores}"] for row in flores_devtest]


In [40]:
# Dump the source and reference sentences, one per line, so the external
# comet-score command can read them. UTF-8 is requested explicitly: without
# it the files are written in the platform's default encoding, which can
# fail or produce mis-encoded output for accented French text.
with open(f"flores.{src_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in src)
with open(f"flores.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in ref)

## Quickmt

In [41]:
# Load the quickmt en->fr model from a local directory on the GPU.
# NOTE(review): the saved output of this cell is a RuntimeError
# ("Unable to open file 'model.bin'"), i.e. the model was not present at
# this path on the last run -- confirm the path before re-running.
translator = Translator(
    model_path="../quickmt-models/quickmt-en-fr",
    device="cuda"
)

RuntimeError: Unable to open file 'model.bin' in model '../quickmt-models/quickmt-en-fr'

In [None]:
%%time
# Translate every source sentence with the quickmt model; %%time reports the
# cost of this cell. Note this is the only system called with an explicit
# max_batch_size.
mt = translator(src, max_batch_size=64)

In [None]:
# Corpus-level BLEU, chrF and TER (in that order) for the quickmt output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

In [None]:
# Save the quickmt hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"quickmt.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [None]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and quickmt hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t quickmt.{tgt_lang} --batch_size 32 --only_system 

## OpusMT

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model Helsinki-NLP/opus-mt-en-fr --output_dir ../ct2-models/opus-mt-en-fr-ct2
```


In [None]:
# Load the OpusMT en->fr model from the local ../ct2-models directory on the
# GPU. model_string names the matching Helsinki-NLP model (presumably used
# to fetch the tokenizer -- TODO confirm against OpusmtTranslator).
translator = OpusmtTranslator(
    model_path="../ct2-models/opus-mt-en-fr-ct2",
    model_string="Helsinki-NLP/opus-mt-en-fr",
    device="cuda"
)

In [None]:
%%time
# Translate every source sentence with the OpusMT model using the
# translator's default call settings; %%time reports the cost of this cell.
mt = translator(src)

In [None]:
# Corpus-level BLEU, chrF and TER (in that order) for the OpusMT output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

In [None]:
# Save the OpusMT hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"opusmt.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [None]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and OpusMT hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t opusmt.{tgt_lang} --batch_size 32 --only_system 

## NLLB-600M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-600M --output_dir ../ct2-models/nllb-200-distilled-600M-ct2
```

In [16]:
# Load NLLB-200 distilled 600M from the local ../ct2-models directory (the
# output directory of the conversion command shown in this section) on the GPU.
translator = NllbTranslator(
    model_path="../ct2-models/nllb-200-distilled-600M-ct2",
    device="cuda"
)

In [17]:
%%time
# Translate every source sentence with NLLB-600M. The NLLB translator is
# given the FLORES-style language codes (eng_Latn / fra_Latn), not the
# two-letter ones.
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 27.64427614212036
CPU times: user 31.8 s, sys: 20.7 s, total: 52.5 s
Wall time: 27.9 s


In [18]:
# Corpus-level BLEU, chrF and TER (in that order) for the NLLB-600M output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

BLEU = 48.71 74.1/55.5/44.1/35.8 (BP = 0.965 ratio = 0.966 hyp_len = 27373 ref_len = 28343)
chrF2 = 70.36
TER = 38.87


In [19]:
# Save the NLLB-600M hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"nllb-600m.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [20]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and NLLB-600M hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython. The last output line is
# the system-level score.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-600m.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 19204.69it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.38it/s]
nllb-600m.fr	score: 0.8763


## NLLB-1.3B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/nllb-200-distilled-1.3B --output_dir ../ct2-models/nllb-200-distilled-1.3B-ct2
```

In [21]:
# Load NLLB-200 distilled 1.3B from the local ../ct2-models directory (the
# output directory of the conversion command shown in this section) on the GPU.
translator = NllbTranslator(
    model_path="../ct2-models/nllb-200-distilled-1.3B-ct2",
    device="cuda"
)

In [22]:
%%time
# Translate every source sentence with NLLB-1.3B. The NLLB translator is
# given the FLORES-style language codes (eng_Latn / fra_Latn), not the
# two-letter ones.
mt = translator(src, src_lang=src_lang_flores, tgt_lang=tgt_lang_flores)

Translation time: 47.330397844314575
CPU times: user 37.7 s, sys: 35.2 s, total: 1min 12s
Wall time: 47.6 s


In [23]:
# Corpus-level BLEU, chrF and TER (in that order) for the NLLB-1.3B output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

BLEU = 51.10 75.7/58.0/46.8/38.4 (BP = 0.964 ratio = 0.965 hyp_len = 27354 ref_len = 28343)
chrF2 = 71.95
TER = 37.08


In [24]:
# Save the NLLB-1.3B hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"nllb-1.3b.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [25]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and NLLB-1.3B hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython. The last output line is
# the system-level score.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t nllb-1.3b.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|███████████████████████| 5/5 [00:00<00:00, 101803.50it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.59it/s]
nllb-1.3b.fr	score: 0.8850


## M2M100-418M

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_418M --output_dir ../ct2-models/m2m100_418-ct2/
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [26]:
# Load M2M100-418M from the local ../ct2-models directory on the GPU. The
# directory is named "m2m100_418-ct2" (no trailing "M"), matching the
# --output_dir of the conversion command shown in this section.
translator = M2m100Translator(
    model_path="../ct2-models/m2m100_418-ct2/",
    device="cuda"
)

In [27]:
%%time
# Translate every source sentence with M2M100-418M. Unlike the NLLB cells,
# the M2M100 translator is given the two-letter codes ("en" / "fr").
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 19.757731199264526
CPU times: user 31.5 s, sys: 13.9 s, total: 45.4 s
Wall time: 22.4 s


In [28]:
# Corpus-level BLEU, chrF and TER (in that order) for the M2M100-418M output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

BLEU = 39.52 67.7/46.3/34.0/25.5 (BP = 0.974 ratio = 0.974 hyp_len = 27614 ref_len = 28343)
chrF2 = 64.39
TER = 46.28


In [29]:
# Save the M2M100-418M hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"m2m100-418m.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [30]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and M2M100-418M hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython. The last output line is
# the system-level score.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-418m.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|█████████████████████████| 5/5 [00:00<00:00, 7479.14it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.64it/s]
m2m100-418m.fr	score: 0.8211


## M2M100-1.2B

Be sure to export the models to `ctranslate2` format first, e.g.

```bash
ct2-transformers-converter --model facebook/m2m100_1.2B --output_dir ../ct2-models/m2m100_1.2B-ct2/
```

In [31]:
# Load M2M100-1.2B from the local ../ct2-models directory (the output
# directory of the conversion command shown in this section) on the GPU.
translator = M2m100Translator(
    model_path="../ct2-models/m2m100_1.2B-ct2/",
    device="cuda"
)

In [32]:
%%time
# Translate every source sentence with M2M100-1.2B. Unlike the NLLB cells,
# the M2M100 translator is given the two-letter codes ("en" / "fr").
mt = translator(src, src_lang=src_lang, tgt_lang=tgt_lang)

Translation time: 41.012052059173584
CPU times: user 40.6 s, sys: 29.4 s, total: 1min 10s
Wall time: 43.6 s


In [33]:
# Corpus-level BLEU, chrF and TER (in that order) for the M2M100-1.2B output
# against the single reference set.
for metric in (bleu, chrf, ter):
    print(metric.corpus_score(mt, [ref]))

BLEU = 45.39 71.8/52.0/40.1/31.6 (BP = 0.973 ratio = 0.973 hyp_len = 27576 ref_len = 28343)
chrF2 = 68.31
TER = 41.57


In [34]:
# Save the M2M100-1.2B hypotheses, one per line, for comet-score. UTF-8 is set
# explicitly so the file does not depend on the platform default encoding.
with open(f"m2m100-1.2B.{tgt_lang}", "wt", encoding="utf-8") as out_file:
    out_file.writelines(line + "\n" for line in mt)

In [35]:
# Run the comet-score CLI (from the `comet` environment under ~/miniforge3) on
# the source (-s), reference (-r) and M2M100-1.2B hypothesis (-t) files;
# {src_lang}/{tgt_lang} are interpolated by IPython. The last output line is
# the system-level score.
! ~/miniforge3/envs/comet/bin/comet-score -s flores.{src_lang} -r flores.{tgt_lang} -t m2m100-1.2B.{tgt_lang} --batch_size 32 --only_system 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Seed set to 1
Fetching 5 files: 100%|████████████████████████| 5/5 [00:00<00:00, 88487.43it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../home/mark/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/home/mark/miniforge3/envs/comet/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████████████| 32/32 [00:03<00:00,  8.58it/s]
m2m100-1.2B.fr	score: 0.8650


## en->fr Results

| Model                            | chrf2 | bleu    | comet22 | Time (s) |
| -------------------------------- | ----- | ------- | ------- | -------- |
| quickmt/quickmt-en-fr            | 70.75 | 49.18   | 86.70   |  1.69    |
| Helsinki-NLP/opus-mt-en-fr       | 69.98 | 47.97   | 86.29   |  4.13    |
| facebook/m2m100_418M             | 64.39 | 39.52   | 82.11   | 22.4     |
| facebook/m2m100_1.2B             | 68.31 | 45.39   | 86.50   | 44.0     |
| facebook/nllb-200-distilled-600M | 70.36 | 48.71   | 87.63   | 27.8     |
| facebook/nllb-200-distilled-1.3B | 71.95 | 51.10   | 88.50   | 47.8     |

