# `quickmt` Demo

In [None]:
from pathlib import Path

import datasets
from sacrebleu import BLEU, CHRF

from quickmt import Translator

In [None]:
# GPU inference: build the translator with device="cuda"
t = Translator(
    "./quickmt-zh-en",
    device="cuda",
    intra_threads=1,
    inter_threads=1,
)

In [None]:
# CPU inference: pick the thread settings to match your workload
#   - low latency:      set intra_threads equal to your number of cores
#   - bulk translation: set inter_threads equal to your number of cores
t = Translator(
    "./quickmt-zh-en",
    device="cpu",
    intra_threads=6,
    inter_threads=1,
)

In [None]:
# The translator accepts a single string as input
sentence = "他补充道：“我们现在有 4 个月大没有糖尿病的老鼠，但它们曾经得过该病。”"
t(sentence, beam_size=5)

In [None]:
# The translator also accepts a list of strings
# Adjust max_batch_size so that each batch fits in RAM on your machine
batch = ["他补充道：“我们现在有 4 个月大没有糖尿病的老鼠，但它们曾经得过该病。”"] * 8
t(batch, beam_size=5, max_batch_size=8)

In [None]:
# Sample to obtain alternative translations
# Any CTranslate2 Translator.translate_batch argument can be passed through
# See https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch
t(
    ["他补充道：“我们现在有 4 个月大没有糖尿病的老鼠，但它们曾经得过该病。”"],
    sampling_temperature=0.8,
    beam_size=1,
    sampling_topk=50,
    sampling_topp=0.99,
)

## Evaluate on the FLORES `devtest` split

In [None]:
# Metric objects used to score the translations later in the notebook
bleu = BLEU()
chrf = CHRF()

# Language codes; they select the dataset config and the per-row sentence columns
src_lang = "zho_Hans"
tgt_lang = "eng_Latn"

# Try the "<src>-<tgt>" config name first and fall back to "<tgt>-<src>".
# NOTE(review): presumably the pair is only registered under one ordering — confirm.
try:
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{src_lang}-{tgt_lang}",  # trust_remote_code=True
    )
except Exception:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit propagate rather than silently starting a second download.
    flores = datasets.load_dataset(
        "facebook/flores",
        f"{tgt_lang}-{src_lang}",  # trust_remote_code=True
    )

# Collect parallel source sentences and references from the devtest split
src = []
ref = []
for example in flores["devtest"]:
    src.append(example[f"sentence_{src_lang}"])
    ref.append(example[f"sentence_{tgt_lang}"])

In [None]:
# Translate every source sentence in `src` with beam_size=1
results = t(src, beam_size=1)

In [None]:
# Show the first five entries of `results` as a quick sanity check
results[:5]

In [None]:
# Print the corpus-level score from each metric (BLEU first, then chrF);
# the references are passed as a list holding the single reference list `ref`
for metric in (bleu, chrf):
    print(metric.corpus_score(results, [ref]))