In [None]:
!pip install transformers sentencepiece torch ctranslate2 -qqq

## Download the model

In [None]:
from huggingface_hub import hf_hub_download

# Every artifact lives in the `ctranslate2` subfolder of the same Hub repo, so the
# repo id and the file list are declared once instead of being repeated per call.
MODEL_REPO_ID = 'anzorq/m2m100_418M_ft_ru-kbd_44K'
MODEL_FILES = (
    'config.json',
    'model.bin',
    'sentencepiece.bpe.model',
    'shared_vocabulary.json',
)

# The files are expected to land in ./ctranslate2/, the directory the inference
# cell passes to ctranslate2.Translator.
for model_file in MODEL_FILES:
    hf_hub_download(repo_id=MODEL_REPO_ID, subfolder='ctranslate2', filename=model_file, local_dir='./')

## Inference

In [None]:
import ctranslate2
import transformers

# Language code whose token is forced as the first target token in translate().
# NOTE(review): presumably the fine-tuned model reuses the "zu" code for its
# target language -- confirm against the model card.
tgt_lang = "zu"

# Tokenizer is fetched from the Hub repo of the fine-tuned model.
tokenizer = transformers.AutoTokenizer.from_pretrained("anzorq/m2m100_418M_ft_ru-kbd_44K")

# "ctranslate2" must be the path to the directory holding the converted model files.
translator = ctranslate2.Translator("ctranslate2")

def translate(text, num_beams=4, num_return_sequences=4):
    """Translate ``text`` with the module-level CTranslate2 translator.

    Relies on the module-level ``tokenizer``, ``translator`` and ``tgt_lang``.

    Args:
        text: Source string to translate.
        num_beams: Beam size passed to ``translate_batch``.
        num_return_sequences: Requested number of hypotheses; capped at
            ``num_beams``.

    Returns:
        A ``(text, translations)`` tuple, where ``translations`` is a list
        with one decoded string per returned hypothesis.
    """
    # Never request more hypotheses than there are beams.
    hypothesis_count = min(num_return_sequences, num_beams)

    # The translator consumes token strings, so go ids -> tokens.
    source_ids = tokenizer.encode(text)
    source_tokens = tokenizer.convert_ids_to_tokens(source_ids)

    # Force decoding to start with the target-language token.
    language_token = tokenizer.lang_code_to_token[tgt_lang]

    batch_results = translator.translate_batch(
        [source_tokens],
        target_prefix=[[language_token]],
        beam_size=num_beams,
        num_hypotheses=hypothesis_count,
    )

    def _decode(hypothesis_tokens):
        # Skip the first token (the slot taken by the forced target prefix)
        # before converting the remaining tokens back into text.
        remaining_ids = tokenizer.convert_tokens_to_ids(hypothesis_tokens[1:])
        return tokenizer.decode(remaining_ids)

    # Only one sentence was submitted, so only the first result is relevant.
    return text, [_decode(tokens) for tokens in batch_results[0].hypotheses]

In [None]:
#@title Translation

text = "Текст для перевода" #@param {type: "string"}
num_beams = 4 # @param {type:"slider", min:2, max:10, step:1}
# Forward the slider value; without it translate() always used its default beam size.
print(translate(text, num_beams=num_beams))