In [2]:
# Import the Transformer library
from transformers import pipeline

# Apply the "Multilingual to English" OPUS-MT model
sentence_list = ['Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.', 'Safe practices, Fear, Jealousy, Trivial',]
# Define the model
model_OPUS = "Helsinki-NLP/opus-mt-en-ru"

translator_multi = pipeline("translation", model = model_OPUS, device="cuda:0")

# Translate the list of sample sentences
translationMulti = translator_multi(sentence_list)

# See the results (first two translations)
print(translationMulti)



[{'translation_text': 'Типичные органы, регулирующие рекламу, предполагают, например, что реклама не должна поощрять ____, вызывать ненужные ____ или ____ и не должна приводить к правонарушениям ____.'}, {'translation_text': 'Безопасная практика, страх, ревность, тривиаль'}]


In [1]:
from datasets import load_dataset

ds = load_dataset("TIGER-Lab/MMLU-Pro")

ds

DatasetDict({
    test: Dataset({
        features: ['question_id', 'question', 'options', 'answer', 'answer_index', 'cot_content', 'category', 'src'],
        num_rows: 12032
    })
    validation: Dataset({
        features: ['question_id', 'question', 'options', 'answer', 'answer_index', 'cot_content', 'category', 'src'],
        num_rows: 70
    })
})

In [5]:
k = 0
for i in ds['test']:
    print(i['question'])
    k += 1
    if k == 10:
        break

Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
Managers are entrusted to run the company in the best interest of ________. Specifically, they have a duty to act for the benefit of the company, as well as a duty of ________ and of _______.
There are two main issues associated with _____ sizing. _______ is a key issue as due to the information policy of the corporation it can be argued that employees have a right to know if they are being made redundant. _______ is a second issue, particularly the ________ package that employees receive when laid off.
_______ locate morality beyond the sphere of rationality in an emotional 'moral impulse' towards others.
 Some of key differences between Islamic finance and conventional finance include - prohibition of charging and paying _______, prohibition on ______ and ______ transactions, prohibition of sinful investment an

In [4]:
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

model_OPUS = "Helsinki-NLP/opus-mt-en-ru"

pipe = pipeline("translation", model=model_OPUS, device="cuda")
for out in pipe(KeyDataset(ds['test'], "question"), batch_size=8, truncation="only_first"):
    print(out)



[{'translation_text': 'Типичные органы, регулирующие рекламу, предполагают, например, что реклама не должна поощрять ____, вызывать ненужные ____ или ____ и не должна приводить к правонарушениям ____.'}]
[{'translation_text': 'Руководителям поручают управлять компанией в наилучших интересах ___ года, в частности, они обязаны действовать в интересах компании, а также ___ года и ___ года.'}]
[{'translation_text': 'Существует два основных вопроса, связанных с калибровкой _______. ___ является одним из ключевых вопросов, поскольку в силу информационной политики корпорации можно утверждать, что работники имеют право знать, являются ли они излишними. ___ является вторым вопросом, в частности пакетом ___, который работники получают в случае увольнения.'}]
[{'translation_text': 'Найдите мораль вне сферы рациональности в эмоциональном «моральном порыве» по отношению к другим.'}]
[{'translation_text': 'Некоторые основные различия между исламскими и обычными финансовыми средствами включают: запре

KeyboardInterrupt: 

In [2]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

model_name = "google/madlad400-3b-mt"
model = T5ForConditionalGeneration.from_pretrained(model_name).cuda()
tokenizer = T5Tokenizer.from_pretrained(model_name)

In [6]:
prompts = ds["test"][:11]['question']

prompts

['Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.',
 'Managers are entrusted to run the company in the best interest of ________. Specifically, they have a duty to act for the benefit of the company, as well as a duty of ________ and of _______.',
 'There are two main issues associated with _____ sizing. _______ is a key issue as due to the information policy of the corporation it can be argued that employees have a right to know if they are being made redundant. _______ is a second issue, particularly the ________ package that employees receive when laid off.',
 "_______ locate morality beyond the sphere of rationality in an emotional 'moral impulse' towards others.",
 ' Some of key differences between Islamic finance and conventional finance include - prohibition of charging and paying _______, prohibition on ______ and ______ transactions, prohibition of si

In [10]:
prompts = ds["test"][:11]['question']
for text in prompts:
    input_ids = tokenizer("<2ru> " + text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, max_new_tokens=1024)

    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(text)
    print('----------------------------------------')
    print(answer)
    print('\n\n')

Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence.
----------------------------------------
Типичные органы по регулированию рекламы предлагают, например, чтобы реклама не поощряла _________, не вызывала ненужного ________ или _________ и не вызывала _______ оскорбления.



Managers are entrusted to run the company in the best interest of ________. Specifically, they have a duty to act for the benefit of the company, as well as a duty of ________ and of _______.
----------------------------------------
Руководителям поручено управлять компанией в наилучших интересах ________. В частности, они обязаны действовать в интересах компании, а также обязаны действовать в интересах ________ и _______.



There are two main issues associated with _____ sizing. _______ is a key issue as due to the information policy of the corporation it can be argued that employees have a 