In [44]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, TaskType, get_peft_model
from typing import Optional, List

In [45]:
class NMTModel:
    """
    Thin wrapper around a causal language model and its tokenizer that turns
    plain-text prompts into chat-formatted requests and returns the generated text.
    """

    def __init__(self, model_dir: str, device, role: Optional[str] = None):
        """
        Loads neural machine translation model.

        # Args
        * `model_dir`: Directory that includes the model, config, and tokenizer.
        * `device`: Forwarded to `from_pretrained` as `device_map`.
        * `role`: Description of the system's role, if `None` then the default role is chosen.
        """

        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype="auto",
            device_map=device,
        ).eval()

        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        # Batched generation with a decoder-only model needs left padding so that
        # the newly generated tokens directly follow the real prompt tokens.
        self.tokenizer.padding_side = "left"
        # Padding a batch requires a pad token; fall back to EOS if none is configured.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        if role is None:
            self.role = "You are a helpful translation assistant."
        else:
            self.role = role

    def prompt(self, prompt: str, max_new_tokens: int = 512) -> str:
        """
        Runs inference on a single prompt.

        # Args
        * `prompt`: User message to send to the model.
        * `max_new_tokens`: Upper bound on the number of generated tokens.

        # Returns
        The decoded completion for `prompt` as a single string.
        """
        # `prompt_batch` returns one completion per input; unwrap the only one.
        return self.prompt_batch([prompt], max_new_tokens=max_new_tokens)[0]

    def prompt_batch(self, prompts: List[str], max_new_tokens: int = 512) -> List[str]:
        """
        Runs inference.

        # Args
        * `prompts`: User messages; each one is sent in its own chat together with
          the system role stored in `self.role`.
        * `max_new_tokens`: Upper bound on the number of generated tokens per prompt.

        # Returns
        One decoded completion per prompt, in the same order as `prompts`.
        An empty `prompts` list yields an empty list.
        """

        # Nothing to tokenize or generate for an empty batch.
        if not prompts:
            return []

        text_batch = [
            self.tokenizer.apply_chat_template(
                [{"role": "system", "content": self.role}, {"role": "user", "content": prompt}],
                tokenize=False,
                add_generation_prompt=True
            ) for prompt in prompts
        ]
        model_inputs = self.tokenizer(text_batch, return_tensors="pt", padding=True).to(self.model.device)

        # Forward the whole encoding (input ids AND attention mask) so that
        # padding positions are masked out during generation.
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens
        )
        # Each output row starts with the (padded) prompt; keep only what comes after it.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
        ]

        return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    

### Load model

In [46]:
# NOTE: point both paths at the real model / output directories before running.
model_dir, out_dir = "../data/qwen0-5b", "../data/out"

# Build the `NMTModel` wrapper from `model_dir`, passing "cpu" as its device.
# TODO: preferably not newer than 2023
model = NMTModel(model_dir=model_dir, device="cpu")

In [47]:
# One prompt per target language, all asking for the same English sentence.
example_prompts = [
    f"Translate the following sentence to {language}: Would you like something to eat?"
    for language in ("Japanese", "German")
]

model.prompt_batch(prompts=example_prompts)

['あなたは何か食べたいですか？', 'Wollt ihr etwas trinken?']

### PEFT

In [48]:
# Wrap the loaded base model with a LoRA configuration to get a fine-tunable model.
# TODO: find good parameters
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)
model_peft = get_peft_model(model=model.model, peft_config=peft_config)

# Report how many parameters are trainable after wrapping.
model_peft.print_trainable_parameters()

trainable params: 540,672 || all params: 494,573,440 || trainable%: 0.1093


In [None]:
# Training configuration: evaluate and checkpoint once per epoch into `out_dir`.
# `eval_strategy` and `save_strategy` are both "epoch", matching each other as
# `load_best_model_at_end=True` requires.
# TODO: find good parameters
training_args = TrainingArguments(
    output_dir=out_dir,
    learning_rate=1e-3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# TODO: find good parameters
# TODO: every `...` below is a placeholder and must be replaced before running this cell.
trainer = Trainer(
    # Train the LoRA-wrapped model; `model` is the plain `NMTModel` wrapper
    # object, not the underlying network, so it cannot be handed to `Trainer`.
    model=model_peft,
    args=training_args,
    train_dataset=...,
    eval_dataset=...,
    processing_class=model.tokenizer,
    data_collator=...,
    compute_metrics=...,
)

trainer.train()