In [1]:
from unsloth import FastLanguageModel
from trl import SFTTrainer
import torch
from transformers import TrainingArguments
from datasets import DatasetDict
from rich import print
from unsloth.chat_templates import get_chat_template

In [2]:
max_seq_length = 4096  # Supports RoPE Scaling interally, so choose any!
# get instruction dataset from ../data/instructions
dataset = DatasetDict.load_from_disk('../data/instructions')
dataset_train = dataset["train"]
dataset_test = dataset["test"]

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="vilm/vinallama-7b-chat",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)

# Do model patching and add fast LoRA weights
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    use_gradient_checkpointing=True,
    random_state=3407,
    max_seq_length=max_seq_length,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=False,  # And LoftQ
)



==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: NVIDIA GeForce RTX 3070 Ti. Max memory: 8.0 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Unsloth: vilm/vinallama-7b-chat has no tokenizer.model file.
Just informing you about this - this is not a critical error.


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth: Will map <|im_end|> to EOS = <|im_end|>.
Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [3]:
def format_prompt_func(example):
    messages = [
        {"role": "system", "content": "Bạn là một trợ lí AI hữu ích trong việc dịch thuật tiếng Việt sang Tiếng Anh. Hãy trả lời người dùng một cách chính xác."},
        {"role": "user", "content": example['instruction']},
         {"role": "assistant", "content": example['response']}
    ]
    
    if isinstance(example['instruction'], str):
        print('str')
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        # return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
    
    output_texts = []
    for i in range(len(example['instruction'])):
        messages = [
            {"role": "system", "content": "Bạn là một trợ lí AI hữu ích trong việc dịch thuật tiếng Việt sang Tiếng Anh. Hãy trả lời người dùng một cách chính xác."},
            {"role": "user", "content": example['instruction'][i]},
            {"role": "assistant", "content": example['response'][i]}
        ]
        
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        # text = f"### Instruction:\n{example['instruction'][i]}\n\n### Response:\n{example['response'][i]}"
        output_texts.append(text)
    
    return output_texts


trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    formatting_func=format_prompt_func,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=TrainingArguments(
        num_train_epochs=2,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=0.1,
        eval_steps=0.1,
        save_steps=0.1,
        output_dir="../models/vinallama-7b-chat",
        optim="adamw_8bit",
        seed=3407,
    ),
)
trainer.train()

# Go to https://github.com/unslothai/unsloth/wiki for advanced tips like
# (1) Saving to GGUF / merging to 16bit for vLLM
# (2) Continued training from a saved LoRA adapter
# (3) Adding an evaluation loop / OOMs
# (4) Cutomized chat templates

Map:   0%|          | 0/6571 [00:00<?, ? examples/s]

Map:   0%|          | 0/1643 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,571 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 4
\        /    Total batch size = 4 | Total steps = 3,284
 "-____-"     Number of trainable parameters = 39,976,960


Step,Training Loss
329,0.8541
658,0.5325
987,0.4425
1316,0.3729
1645,0.338
1974,0.2592
2303,0.2306
2632,0.2217
2961,0.2052




TrainOutput(global_step=3284, training_loss=0.3654506871528951, metrics={'train_runtime': 5291.3361, 'train_samples_per_second': 2.484, 'train_steps_per_second': 0.621, 'total_flos': 6.848735068122317e+16, 'train_loss': 0.3654506871528951, 'epoch': 1.9990868969715416})