In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import comet_ml
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import qwen25_template
from news_summarizer.config import settings

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Start a Comet experiment in the "NewsSummarizerSFT" project. The returned
# handle is kept so the training cell can close it with `experiment.end()`.
experiment = comet_ml.start(project_name="NewsSummarizerSFT")

In [6]:
# Base checkpoint to fine-tune and the sequence-length cap that is reused by
# the trainer further down.
model_name = "unsloth/Qwen2.5-1.5B"
max_seq_length = 512

# dtype=None lets Unsloth auto-select the precision (bfloat16 where the GPU
# supports it, float16 otherwise). The previous value was the *string*
# "float16": the parameter expects None or a torch dtype, and forcing float16
# weights conflicts with the training cell, which enables bf16 whenever
# `is_bfloat16_supported()` is true. Auto-detection keeps both in agreement.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=False,  # full-precision weights, no 4-bit quantisation
)

==((====))==  Unsloth 2025.1.8: Fast Qwen2 patching. Transformers: 4.48.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3070. Max memory: 8.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Render conversations with Unsloth's Qwen2.5 chat template.
tokenizer.chat_template = qwen25_template

# NOTE(review): left padding is normally chosen for batched generation;
# confirm it is also what is wanted while training.
tokenizer.padding_side = "left"

# Drop tokens from the end of over-long inputs. The attribute is
# `truncation_side`; the earlier `truncation_size` was a typo that only created
# an unused attribute. The earlier `tokenizer.truncate_sequences = True` line
# is removed on purpose: `truncate_sequences` is a tokenizer *method*, and
# assigning a bool to it replaced that method. Truncation itself is requested
# per call (`truncation=True`) or by the trainer's `max_seq_length`.
tokenizer.truncation_side = "right"

In [22]:
# Wrap the base model with LoRA adapters. The adapted layers are listed in two
# groups purely for readability; the combined order matches the original list.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_settings = dict(
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    target_modules=attention_projections + mlp_projections,
    use_gradient_checkpointing="unsloth",  # Save memory
)

# NOTE: this rebinds `model`, so re-running the cell on an already wrapped
# model applies the wrapper a second time; reload the base model first.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

### Load data

In [None]:
# Read every JSON shard matching the glob below as one "train" split.
dataset_shards = "../data/dataset/dataset_part*.json"
dataset = load_dataset("json", data_files=dataset_shards, split="train")


# End-of-sequence marker of the loaded tokenizer; it is appended to every
# target summary when the records are converted to chat format.
EOS_TOKEN = tokenizer.eos_token


# Format dataset into chat format
def format_to_chat(example, system_prompt="Resuma o seguinte artigo.", eos_token=None):
    """Convert one article/summary record into a three-turn chat example.

    Args:
        example: Mapping with the string fields "article" and "summary".
        system_prompt: Instruction placed in the system turn. Defaults to the
            Portuguese summarisation prompt used for this dataset.
        eos_token: Token appended to the summary. When None (the default) the
            module-level ``EOS_TOKEN`` is used, which matches the previous
            behaviour of this function.

    Returns:
        A dict with a single "messages" key holding the system, user and
        assistant turns, each as ``{"role": ..., "content": ...}``.
    """
    if eos_token is None:
        # Resolved at call time so the function keeps following the tokenizer
        # that is currently loaded in the notebook.
        eos_token = EOS_TOKEN

    return {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example["article"]},
            {"role": "assistant", "content": example["summary"] + eos_token},
        ]
    }


# Convert every record to the chat schema and drop the raw text columns. The
# intermediate result gets its own name so that re-running this cell does not
# try to split an already split dataset.
chat_records = dataset.map(format_to_chat, remove_columns=["article", "summary"])

# Hold out 10% of the records as a test split. The shuffle is seeded (same
# value as the training seed) so the split that is published and evaluated on
# is identical on every run.
chat_dataset = chat_records.train_test_split(test_size=0.1, seed=0)

In [None]:
# Publish both splits to the Hugging Face Hub.
# The token is read through the public `get_secret_value()` accessor instead of
# the private `_secret_value` attribute.
# NOTE(review): assumes `access_token` is a pydantic `SecretStr` - confirm in
# `news_summarizer.config`.
chat_dataset.push_to_hub(
    "maikerdr/brazilian-news-articles",
    token=settings.huggingface.access_token.get_secret_value(),
)

### Fine-tune the model

In [None]:
def render_chat_text(example):
    """Render one conversation into the single string the trainer tokenises."""
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}


# The trainer is configured with dataset_text_field="text", but the dataset
# only carries a "messages" column. Materialise "text" explicitly instead of
# depending on TRL's version-specific auto-detection of conversational data.
sft_dataset = chat_dataset.map(render_chat_text, remove_columns=["messages"])

training_args = TrainingArguments(
    per_device_train_batch_size=4,  # Adjust based on GPU memory
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # effective train batch = 4 * 8 per device
    num_train_epochs=4,
    learning_rate=3e-4,
    lr_scheduler_type="linear",
    # Exactly one of fp16 / bf16 is enabled, depending on GPU support.
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    logging_steps=1,
    # Without an eval strategy the eval_dataset passed below is never used.
    eval_strategy="epoch",
    optim="adamw_8bit",
    weight_decay=0.01,
    warmup_steps=10,
    output_dir="../data/outputs",
    seed=0,
    report_to="comet_ml",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=sft_dataset["train"],
    eval_dataset=sft_dataset["test"],
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

# Close the Comet experiment even when training is interrupted or raises, so
# the run is not left open.
try:
    trainer_stats = trainer.train()
finally:
    experiment.end()

In [None]:
# Upload the fine-tuned model to the Hugging Face Hub.
# save_method="lora" is the option Unsloth documents for saving only the LoRA
# adapters; `model.save_pretrained_merged(<dir>, tokenizer, save_method="lora")`
# is the local-disk counterpart of this call.
# The token is read through the public `get_secret_value()` accessor instead of
# the private `_secret_value` attribute.
# NOTE(review): assumes `access_token` is a pydantic `SecretStr` - confirm in
# `news_summarizer.config`.
model.push_to_hub_merged(
    "maikerdr/NewsSummarizer-1.5B",
    tokenizer,
    save_method="lora",
    token=settings.huggingface.access_token.get_secret_value(),
)