In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_from_disk, Dataset
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device.")
else:
    device = torch.device("cpu")
    print("MPS not available, using CPU.")

Using MPS device.


In [4]:
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

In [5]:
dataset = load_from_disk("./data/llm_mail_dataset")
dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 335
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 42
    })
    test: Dataset({
        features: ['items'],
        num_rows: 43
    })
})

# Train

In [12]:
from peft import LoraConfig

lora_config = LoraConfig(
  r=4,
  lora_alpha=8,
  lora_dropout=0,
  bias="none",
  task_type="CAUSAL_LM",
  target_modules = ["q_proj", 
    "k_proj", "v_proj", "o_proj", 
    "gate_proj", "up_proj", "down_proj"],
  modules_to_save=["lm_head"]
)

In [13]:
from peft import get_peft_model

peft_model = get_peft_model(
	model,
	lora_config,
)



In [14]:
from trl import SFTTrainer, SFTConfig

In [15]:
training_args = SFTConfig(
   output_dir="trainer_output/my_mail_classifier_llm",
   report_to="none",
   overwrite_output_dir=True,
   do_train=True,
   learning_rate=2e-4,
   num_train_epochs=3,
   bf16=False,  # Disable bf16 to avoid compatibility issues
   fp16=False,  # Disable fp16 for MPS compatibility
)

average_tokens_across_devices is set to True but it is invalid when world size is1. Turn it to False automatically.


In [16]:
trainer = SFTTrainer(
   model=peft_model,
   args=training_args,
   train_dataset=dataset["train"],
   eval_dataset=dataset["validation"],
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [17]:
trainer.train()

Step,Training Loss
10,2.1124
20,1.0676
30,0.9493
40,0.9118
50,0.6265
60,0.5231
70,0.5258
80,0.5473
90,0.4289
100,0.3556


TrainOutput(global_step=126, training_loss=0.7154572975067865, metrics={'train_runtime': 16003.9889, 'train_samples_per_second': 0.063, 'train_steps_per_second': 0.008, 'total_flos': 472164060015360.0, 'train_loss': 0.7154572975067865})

In [18]:
model_path = "models/my_llm_mail_classifier"
trainer.save_model(model_path)