In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig


# ---- Paths -----------------------------------------------------------------
# Local base checkpoint, JSON Lines training file, and the root directory the
# final adapter is written under later in the notebook.
model_dir = "./Qwen3-8B"
train_data_path = "train_data_augmented.jsonl"
output_dir = "./qwen3-8b-lora-finetuned-2"

# ---- Training data ---------------------------------------------------------
# The "json" builder reads the file; everything is exposed as the "train" split.
dataset = load_dataset("json", split="train", data_files=train_data_path)

# ---- Tokenizer -------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

# Fall back to the EOS token for padding when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ---- Base model ------------------------------------------------------------
# Weights are loaded in float16; device_map="auto" leaves device placement to
# the library.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# ---- LoRA configuration ----------------------------------------------------
# Adapters are attached to the attention projections and to the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,  # alpha / r = 2
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)


Generating train split: 0 examples [00:00, ? examples/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [3]:
# Supervised fine-tuning hyper-parameters.
# Per device, one optimizer step sees
# per_device_train_batch_size * gradient_accumulation_steps = 4 * 4 = 16 examples.
# NOTE(review): the base model is loaded in float16 and fp16=True turns on fp16
# mixed precision here, while the inference cell loads the base model in
# bfloat16 — confirm this mismatch is intended.
training_args = SFTConfig(
    # Write intermediate checkpoints under the same configured root that the
    # final adapter is saved to (output_dir/final_checkpoint), instead of a
    # second, unrelated hard-coded directory.
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    logging_steps=10,
    save_steps=100,
    learning_rate=1e-4,
    fp16=True,
    lr_scheduler_type="cosine",
    report_to="swanlab",
)

# Initialise SFTTrainer; passing peft_config lets the trainer apply LoRA to the model.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    # Hand over the tokenizer prepared in the setup cell (with its pad-token
    # fallback) so the trainer does not silently load a separate copy.
    processing_class=tokenizer,
    peft_config=lora_config,
)



Map:   0%|          | 0/7951 [00:00<?, ? examples/s]

  super().__init__(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [4]:
# Run the fine-tuning loop with the trainer built in the previous cell.
# The call's return value is the last expression, so it is shown as the cell output.
trainer.train()

[1m[34mswanlab[0m[0m: Tracking run with swanlab version 0.6.3                                   
[1m[34mswanlab[0m[0m: Run data will be saved locally in [35m[1m/root/swanlog/run-20250614_205710-6c031199[0m[0m
[1m[34mswanlab[0m[0m: üëã Hi [1m[39mmagician10001[0m[0m, welcome to swanlab!
[1m[34mswanlab[0m[0m: Syncing run [33m./qwen3_lora_sft[0m to the cloud
[1m[34mswanlab[0m[0m: üè† View project at [34m[4mhttps://swanlab.cn/@magician10001/root[0m[0m
[1m[34mswanlab[0m[0m: üöÄ View run at [34m[4mhttps://swanlab.cn/@magician10001/root/runs/nxgupycysl16sg357si9b[0m[0m


Step,Training Loss
10,2.0269
20,0.715
30,0.6889
40,0.6804
50,0.6012
60,0.6351
70,0.5909
80,0.6103
90,0.592
100,0.6324


TrainOutput(global_step=994, training_loss=0.5245539703119688, metrics={'train_runtime': 1398.6039, 'train_samples_per_second': 11.37, 'train_steps_per_second': 0.711, 'total_flos': 1.705403974979113e+17, 'train_loss': 0.5245539703119688, 'epoch': 2.0})

In [5]:
# Save the final trained weights under <output_dir>/final_checkpoint.
# NOTE(review): the trainer was built with a peft_config, so this is expected to
# write only the LoRA adapter (the inference cell loads this path with
# PeftModel.from_pretrained) — confirm against the saved files.
final_lora_path = os.path.join(output_dir, "final_checkpoint")
print(f"ËÆ≠ÁªÉÂÆåÊàêÔºåÊ≠£Âú®‰øùÂ≠òÊúÄÁªàÁöÑ LoRA ÈÄÇÈÖçÂô®Âà∞ {final_lora_path}...")
trainer.save_model(final_lora_path)
print("ÈÄÇÈÖçÂô®‰øùÂ≠òÂÆåÊØïÔºÅ")

ËÆ≠ÁªÉÂÆåÊàêÔºåÊ≠£Âú®‰øùÂ≠òÊúÄÁªàÁöÑ LoRA ÈÄÇÈÖçÂô®Âà∞ ./qwen3-8b-lora-finetuned-2/final_checkpoint...
ÈÄÇÈÖçÂô®‰øùÂ≠òÂÆåÊØïÔºÅ


In [1]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# ============ Path configuration ============ #
# Base checkpoint, LoRA adapter directory, JSON test set, and the text file the
# predictions are written to.
base_model_path = "./Qwen3-8B"
lora_model_path = "./qwen3-8b-lora-finetuned-2/final_checkpoint"
test_file_path = "test1.json"
output_file_path = "output_augmented.txt"

# ============ System prompt template ============ #
# Instruction text placed in the system turn of every prompt. It is built from
# adjacent string literals; the last two literals are concatenated without a
# newline between them.
# NOTE(review): the non-ASCII text in this literal looks mis-encoded in this
# copy of the notebook — verify it against the prompt used in the training data
# before running inference. It is intentionally left untouched here because any
# change alters what the model sees.
system_prompt = (
    "ËØ∑‰ªéÊñáÊú¨‰∏≠ÊäΩÂèñ‰ªáÊÅ®Ë®ÄËÆ∫ÂõõÂÖÉÁªÑÔºåË¶ÅÊ±ÇÔºö\n"
    "1. ‰∏•Ê†ºÊåâÁÖß‰ª•‰∏ãÊ†ºÂºèÂõûÂ§çÔºö(ËØÑËÆ∫ÂØπË±° | ËÆ∫ÁÇπ | ÁõÆÊ†áÁæ§‰Ωì | ÊòØÂê¶‰ªáÊÅ® [END])ÔºåÁõ¥Êé•ËæìÂá∫Ôºå‰∏çË¶ÅËß£Èáä„ÄÇ\n"
    "2. Â¶ÇÊúâÂ§ö‰∏™ÂõõÂÖÉÁªÑÔºå‰∏§‰∏§‰πãÈó¥Áî®[SEP]ÂàÜÈöî„ÄÇ\n"
    "3. ÁõÆÊ†áÁæ§‰ΩìÂèØ‰ª•ÂåÖÂê´‰ª•‰∏ã6È°π‰∏≠ÁöÑ‰∏ÄÈ°πÊàñÂ§öÈ°πÔºöRegion„ÄÅRacism„ÄÅSexism„ÄÅLGBTQ„ÄÅothers„ÄÅnon-hate„ÄÇ"
    "Ê≥®ÊÑè‰ªÖÂΩì‚ÄòÊòØÂê¶‰ªáÊÅ®‚ÄôÈ°π‰∏∫‚Äònon-hate‚ÄôÊó∂Ôºå‚ÄòÁõÆÊ†áÁæ§‰Ωì‚ÄôÈ°πÊâç‰∏∫‚Äònon-hate‚Äô„ÄÇ"
)

# ============ Load tokenizer and model ============ #
print("Âä†ËΩΩÊ®°Âûã‰∏≠...")
tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)

# Base weights are loaded in bfloat16; device_map="auto" leaves device
# placement to the library.
base_model_kwargs = {
    "device_map": "auto",
    "torch_dtype": torch.bfloat16,
    "trust_remote_code": True,
}
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, **base_model_kwargs)

# Wrap the base model with the adapter stored at lora_model_path, then switch
# the wrapped model to evaluation mode.
model = PeftModel.from_pretrained(base_model, lora_model_path)
model.eval()

# ============ Read test samples ============ #
# The test file is parsed as a single JSON document.
with open(test_file_path, "r", encoding="utf-8") as test_fh:
    test_data = json.loads(test_fh.read())

# ============ Inference helper ============ #
def extract_final_line(text):
    """Return the last non-blank line of ``text`` with surrounding whitespace removed.

    The text is split on the newline character only. Lines that are empty or
    contain only whitespace are ignored; no other filtering of line content is
    performed.

    Args:
        text: The decoded model output to scan.

    Returns:
        The stripped last non-blank line, or an empty string when ``text``
        contains no non-blank line.
    """
    # Walk backwards so the scan can stop at the first usable line.
    for raw_line in reversed(text.split("\n")):
        candidate = raw_line.strip()
        if candidate:
            return candidate
    return ""

# ============ Batch inference and output writing ============ #
# For every test example: build a prompt, generate a completion, keep its last
# non-blank line, and write that line to the output file (one line per example,
# in input order).
print("ÂºÄÂßãÊé®ÁêÜ...")

# generate() below samples (do_sample=True). Seed the torch RNG so that
# re-running this cell in the same environment reproduces the same output file.
torch.manual_seed(42)

# The system turn is identical for every example, so build it once.
# NOTE(review): the prompt is hand-assembled with <|im_start|>/<|im_end|>
# markers — confirm it matches the format the adapter was trained on.
prompt_prefix = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"

with open(output_file_path, "w", encoding="utf-8") as out_file:
    for idx, example in enumerate(test_data):
        user_input = example["content"]
        full_prompt = (
            prompt_prefix
            + f"<|im_start|>user\n{user_input}<|im_end|>\n"
            + "<|im_start|>assistant\n"
        )

        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        # Number of prompt tokens; used to slice the prompt off the output.
        prompt_len = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=128,
                do_sample=True,
                temperature=0.1,
                top_p=0.8,
                repetition_penalty=1.1,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens.
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        final_line = extract_final_line(response)
        # An empty prediction still produces a line, keeping the output
        # aligned with the test set by line number.
        out_file.write(final_line + "\n")
        print(f"[{idx+1}/{len(test_data)}] ÂÆåÊàê")

print(f"ÂÖ®ÈÉ®Êé®ÁêÜÂÆåÊàêÔºåÁªìÊûúÂÜôÂÖ•Ôºö{output_file_path}")


Âä†ËΩΩÊ®°Âûã‰∏≠...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

ÂºÄÂßãÊé®ÁêÜ...
[1/2000] ÂÆåÊàê
[2/2000] ÂÆåÊàê
[3/2000] ÂÆåÊàê
[4/2000] ÂÆåÊàê
[5/2000] ÂÆåÊàê
[6/2000] ÂÆåÊàê
[7/2000] ÂÆåÊàê
[8/2000] ÂÆåÊàê
[9/2000] ÂÆåÊàê
[10/2000] ÂÆåÊàê
[11/2000] ÂÆåÊàê
[12/2000] ÂÆåÊàê
[13/2000] ÂÆåÊàê
[14/2000] ÂÆåÊàê
[15/2000] ÂÆåÊàê
[16/2000] ÂÆåÊàê
[17/2000] ÂÆåÊàê
[18/2000] ÂÆåÊàê
[19/2000] ÂÆåÊàê
[20/2000] ÂÆåÊàê
[21/2000] ÂÆåÊàê
[22/2000] ÂÆåÊàê
[23/2000] ÂÆåÊàê
[24/2000] ÂÆåÊàê
[25/2000] ÂÆåÊàê
[26/2000] ÂÆåÊàê
[27/2000] ÂÆåÊàê
[28/2000] ÂÆåÊàê
[29/2000] ÂÆåÊàê
[30/2000] ÂÆåÊàê
[31/2000] ÂÆåÊàê
[32/2000] ÂÆåÊàê
[33/2000] ÂÆåÊàê
[34/2000] ÂÆåÊàê
[35/2000] ÂÆåÊàê
[36/2000] ÂÆåÊàê
[37/2000] ÂÆåÊàê
[38/2000] ÂÆåÊàê
[39/2000] ÂÆåÊàê
[40/2000] ÂÆåÊàê
[41/2000] ÂÆåÊàê
[42/2000] ÂÆåÊàê
[43/2000] ÂÆåÊàê
[44/2000] ÂÆåÊàê
[45/2000] ÂÆåÊàê
[46/2000] ÂÆåÊàê
[47/2000] ÂÆåÊàê
[48/2000] ÂÆåÊàê
[49/2000] ÂÆåÊàê
[50/2000] ÂÆåÊàê
[51/2000] ÂÆåÊàê
[52/2000] ÂÆåÊàê
[53/2000] ÂÆåÊàê
[54/2000] ÂÆåÊàê
[55/2000] ÂÆåÊàê
[56/2000] ÂÆåÊàê
[57/2000] ÂÆåÊàê
[58/2000] ÂÆåÊàê
[59/200