In [None]:
import os
from getpass import getpass

# Optional: route Hugging Face Hub traffic through a mirror. This is set
# before huggingface_hub is imported so the library sees it on import.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
from huggingface_hub import login

# Never hard-code an access token in a notebook: it ends up in version control
# and in shared copies. Read it from the HF_TOKEN environment variable, and
# fall back to an interactive (non-echoing) prompt when it is not set.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(hf_token)

In [None]:
# Server-only settings: this was trained on a shared server. If you train on a
# local machine, comment out this whole statement.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "1",  # expose only GPU index 1 to this process
    "NCCL_P2P_DISABLE": "1",      # turn off NCCL peer-to-peer transport
    "NCCL_IB_DISABLE": "1",       # turn off NCCL InfiniBand transport
})

In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Maximum sequence length (in tokens) passed to the model loader and trainer.
max_seq_length = 2048
# Prefer bfloat16, but fall back to float16 on GPUs that do not support bf16
# (a hard-coded bfloat16 fails on such devices).
dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    else torch.float16
)
# Load full 16-bit weights rather than 4-bit quantized ones.
load_in_4bit = False

In [None]:
# Load the base checkpoint and its tokenizer through Unsloth, using the
# sequence length, dtype and quantization flag chosen in the config cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,              # LoRA rank
    lora_alpha=32,     # LoRA scaling factor (equal to the rank here)
    lora_dropout=0,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)


In [8]:
from unsloth.chat_templates import get_chat_template
# Attach the "llama-3.1" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

In [None]:
from datasets import load_dataset
dataset = load_dataset("json", data_files="XXX", split="train")   # The dataset file path differs per user; remember to set it

# Convert instruction/output pairs into chat-formatted text
def formatting_prompts_func(examples):
    """Render a batch of instruction/output pairs as chat-template strings.

    Args:
        examples: A batch mapping with parallel "instruction" and "output"
            sequences (as produced by ``Dataset.map(..., batched=True)``).

    Returns:
        A dict with a single "text" key holding one rendered conversation
        string per input pair, in the same order.
    """
    def render(user_text, assistant_text):
        # One single-turn conversation: a user message and its reply.
        turns = [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ]
        return tokenizer.apply_chat_template(
            turns, tokenize=False, add_generation_prompt=False
        )

    pairs = zip(examples["instruction"], examples["output"])
    return {"text": [render(question, answer) for question, answer in pairs]}

# Apply the formatter batch-wise; it adds the "text" column that the trainer
# is configured to read (dataset_text_field = "text").
dataset = dataset.map(formatting_prompts_func, batched=True)

In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq


# Supervised fine-tuning trainer over the pre-rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    packing=False,
    args=SFTConfig(
        # 32 sequences per device, accumulated over 4 steps per optimizer update.
        per_device_train_batch_size=32,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=150,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_torch",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",  # no external experiment tracker
    ),
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Wrap the trainer with Unsloth's responses-only helper, passing the Llama 3
# header markers that delimit the user turn and the assistant turn.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",
)

# Start training
trainer_stats = trainer.train()

In [None]:
# Persist the fine-tuned model and the tokenizer into the same directory.
model.save_pretrained("llama3.2-150")
tokenizer.save_pretrained("llama3.2-150")  # Save the model artifacts

In [None]:
# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

messages = [
    {"role": "user", "content": "帮我写一篇科幻小说"},
]
# return_dict=True makes the tokenizer return input_ids together with the
# attention_mask, so generate() is not left to guess the mask, and the code no
# longer relies on apply_chat_template returning a bare tensor.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
    return_dict = True,
).to("cuda")

from transformers import TextStreamer
# Stream tokens to the cell output as they are produced, hiding the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# do_sample=True is explicit so temperature / min_p are honored regardless of
# the checkpoint's default generation config.
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, do_sample = True, temperature = 1.0, min_p = 0.1)

In [None]:
# Reload the saved adapter weights. After get_peft_model, `model` is a PEFT
# wrapper whose load_adapter() requires an adapter name; naming the existing
# "default" adapter reloads the saved weights into it.
model.load_adapter("llama3.2-150", adapter_name = "default")

In [None]:
messages = [
    {"role" : "user", "content" : "如果你统治了世界，你会欺负我吗？"}
]
# Render the conversation to a prompt string ending with the assistant header.
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True,
)

from transformers import TextStreamer
_ = model.generate(
    # The rendered template already starts with the BOS token, so the tokenizer
    # must not add special tokens again (that would produce a doubled BOS).
    **tokenizer(text, return_tensors = "pt", add_special_tokens = False).to("cuda"),
    max_new_tokens = 1500,
    # Explicit so the sampling parameters below are honored regardless of the
    # checkpoint's default generation config.
    do_sample = True,
    temperature = 1.0, top_p = 0.8, top_k = 20,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)