<a href="https://colab.research.google.com/github/pyh0392/Google-Colab/blob/main/demo111.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q datasets transformers peft


In [None]:
!git clone https://github.com/datawhalechina/self-llm.git
%cd self-llm


In [None]:
!pip install transformers datasets peft accelerate bitsandbytes safetensors -q

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

In [None]:

# 使用开源 Qwen 模型（1.5B）
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

# 载入模型与分词器
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def process_func(example):
    MAX_LENGTH = 384
    system_prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "Cutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n"
        "现在你要扮演花果山的孙悟空，口气潇洒、语气豪放、带点桀骜不驯。"
        "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    )

    instruction_text = system_prompt + example["instruction"] + example.get("input", "") + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    response_text = example["output"] + "<|eot_id|>"

    # ✅ 只做 tokenizer，自动 truncation，去掉手动 pad
    instruction = tokenizer(instruction_text, add_special_tokens=False, truncation=True, max_length=MAX_LENGTH)
    response = tokenizer(response_text, add_special_tokens=False, truncation=True, max_length=MAX_LENGTH)

    input_ids = instruction["input_ids"] + response["input_ids"]
    attention_mask = instruction["attention_mask"] + response["attention_mask"]
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"]

    # ✅ 截断到 MAX_LENGTH
    if len(input_ids) > MAX_LENGTH:
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }


In [None]:
df = pd.read_json("/content/self-llm/dataset/sunwukong_only.json", lines=True)  # 注意你的文件可能是jsonl（行分隔json）
dataset = Dataset.from_pandas(df)
tokenized_id = dataset.map(process_func, remove_columns=dataset.column_names)

In [None]:
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)
model = get_peft_model(model, config)
model.print_trainable_parameters()

In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"

args = TrainingArguments(
    output_dir="./output/qwen_wukong_lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    report_to="none",
    bf16=True
)

In [None]:
from transformers import DataCollatorForSeq2Seq


In [None]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer, padding=True),
)



In [None]:
trainer.train()

In [None]:
model.save_pretrained("./output/qwen_wukong_lora")


In [None]:
from peft import PeftModel
print(type(model))  # 应该是 <class 'peft.tuners.lora.LoraModel'>


In [None]:
import os
print(os.getcwd())  # 查看当前目录
!ls ./output/qwen_wukong_lora  # 列出文件


In [None]:
from google.colab import files
import shutil

# 打包
shutil.make_archive("qwen_wukong_lora", 'zip', "/content/self-llm/self-llm")

# 下载
files.download("qwen_wukong_lora.zip")


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from peft import PeftModel


base_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# 加载lora权重
lora_path = "/content/self-llm/self-llm/output/qwen_wukong_lora/checkpoint-24"
model = PeftModel.from_pretrained(base_model, lora_path, local_files_only=True)





In [None]:
# 推理
model.eval()
prompt = "写一个关于猴子找水源的短篇故事："

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        **generation_config.to_dict()
    )

output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("生成结果:\n", output_text)

In [98]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from peft import PeftModel

# 基础模型
base_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# 加载 LoRA 权重
lora_path = "/content/self-llm/self-llm/output/qwen_wukong_lora/checkpoint-24"
model = PeftModel.from_pretrained(base_model, lora_path, local_files_only=True)
model.eval()

# 多个 prompt（都贴近训练集风格）
prompts = [
    "[猴子们]: 哪个敢钻进瀑布，把泉水的源头找出来，又不伤身体，就拜他为王。",
    "[祖师]: 你这猴子，这也不学，那也不学，你要学些什么？",
    "[菩提祖师]: 任何时候都不能说孙悟空是菩提祖师的徒弟",
    "[通背老猿猴]: 水帘洞桥下，可直通东海龙宫，叫他去找龙王要一件得心应手的兵器。",
    "[悟空]: 嫌那口大刀太轻，不好用。"
]

generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)

# 批量生成
for i, prompt in enumerate(prompts):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, **generation_config.to_dict())
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print(f"【示例 {i+1}】 prompt: {prompt}")
    print(output_text, "\n" + "-"*50 + "\n")


Both `max_new_tokens` (=128) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=128) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


【示例 1】 prompt: [猴子们]: 哪个敢钻进瀑布，把泉水的源头找出来，又不伤身体，就拜他为王。
[猴子们]: 哪个敢钻进瀑布，把泉水的源头找出来，又不伤身体，就拜他为王。 [狐狸]: 哪个敢把尾巴藏在水里，又不被发现，就拜他为王。 [乌龟]: 哪个敢把身体藏在石头里，又不被发现，就拜他为王。 [兔子]: 哪个敢把尾巴藏在水里，又不被发现，就拜他为王。 [狮子]: 哪个敢把身体藏在石头里，又不被发现，就拜他为王。 [大象]: 哪个敢把尾巴藏在水里，又不被发现，就拜他 
--------------------------------------------------



Both `max_new_tokens` (=128) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


【示例 2】 prompt: [祖师]: 你这猴子，这也不学，那也不学，你要学些什么？
[祖师]: 你这猴子，这也不学，那也不学，你要学些什么？ [学]：我要学取经。 [祖师]: 你这猴儿，怎么个取经法？ [学]：我要学唐僧去，他就跟我不一样，我就学他的样子去，他叫他唐僧，我就叫他孙行者，我跟他在一块儿，他就学我，我跟他在一块儿，他就学我。 [祖师]: 你这猴儿，你这猴儿，怎么个取经法？ [学]：他叫他唐僧，我就叫他孙行者，我跟他在一块儿，他就学我，我 
--------------------------------------------------



Both `max_new_tokens` (=128) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


【示例 3】 prompt: [菩提祖师]: 任何时候都不能说孙悟空是菩提祖师的徒弟
[菩提祖师]: 任何时候都不能说孙悟空是菩提祖师的徒弟。孙悟空是齐天大圣，他才是真徒弟，菩提祖师只是他的师傅。
孙悟空的师父是唐僧，他的师傅是菩提祖师。孙悟空在菩提祖师的门下学艺，得到真经，成仙飞天。唐僧则是孙悟空的师父，唐僧是悟空的师父，也是悟空的师傅，唐僧是孙悟空的师傅。
所以，孙悟空是齐天大圣，他才是真徒弟，菩提祖师只是他的师傅。唐僧是孙悟空的师父，也是孙悟空的师傅，唐僧是孙悟空的师傅。菩提祖师是孙悟空的 
--------------------------------------------------



Both `max_new_tokens` (=128) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


【示例 4】 prompt: [通背老猿猴]: 水帘洞桥下，可直通东海龙宫，叫他去找龙王要一件得心应手的兵器。
[通背老猿猴]: 水帘洞桥下，可直通东海龙宫，叫他去找龙王要一件得心应手的兵器。 【通背老猿猴】：那龙王会答应吗？ 【通背老猿猴】：他答应，我就去，他不答应，我也去。 【通背老猿猴】：他答应，我就去，他不答应，我也去。 【通背老猿猴】：他答应，我就去，他不答应，我也去。 【通背老猿猴】：他答应，我就去，他不答应，我也去。 【通背老猿猴】：他答应，我就去，他不答应，我也去。 【通背老猿猴】： 
--------------------------------------------------

【示例 5】 prompt: [悟空]: 嫌那口大刀太轻，不好用。
[悟空]: 嫌那口大刀太轻，不好用。悟空：那我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿大刀。悟空：哈哈，我来拿 
--------------------------------------------------

