In [None]:
!pip install "opencompass[full]"

!pip install --upgrade Pillow

!pip install huggingface_hub==0.27.0

!pip install peft==0.12.0

In [None]:
# Print the huggingface_hub version that is importable after the install cell,
# which pins huggingface_hub==0.27.0.
import huggingface_hub
print(huggingface_hub.__version__)

# Force-clear the interactive namespace before the imports that follow.
%reset -f

import argparse
from dataclasses import dataclass, field
from typing import Optional, List, Dict
import sys
import torch
from transformers import TrainingArguments, HfArgumentParser, Trainer, AutoTokenizer, AutoModelForCausalLM
import datasets

In [None]:
import os

import wandb

# SECURITY: the Weights & Biases API key is a credential and must not be stored in the
# notebook. It is read from the WANDB_API_KEY environment variable instead; set that
# variable (for example from the platform's secret store) before running this cell.
# When the variable is unset, `key=None` leaves credential lookup to wandb itself.
# NOTE(review): a key was previously committed in this cell — revoke it in the W&B
# account settings and issue a new one.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init(project="qwen-2.5-3b-lora",
           name="test_2")

In [None]:
# Define the arguments required for the main program.
# NOTE: You can customize any arguments you need to pass in.
@dataclass
class ModelArguments:
    """Arguments describing the model to fine-tune.

    Parsed together with ``DataArguments`` and ``TrainingArguments`` by
    ``HfArgumentParser`` inside ``LoRA_finetune``.

    The defaults are declared inside ``field(...)``. Re-assigning the bare
    attribute name after the annotated declaration would replace the ``Field``
    object with a plain value and drop its ``help`` / ``choices`` metadata.
    """
    model_name_or_path: Optional[str] = field(
        default="/kaggle/input/qwen2.5/transformers/3b/1",
        metadata={
            "help": "The path to the LLM to fine-tune or its name on the Hugging Face Hub."
        }
    )
    torch_dtype: Optional[str] = field(
        default="float32",
        metadata={
            "help": (
                "Override the default `torch.dtype` and load the model under this dtype."
            ),
            "choices": ["bfloat16", "float16", "float32"],
        },
    )
    # TODO: add your model arguments here


@dataclass
class DataArguments:
    """Arguments describing the fine-tuning data.

    Parsed together with ``ModelArguments`` and ``TrainingArguments`` by
    ``HfArgumentParser`` inside ``LoRA_finetune``; ``dataset_path`` is handed
    to ``datasets.load_dataset('csv', data_files=...)``.

    The default is declared inside ``field(...)`` so the ``help`` metadata is
    kept; re-assigning the bare name afterwards would discard it.
    """
    dataset_path: Optional[str] = field(
        default="/kaggle/input/alpaca-language-instruction-training/train.csv",
        metadata={
            "help": "The path to the fine-tuning dataset or its name on the Hugging Face Hub."
        }
    )
    # TODO: add your data arguments here


In [None]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM

# Select the compute device: the GPU when PyTorch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Show whether CUDA is usable, then release cached GPU memory held by PyTorch.
print(torch.cuda.is_available())
torch.cuda.empty_cache()

def LoRA_finetune():
    """Fine-tune the causal LM with LoRA adapters on the instruction dataset.

    Parses ``ModelArguments``, ``DataArguments`` and ``TrainingArguments`` with
    ``HfArgumentParser``, loads the tokenizer and model, wraps the model with
    LoRA adapters, loads the CSV dataset and runs ``Trainer.train()``.

    NOTE: a later cell of this notebook defines ``LoRA_finetune`` again; when
    the cells are run in order, that later definition replaces this one.
    """
    parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))  # argument parser
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Use the parsed --model_name_or_path instead of a second hard-coded copy of the path.
    model_path = model_args.model_name_or_path

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # device_map="auto" already decides where the weights live, so no model.to(device)
    # follows: moving a dispatched model afterwards would fight that placement.
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        inference_mode=False,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1
    )

    model = get_peft_model(model, peft_config)
    # NOTE(review): presumably these flags tell Trainer the model handles its own
    # multi-device placement — confirm against the Trainer implementation.
    model.is_parallelizable = True
    model.model_parallel = True
    model.print_trainable_parameters()

    dataset = datasets.load_dataset('csv', data_files=data_args.dataset_path)

    def _as_text(value) -> str:
        """Return ``value`` as a string, mapping a missing (None) cell to ''."""
        return "" if value is None else str(value)

    def data_collator(batch: List[Dict]):
        """Turn raw dataset rows into padded ``input_ids`` / ``labels`` / ``attention_mask``.

        Each row is encoded as prompt tokens followed by answer tokens. Labels are
        -100 over the prompt and over padding, so only answer tokens are scored.
        """
        # Loop-invariant, so it is built once per batch rather than once per sample.
        SYSTEM_PROMPT = '''You are an intelligent assistant capable of answering various types of questions based on context. 
                            Carefully read each instruction and use logical reasoning to determine the best answer. 
                            Provide your answer after considering relevant input.'''

        inputs = []
        labels = []
        max_length = 0

        for sample in batch:
            # Empty CSV cells are typically loaded as None; `.get(key, "")` would return
            # that None, which then formats as the text "None" and breaks `.strip()`.
            instruction_text = _as_text(sample.get("instruction"))
            input_text = _as_text(sample.get("input"))
            output_text = _as_text(sample.get("output"))

            question = f"{SYSTEM_PROMPT}{instruction_text}{input_text}"

            # Guarantee at least one answer token so the sample has something to learn from.
            if not output_text.strip():
                output_text = "<empty>"

            input_ids = tokenizer.encode_plus(
                question,
                return_tensors="pt",
                max_length=tokenizer.model_max_length,
                truncation=True,
                padding=False,
            ).input_ids

            # Build the answer sequence.
            output_ids = tokenizer.encode_plus(
                output_text,
                return_tensors="pt",
                max_length=tokenizer.model_max_length,
                truncation=True,
                padding=False,
            ).input_ids

            full_input = torch.cat([input_ids, output_ids], dim=1)
            inputs.append(full_input)

            # -100 marks positions that the loss ignores; only the answer part is copied in.
            prompt_length = input_ids.shape[1]
            labels_tensor = torch.full_like(full_input, -100)
            labels_tensor[:, prompt_length:] = full_input[:, prompt_length:]
            labels.append(labels_tensor)

            max_length = max(max_length, full_input.shape[1])

        # Right-pad every sample to the longest full sequence in the batch.
        inputs = [torch.nn.functional.pad(single_input, (0, max_length - single_input.size(1)), value=tokenizer.pad_token_id) for single_input in inputs]
        labels = [torch.nn.functional.pad(single_label, (0, max_length - single_label.size(1)), value=-100) for single_label in labels]

        inputs = torch.cat(inputs, dim=0)
        labels = torch.cat(labels, dim=0)

        return {
            "input_ids": inputs,
            "labels": labels,
            # Every position holding the pad token id is treated as padding.
            "attention_mask": (inputs != tokenizer.pad_token_id).to(dtype=torch.int)
        }

    trainer = Trainer(
        args=training_args,
        model=model,
        tokenizer=tokenizer,
        data_collator=data_collator,
        train_dataset=dataset["train"],
    )

    trainer.train()

In [None]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM

# Same device selection as the earlier training cell: prefer CUDA, fall back to CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
# Report CUDA availability and drop PyTorch's cached GPU allocations.
print(torch.cuda.is_available())
torch.cuda.empty_cache()

def LoRA_finetune():
    """Fine-tune the causal LM with LoRA adapters using chat-templated prompts.

    Parses ``ModelArguments``, ``DataArguments`` and ``TrainingArguments`` with
    ``HfArgumentParser``, loads the tokenizer and model, wraps the model with
    LoRA adapters, loads the CSV dataset and runs ``Trainer.train()``.

    NOTE: this redefines the ``LoRA_finetune`` from the previous cell; when the
    notebook is run top to bottom this is the version the launch cell calls.
    """
    parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))  # argument parser
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Use the parsed --model_name_or_path instead of a second hard-coded copy of the path.
    model_path = model_args.model_name_or_path

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto")
    model.to(device)

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        inference_mode=False,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1
    )

    model = get_peft_model(model, peft_config)
    # NOTE(review): presumably these flags tell Trainer the model handles its own
    # multi-device placement — confirm against the Trainer implementation.
    model.is_parallelizable = True
    model.model_parallel = True
    model.print_trainable_parameters()

    dataset = datasets.load_dataset('csv', data_files=data_args.dataset_path)

    def _as_text(value) -> str:
        """Return ``value`` as a string, mapping a missing (None) cell to ''."""
        return "" if value is None else str(value)

    def data_collator(batch: List[Dict]):
        """Turn raw dataset rows into padded ``input_ids`` / ``labels`` / ``attention_mask``.

        Each sample becomes ``prompt + answer + pad_token_id``, where the prompt is the
        chat template rendered with the system prompt and the user question. Labels
        are -100 over the prompt and over padding; the answer and the trailing token
        are scored.
        """
        SYSTEM_PROMPT = '''You are an intelligent assistant capable of answering various types of questions based on context. 
                            Carefully read each instruction and use logical reasoning to determine the best answer. 
                            Provide your answer after considering relevant input.'''

        # The tokenizer's pad token id doubles as the end-of-answer marker.
        # NOTE(review): the chat template's own end-of-turn token may be the better
        # terminator for a chat-formatted prompt — confirm.
        end_token_id = tokenizer.pad_token_id

        input_rows = []
        label_rows = []
        mask_rows = []

        for sample in batch:
            # Empty CSV cells are typically loaded as None; `.get(key, "")` would return
            # that None, which formats as the text "None" and cannot be tokenised.
            instruction_text = _as_text(sample.get("instruction"))
            input_text = _as_text(sample.get("input"))
            output_text = _as_text(sample.get("output"))

            # The system prompt mentions "instruction" and "input", so both are labelled
            # explicitly here. Since the goal is a Q&A bot, a prompt better suited to
            # that context may be worth trying.
            question = f"instruction:{instruction_text}input:{input_text}"

            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": question}
            ]

            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

            # Tokenise each text once and keep plain id lists; without padding the
            # tokenizer's attention mask is all ones, so it is rebuilt from the length.
            prompt_ids = list(tokenizer(
                prompt,
                max_length=tokenizer.model_max_length,
                truncation=True,
                padding=False,
            ).input_ids)
            answer_ids = list(tokenizer(
                output_text,
                max_length=tokenizer.model_max_length,
                truncation=True,
                padding=False,
            ).input_ids)

            sequence = prompt_ids + answer_ids + [end_token_id]
            # Only the answer and the end marker contribute to the loss.
            targets = [-100] * len(prompt_ids) + answer_ids + [end_token_id]

            input_rows.append(torch.tensor(sequence, dtype=torch.long))
            label_rows.append(torch.tensor(targets, dtype=torch.long))
            mask_rows.append(torch.ones(len(sequence), dtype=torch.long))

        # Right-pad to the longest FULL sequence in the batch. Sizing the padding by the
        # answer length alone would yield negative pad widths, which crop the tensors
        # and cut off the labelled tokens.
        pad_sequence = torch.nn.utils.rnn.pad_sequence
        return {
            "input_ids": pad_sequence(input_rows, batch_first=True, padding_value=tokenizer.pad_token_id),
            "labels": pad_sequence(label_rows, batch_first=True, padding_value=-100),
            "attention_mask": pad_sequence(mask_rows, batch_first=True, padding_value=0)
        }

    trainer = Trainer(
        args=training_args,
        model=model,
        tokenizer=tokenizer,
        data_collator=data_collator,
        train_dataset=dataset["train"],
    )

    trainer.train()

In [None]:
# Training options handed to HfArgumentParser through sys.argv. Values stay strings
# because they stand in for command-line tokens.
cli_options = {
    "output_dir": "/kaggle/working/qwen2.5/3b-lora",
    "run_name": "test_2",
    "learning_rate": "1e-5",
    # 3-5 epochs are usually enough to converge; training longer may overfit.
    "num_train_epochs": "5",
    # Batch size on each GPU.
    "per_device_train_batch_size": "1",
    # Overwrite earlier outputs while developing.
    "overwrite_output_dir": "True",
    "save_steps": "5000",
    "save_total_limit": "10",
    "logging_steps": "50",
    "logging_dir": "/kaggle/working/",
    # The collator reads the raw instruction/input/output columns itself.
    "remove_unused_columns": "False",
    "dataloader_drop_last": "True",
    "seed": "42",
    # Enable mixed-precision training for speed.
    "fp16": "True",
}
# Flatten to ["notebook", "--name", "value", ...]; the first entry is the program name.
sys.argv = ["notebook"] + [
    token
    for option_name, option_value in cli_options.items()
    for token in (f"--{option_name}", option_value)
]
LoRA_finetune()
torch.cuda.empty_cache()

In [None]:
# Merge the base model with the LoRA adapter and save the merged result.
import time
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): this base path is the "3b-instruct" variant, while the training
# function in this notebook loads ".../3b/1", and the adapter path below names a
# 1.5b run. An adapter presumably has to be merged into the same base model it was
# trained on — confirm both paths before running.
model_path = "/kaggle/input/qwen2.5/transformers/3b-instruct/1"
lora_dir = "/kaggle/input/lora-qwen2-5-1-5b-batchsize1/qwen2.5/1.5b-lora/checkpoint-103520" # replace with the LoRA checkpoint from your own run

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
# Attach the adapter weights to the base model and move the result to the device.
model = PeftModel.from_pretrained(model, lora_dir).to(device)
#print(model)
# Merge the model, and save the tokenizer alongside it.
model = model.merge_and_unload()
model.save_pretrained("lora_output_prompt8")
tokenizer.save_pretrained("lora_output_prompt8")