In [1]:
import torch
from peft import PeftModel
import json
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig
from peft import LoraConfig, TaskType, get_peft_model

In [2]:
mode_path = '/mnt/qwen2.5-72B/llm-research/meta-llama-3___1-8b-instruct'
lora_path = '/mnt/qwen2.5-72B/lora' # 这里改称你的 lora 输出对应 checkpoint 地址

In [3]:
tokenizer = AutoTokenizer.from_pretrained(mode_path, use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [4]:
def process_func(example):
    MAX_LENGTH = 120000    # Llama分词器会将一个中文字切分为多个token，因此需要放开一些最大长度，保证数据的完整性
    input_ids, attention_mask, labels = [], [], []
    instruction = tokenizer(f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are given a scientific article and a question. Answer the question as concisely as you can, using a single phrase or sentence if possible. At the same time, provide a step-by-step reasoning process according to the article to answer the question. If the question cannot be answered based on the information in the article, write \"unanswerable\". If the question is a yes/no question, answer \"yes\", \"no\", or \"unanswerable\". Do not provide any explanation in Answer but in Reasoning.Please provide your answer and reasoning according to the following format: Answer: [Your concise final answer here], Reasoning: [Your step-by-step thought process here]. Remember to keep the Answer and Reasoning separate and not mix them together. The Answer should be brief and to the point, while the Reasoning can be more detailed to explain your thought process.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nArticle{example['context']}\n\nHere is the following question. Question: {example['input']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", add_special_tokens=False)  # add_special_tokens 不在开头加 special_tokens
    response = tokenizer(f"{example['output']}<|eot_id|>", add_special_tokens=False)
    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]  # 因为eos token咱们也是要关注的所以 补充为1
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]
    print(len(input_ids))
    if len(input_ids) > MAX_LENGTH:  # 做一个截断
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [6]:
df=pd.read_csv('dataset/new_dataset.csv')

In [7]:
ds = Dataset.from_pandas(df)

In [8]:
tokenized_id = ds.map(process_func, remove_columns=ds.column_names)
tokenized_id

Map:   0%|          | 0/122 [00:00<?, ? examples/s]

4171
3503
3149
4611
5545
6670
3662
5494
4765
3407
3541
15735
5313
5274
7986
5507
3303
9401
2984
5504
10789
3898
2828
6015
2858
4406
2642
4510
5748
3746
6067
4081
19713
5304
4967
21448
6786
3280
3801
4531
4170
2715
5298
10168
3303
5669
6742
9825
4184
3929
5905
4542
3550
5521
2742
6879
5814
2705
3920
4932
2288
2393
6065
6764
3426
7689
6045
3330
2284
2645
6266
4611
5297
3244
3799
3867
3247
4833
4079
3802
5777
3485
4158
8212
2885
5771
2633
3522
3150
2895
5765
3474
4380
6142
3983
5904
6636
2966
5121
5903
2796
3001
3367
4428
5313
3329
6352
8615
2879
3099
5332
5450
3830
4166
4475
4272
5674
6066
9826
6065
2657
6970


Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 122
})

In [9]:
import torch
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="cuda:0",torch_dtype=torch.bfloat16)
model

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [None]:
model.enable_input_require_grads() 开启梯度检查点时，要执行该方法