## 使用微调后的 LLaMA2-7B 推理

In [1]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer


model_dir = "./models/llama-7-int4-dolly-20240503_101722"

# Tokenizer files are read from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_dir)

# Load the fine-tuned checkpoint through PEFT's auto class, requesting 4-bit
# weights with float16 as the torch dtype and reduced CPU memory use while loading.
model = AutoPeftModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_dir,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
from datasets import load_dataset 
from random import randrange
 
 
# Load the dataset from the Hub and pick one random sample.
dataset = load_dataset("databricks/databricks-dolly-15k", split="train")
sample = dataset[randrange(len(dataset))]
 
prompt = f"""### Instruction:
Use the Input below to create an instruction, which could have been used to generate the input using an LLM. 
 
### Input:
{sample['response']}
 
### Response:
"""
 
# Tokenize once, keep the attention mask, and move the tensors to whichever
# device the model lives on instead of assuming the default CUDA device.
encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
input_ids = encoded.input_ids

outputs = model.generate(
    input_ids=input_ids,
    attention_mask=encoded.attention_mask,
    max_new_tokens=100,
    do_sample=True,
    top_p=0.9,
    temperature=0.9,
)

# For a causal LM, generate() returns the prompt tokens followed by the new
# tokens. Slice at the prompt's token length (not its character length) so the
# result stays correct even if decoding does not reproduce the prompt text
# exactly, e.g. after truncation or whitespace normalization.
generated_ids = outputs[0][input_ids.shape[1]:]
generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

print(f"Prompt:\n{sample['response']}\n")
print(f"Generated instruction:\n{generated_text}")
print(f"Ground truth:\n{sample['instruction']}")

Prompt:
Adam Sandler: Actor
Harry Kane: Athlete
Hugo Lloris: Athlete
Mike Myers: Actor
Tyreek Hill: Athlete
Ryan Gosling: Actor
Joel Embiid: Athlete

Generated instruction:
Classify the following people as athletes or actors: Adam Sandler, Harry Kane, Hugo Lloris, Mike Myers, Tyreek Hill, Ryan Gosling, Joel Embiid

Ground truth:
Classify each of the following people as an actor or athlete: Adam Sandler, Harry Kane, Hugo Lloris, Mike Myers, Tyreek Hill, Ryan Gosling, Joel Embiid
