In [None]:
import os
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from peft import PeftModel

In [None]:
# Pin this notebook to a single GPU: `device_num` is the CUDA ordinal and
# `device` is the matching torch device string.
device_num = 2
device = f"cuda:{device_num}"

# Make that GPU the current CUDA device for this process, then ask the caching
# allocator to release any unused memory it is still holding.
torch.cuda.set_device(device_num)
torch.cuda.empty_cache()

In [None]:
# Load the evaluation set; the file is parsed as tab-separated text.
df = pd.read_csv("test_dataset.csv", sep="\t")

In [None]:
def generate_text(
    prompt: str,
    tokenizer: "AutoTokenizer",
    model: "AutoModelForCausalLM",
    max_new_tokens: int = 400,
    num_beams: int = 4,
    no_repeat_ngram_size: int = 3,
) -> str:
    """Generate a completion for ``prompt`` and return it as decoded text.

    The prompt is tokenized, moved to ``model.device`` and passed to
    ``model.generate`` inside ``torch.inference_mode()``. The first returned
    sequence is decoded with the tokenizer's default ``decode`` settings
    (no special-token stripping is requested).

    Args:
        prompt: Text to feed to the model.
        tokenizer: Tokenizer used both to encode the prompt and to decode the
            generated ids; its ``eos_token_id`` is used as the stop token.
        model: Model exposing ``device`` and ``generate``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        num_beams: Number of beams passed to ``generate``.
        no_repeat_ngram_size: Passed through to ``generate`` unchanged.

    Returns:
        The decoded text of the first sequence returned by ``generate``.
    """
    # token_type_ids are switched off so they are not forwarded to generate().
    encoded = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False)
    encoded = encoded.to(model.device)

    with torch.inference_mode():
        sequences = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=num_beams,
            no_repeat_ngram_size=no_repeat_ngram_size,
        )

    # Only the first returned sequence is decoded.
    return tokenizer.decode(sequences[0])

In [None]:
# bitsandbytes settings used when loading the base model.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
    bnb_4bit_quant_type="nf4",             # 4-bit quantization format
    load_in_4bit=True,                     # request 4-bit loading
)

In [None]:
# `name` is the base checkpoint location; `peft_model` is the location of the
# adapter weights that are loaded on top of it.
name, peft_model = "path-to-base-model", "path-to-qlora-weights"

In [None]:
# Load the base model with the 4-bit settings from `bnb_config`. The settings
# must be passed as `quantization_config`: the `config` argument is reserved
# for the model's own architecture config, so handing `bnb_config` to it does
# not quantize anything.
model = AutoModelForCausalLM.from_pretrained(
    name,
    trust_remote_code=True,  # allows custom modelling code shipped with the checkpoint
    quantization_config=bnb_config,
    device_map={"": device},  # put the whole model on the GPU selected earlier
)

# Attach the adapter weights, then fold them into the base weights so that
# generation runs on a plain model without the PEFT wrapper.
# NOTE(review): merging into 4-bit quantized layers depends on the installed
# PEFT version and may introduce small rounding differences -- confirm.
model = PeftModel.from_pretrained(model, peft_model)
model = model.merge_and_unload()

# The tokenizer is read from the adapter directory, not the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(peft_model)

In [None]:
# `df.iterrows()` yields (index, row-Series) pairs, so iterating it would pass
# a whole row to the tokenizer instead of the prompt text. Iterate over the
# prompt column itself.
# TODO(review): the prompt column name is not visible in this notebook; the
# first column of the dataset is assumed here -- replace with the real name.
prompt_column = df.columns[0]

# Collect the answers in a list and build the frame once, rather than growing
# it one row at a time with `.loc`.
answers = [generate_text(text, tokenizer, model) for text in df[prompt_column]]

# Keep the input frame's index so each answer lines up with its source row.
result_df = pd.DataFrame({f'{name}-answer': answers}, index=df.index)
result_df.to_csv("res.csv")