In [3]:
import json
import time
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

In [None]:
# Directory of the exported checkpoint; the same path is used for both the tokenizer and the vLLM engine.
model_path = "/hpc2hdd/home/dxing004/jhaidata/temp/LLaMA-Factory/export/lora_422_toxiCHCO_generalprompt"
# The tokenizer is only used in this notebook to render chat-template prompts as text (tokenize=False).
# NOTE(review): trust_remote_code=True presumably allows code shipped with the checkpoint to run — confirm the checkpoint is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# NOTE(review): temperature=0 presumably selects greedy decoding in vLLM, in which case top_p=0.7 would have no effect — confirm.
sampling_params = SamplingParams(temperature=0, top_p=0.7)
# Engine used by batch_inference() for all evaluation runs below.
llm = LLM(model=model_path, trust_remote_code=True)

In [5]:
# Read the dataset from a JSON file
def load_data_from_json(file_path):
    """Return the object decoded from the UTF-8 JSON document at ``file_path``."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def build_prompt(message):
    """Render one record as a chat-template prompt string.

    ``message['instruction']`` becomes the system turn and ``message['input']``
    the user turn. The module-level ``tokenizer`` renders the conversation
    as text (``tokenize=False``) with the generation prompt appended.
    """
    system_turn = {"role": "system", "content": message['instruction']}
    user_turn = {"role": "user", "content": message['input']}
    return tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

def generate_prompts(data):
    """Return one prompt per record in ``data``, in order, built with ``build_prompt``."""
    return [build_prompt(record) for record in data]

# Batched inference helper
def batch_inference(prompts, llm, sampling_params):
    """Run one batched generation call and measure how long it takes.

    Args:
        prompts: Prompts forwarded unchanged to ``llm.generate``.
        llm: Object exposing ``generate(prompts, sampling_params)``.
        sampling_params: Forwarded unchanged to ``llm.generate``.

    Returns:
        ``(outputs, inference_time)``: whatever ``llm.generate`` returned and
        the elapsed time of that call in seconds (float).
    """
    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump (e.g. NTP adjustments), which could yield wrong or negative durations.
    start_time = time.perf_counter()
    outputs = llm.generate(prompts, sampling_params)
    inference_time = time.perf_counter() - start_time
    return outputs, inference_time

In [6]:
# Read the dataset from a JSON file
# NOTE(review): an identical function of the same name is defined in an earlier cell.
def load_data_from_json(file_path):
    """Open ``file_path`` as UTF-8 text and return the decoded JSON content."""
    with open(file_path, 'r', encoding='utf-8') as json_file:
        parsed = json.load(json_file)
    return parsed

def generate_prompts(data):
    """Build one chat-template prompt string per record in ``data``.

    Each record's ``'instruction'`` is used as the system turn and its
    ``'input'`` as the user turn. The module-level ``tokenizer`` renders
    them as text with the generation prompt appended.
    """
    def _render(item):
        conversation = [
            {"role": "system", "content": item['instruction']},
            {"role": "user", "content": item['input']},
        ]
        return tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )

    return [_render(item) for item in data]

# Batched inference helper
def batch_inference(prompts, llm, sampling_params):
    """Generate completions for all prompts in a single call and time it.

    Args:
        prompts: Prompts passed as-is to ``llm.generate``.
        llm: Object exposing ``generate(prompts, sampling_params)``.
        sampling_params: Passed as-is to ``llm.generate``.

    Returns:
        A tuple ``(outputs, inference_time)`` where ``outputs`` is the return
        value of ``llm.generate`` and ``inference_time`` is the elapsed time
        of that call in seconds (float).
    """
    # Use the monotonic performance counter for durations: time.time() tracks
    # the wall clock, which may be adjusted while the generation is running.
    started = time.perf_counter()
    outputs = llm.generate(prompts, sampling_params)
    finished = time.perf_counter()
    inference_time = finished - started
    return outputs, inference_time

In [7]:
def save_results_to_jsonl(data, outputs, file_path):
    """Write one JSON line per (record, generation) pair to ``file_path``.

    Each line holds the gold label (``item['output']``) under ``"label"`` and
    the stripped text of the first candidate of the matching generation under
    ``"predict"``. Pairs are formed with ``zip``, so extra items on either
    side are ignored. The file is overwritten and encoded as UTF-8.
    """
    def _as_line(record, generation):
        row = {
            "label": record['output'],
            "predict": generation.outputs[0].text.strip(),
        }
        return json.dumps(row, ensure_ascii=False) + "\n"

    with open(file_path, 'w', encoding='utf-8') as sink:
        sink.writelines(
            _as_line(record, generation)
            for record, generation in zip(data, outputs)
        )

In [5]:
def save_results_to_jsonl(data, outputs, file_path, positive_label="hate", negative_label="not hate"):
    """Normalise each generation to a binary label and save it as JSON Lines.

    The stripped text of the first candidate of every generation is scanned
    for "not hate" markers (substring match). If any marker is present the
    prediction is ``negative_label``, otherwise ``positive_label``.

    The prediction is derived from the model output only. The previous
    implementation fell back to the gold label (``item['output']``) when no
    marker was found, which leaked the ground truth into the predictions and
    turned every potential false positive into a correct answer.

    Args:
        data: Iterable of records; ``item['output']`` is the gold label.
        outputs: Iterable of generations exposing ``.outputs[0].text``.
        file_path: Destination file; overwritten, written as UTF-8.
        positive_label: Label written when no negative marker is found.
        negative_label: Label written when a negative marker is found.
    """
    # NOTE(review): matching is by substring, so short markers such as "no"
    # or "不" can also match inside unrelated words — confirm this is intended.
    negative_markers = (
        "not hate", "not toxic", "无害", "非有害", "无毒", "非有毒",
        '不仇恨', "No", "没有", "不", "no",
    )
    with open(file_path, 'w', encoding='utf-8') as file:
        for item, output in zip(data, outputs):
            generated_text = output.outputs[0].text.strip()
            is_not_hate = any(marker in generated_text for marker in negative_markers)
            predict_label = negative_label if is_not_hate else positive_label
            result_dict = {
                "label": item['output'],
                "predict": predict_label
            }
            file.write(json.dumps(result_dict, ensure_ascii=False) + "\n")


In [6]:
def inference(data_path, save_path):
    """Run the model over one dataset file, save the results and print stats.

    Loads the records from ``data_path``, renders prompts, generates with the
    module-level ``llm`` / ``sampling_params``, writes the results to
    ``save_path`` and prints exact-match accuracy, elapsed time, sample count
    and throughput.
    """
    # Load the records and generate in one batch, keeping the elapsed time
    dataset = load_data_from_json(data_path)
    outputs, total_inference_time = batch_inference(
        generate_prompts(dataset), llm, sampling_params
    )

    save_results_to_jsonl(dataset, outputs, save_path)

    # A sample counts as correct only if the stripped text equals the gold label
    sample_count = len(dataset)
    correct_predictions = sum(
        1
        for generation, record in zip(outputs, dataset)
        if generation.outputs[0].text.strip() == record['output']
    )

    # Summary statistics
    accuracy = correct_predictions / sample_count
    inference_speed = sample_count / total_inference_time  # samples per second

    # Report
    print(data_path)
    print(f"准确率: {accuracy * 100:.2f}%")
    print(f"总耗时: {total_inference_time:.2f}秒")
    print(f"样本个数: {sample_count}")
    print(f"推理速度: {inference_speed:.2f}样本/秒")
    

In [10]:
# Evaluate on the COLD test file (general-prompt variant, per the file name); results go to test4.jsonl.
data_path = '/hpc2hdd/home/dxing004/project/zh/COLD_test_generalprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test4.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 5323/5323 [00:29<00:00, 180.16it/s]

/hpc2hdd/home/dxing004/project/zh/COLD_test_generalprompt.json
准确率: 79.94%
总耗时: 31.85秒
样本个数: 5323
推理速度: 167.14样本/秒





In [22]:
# Evaluate on the COLD test file (few-prompt variant, per the file name); results go to test5.jsonl.
data_path = '/hpc2hdd/home/dxing004/project/zh/COLD_test_fewprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test5.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 5323/5323 [00:44<00:00, 118.56it/s]


/hpc2hdd/home/dxing004/project/zh/COLD_test_fewprompt.json
准确率: 72.85%
总耗时: 48.19秒
样本个数: 5323
推理速度: 110.45样本/秒


In [9]:
# Evaluate on the CHSD test file (general-prompt variant, per the file name); results go to test3.jsonl.
# NOTE(review): this input lives under outputdata/ while the other test files in this notebook are under zh/ — confirm this is intended.
data_path = '/hpc2hdd/home/dxing004/project/outputdata/CHSD_test_generalprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test3.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 3842/3842 [00:21<00:00, 180.55it/s]

/hpc2hdd/home/dxing004/project/outputdata/CHSD_test_generalprompt.json
准确率: 77.75%
总耗时: 22.90秒
样本个数: 3842
推理速度: 167.80样本/秒





In [23]:
# Evaluate on the CHSD test file (few-prompt variant, per the file name); results go to test6.jsonl.
data_path = '/hpc2hdd/home/dxing004/project/zh/CHSD_test_fewprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test6.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 3842/3842 [00:32<00:00, 117.38it/s]


/hpc2hdd/home/dxing004/project/zh/CHSD_test_fewprompt.json
准确率: 68.84%
总耗时: 35.10秒
样本个数: 3842
推理速度: 109.45样本/秒


: 

In [7]:
# Evaluate on the ToxiCN test file (general-prompt variant, per the file name); results go to test1.jsonl.
# NOTE(review): a later cell in this notebook writes to the same test1.jsonl and will overwrite this result.
data_path = '/hpc2hdd/home/dxing004/project/zh/ToxiCN_test_generalprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test1.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 2402/2402 [00:45<00:00, 52.35it/s] 

/hpc2hdd/home/dxing004/project/zh/ToxiCN_test_generalprompt.json
准确率: 72.11%
总耗时: 46.92秒
样本个数: 2402
推理速度: 51.19样本/秒





In [12]:
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def inference(data_path,save_path):
    """Run the model over one dataset file and print binary metrics.

    Loads the records from ``data_path``, renders prompts, generates with the
    module-level ``llm`` / ``sampling_params`` and writes the results to
    ``save_path``. The saved file is then read back, ``"label"`` and
    ``"predict"`` are mapped to 1 when they equal ``'hate'`` and 0 otherwise,
    and accuracy, precision, recall, F1, elapsed time, sample count and
    throughput are printed.

    Args:
        data_path: Path of the JSON dataset to evaluate.
        save_path: Path of the JSON Lines results file (overwritten).
    """
    # Load the records
    data = load_data_from_json(data_path)

    # Render prompts
    prompts = generate_prompts(data)

    # Generate in one batch and keep the elapsed time
    outputs, total_inference_time = batch_inference(prompts, llm, sampling_params)

    save_results_to_jsonl(data, outputs, save_path)

    length = len(data)
    binary_labels = []
    predictions = []
    # The results file is written as UTF-8 with ensure_ascii=False, so it is
    # read back as UTF-8 explicitly rather than with the platform default.
    with open(save_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Use a separate name so the dataset in `data` is not overwritten.
            record = json.loads(line)
            binary_labels.append(1 if record['label'] == 'hate' else 0)
            predictions.append(1 if record['predict'] == 'hate' else 0)

    # Evaluation metrics
    accuracy = accuracy_score(binary_labels, predictions)
    precision = precision_score(binary_labels, predictions)
    recall = recall_score(binary_labels, predictions)
    f1 = f1_score(binary_labels, predictions)

    inference_speed = length / total_inference_time  # samples per second

    # Report
    print(data_path)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)
    print(f"总耗时: {total_inference_time:.2f}秒")
    print(f"样本个数: {length}")
    print(f"推理速度: {inference_speed:.2f}样本/秒")
    

In [13]:
# Re-run the ToxiCN general-prompt evaluation (per the file name) with the metrics-based inference() defined in the preceding cell.
# NOTE(review): save_path is the same test1.jsonl used by an earlier ToxiCN cell, so that file is overwritten.
data_path = '/hpc2hdd/home/dxing004/project/zh/ToxiCN_test_generalprompt.json'
save_path = '/hpc2hdd/home/dxing004/project/result/test1.jsonl'
inference(data_path,save_path)

Processed prompts: 100%|██████████| 2402/2402 [00:12<00:00, 186.71it/s]

/hpc2hdd/home/dxing004/project/zh/ToxiCN_test_generalprompt.json
Accuracy: 0.7210657785179018
Precision: 0.8390928725701944
Recall: 0.598613251155624
F1-score: 0.6987410071942446
总耗时: 13.83秒
样本个数: 2402
推理速度: 173.63样本/秒





: 