/
test_benchmark.py
106 lines (80 loc) · 4.05 KB
/
test_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
@description:
"""
import os
import sys
import pandas as pd
sys.path.append('..')
from textgen import GptModel
# Absolute path of this file's directory; used to resolve the benchmark data file and result outputs.
pwd_path = os.path.abspath(os.path.dirname(__file__))
def get_alpaca_prompt(instruction):
    """Wrap ``instruction`` in the Stanford-Alpaca instruction-following prompt template."""
    template = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:{}\n\n### Response: "
    )
    return template.format(instruction)
def get_chatglm_prompt(instruction):
    """Format ``instruction`` as a ChatGLM question/answer prompt (question, newline, answer cue)."""
    return instruction + "\n答:"
def get_vicuna_prompt(instruction):
    """Embed ``instruction`` in the Vicuna chat prompt template (system preamble + USER/ASSISTANT turns)."""
    preamble = (
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions. "
    )
    return preamble + "USER: " + instruction + " ASSISTANT: "
# Load the benchmark prompts: one non-empty line per test case.
# Fix: the original opened the file without ever closing it (no `with`), leaking the
# file handle; it also relied on the platform default encoding for a Chinese-text file.
with open(os.path.join(pwd_path, '../examples/data/llm_benchmark_test.txt'), encoding='utf-8') as _f:
    sentences = [line.strip() for line in _f if line.strip()]
def test_llama_13b_lora():
    """Benchmark LLaMA-13B with the BELLE Chinese LoRA adapter; print and persist results."""
    model = GptModel('llama', "decapoda-research/llama-13b-hf", peft_name='shibing624/llama-13b-belle-zh-lora')
    prompts = [get_alpaca_prompt(s) for s in sentences]
    outputs = model.predict(prompts)
    for query, answer in zip(sentences, outputs):
        print('input:', query, '\noutput:', answer)
        print()
    # Save the paired inputs/outputs as JSON-lines and as an Excel sheet.
    result_df = pd.DataFrame.from_dict({'input': sentences, 'output': outputs})
    json_file = os.path.join(pwd_path, 'llama_13b_lora_llm_benchmark_test_result.json')
    result_df.to_json(json_file, force_ascii=False, orient='records', lines=True)
    result_df.to_excel(json_file + '.xlsx', index=False)
def test_llama_7b_alpaca_plus():
    """Benchmark the merged Chinese-Alpaca-Plus-7B model (no PEFT adapter); print and persist results."""
    model = GptModel('llama', "shibing624/chinese-alpaca-plus-7b-hf", args={'use_peft': False})
    prompts = [get_alpaca_prompt(s) for s in sentences]
    outputs = model.predict(prompts)
    for query, answer in zip(sentences, outputs):
        print('input:', query, '\noutput:', answer)
        print()
    # Save the paired inputs/outputs as JSON-lines and as an Excel sheet.
    result_df = pd.DataFrame.from_dict({'input': sentences, 'output': outputs})
    json_file = os.path.join(pwd_path, 'llama_7b_alpaca_plus_llm_benchmark_test_result.json')
    result_df.to_json(json_file, force_ascii=False, orient='records', lines=True)
    result_df.to_excel(json_file + '.xlsx', index=False)
def test_llama_13b_alpaca_plus():
    """Benchmark the merged Chinese-Alpaca-Plus-13B model (no PEFT adapter); print and persist results."""
    model = GptModel('llama', "shibing624/chinese-alpaca-plus-13b-hf", args={'use_peft': False})
    prompts = [get_alpaca_prompt(s) for s in sentences]
    outputs = model.predict(prompts)
    for query, answer in zip(sentences, outputs):
        print('input:', query, '\noutput:', answer)
        print()
    # Save the paired inputs/outputs as JSON-lines and as an Excel sheet.
    result_df = pd.DataFrame.from_dict({'input': sentences, 'output': outputs})
    json_file = os.path.join(pwd_path, 'llama_13b_alpaca_plus_llm_benchmark_test_result.json')
    result_df.to_json(json_file, force_ascii=False, orient='records', lines=True)
    result_df.to_excel(json_file + '.xlsx', index=False)
def test_chatglm_6b():
    """Benchmark the base ChatGLM-6B model (no PEFT adapter); print and persist results."""
    model = GptModel('chatglm', "THUDM/chatglm-6b", peft_name=None, args={'use_peft': False})
    prompts = [get_chatglm_prompt(s) for s in sentences]
    outputs = model.predict(prompts)
    for query, answer in zip(sentences, outputs):
        print('input:', query, '\noutput:', answer)
        print()
    # Save the paired inputs/outputs as JSON-lines and as an Excel sheet.
    result_df = pd.DataFrame.from_dict({'input': sentences, 'output': outputs})
    json_file = os.path.join(pwd_path, 'chatglm_6b_llm_benchmark_test_result.json')
    result_df.to_json(json_file, force_ascii=False, orient='records', lines=True)
    result_df.to_excel(json_file + '.xlsx', index=False)
def test_chatglm_6b_lora():
    """Benchmark ChatGLM-6B with the BELLE Chinese LoRA adapter; print and persist results."""
    model = GptModel('chatglm', "THUDM/chatglm-6b", peft_name='shibing624/chatglm-6b-belle-zh-lora',
                     args={'use_peft': True}, )
    prompts = [get_chatglm_prompt(s) for s in sentences]
    outputs = model.predict(prompts)
    for query, answer in zip(sentences, outputs):
        print('input:', query, '\noutput:', answer)
        print()
    # Save the paired inputs/outputs as JSON-lines and as an Excel sheet.
    result_df = pd.DataFrame.from_dict({'input': sentences, 'output': outputs})
    json_file = os.path.join(pwd_path, 'chatglm_6b_lora_llm_benchmark_test_result.json')
    result_df.to_json(json_file, force_ascii=False, orient='records', lines=True)
    result_df.to_excel(json_file + '.xlsx', index=False)