-
Notifications
You must be signed in to change notification settings - Fork 336
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feat] Add codegeex2 and Humanevalx (#210)
* add codegeex2 * add humanevalx dataset * add evaluator * update evaluator * update configs * update clean code * update configs * fix lint * remove sleep * fix lint * update docs * fix lint
- Loading branch information
Showing
10 changed files
with
448 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base

# Thin alias config: re-export the pinned HumanevalX generation datasets so
# downstream configs can import the stable name `humanevalx_gen`.
with read_base():
    from .humanevalx_gen_fd5822 import humanevalx_datasets  # noqa: F401, F403
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator

# Languages supported by the code evaluation service (Rust is not supported yet).
# Defined once so the eval-config dict and the dataset list can never drift apart.
_HUMANEVALX_LANGS = ['python', 'cpp', 'go', 'java', 'js']

# Reader: the prompt column is fed to the model; task_id identifies the problem.
humanevalx_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')

# Zero-shot generation: the raw problem prompt is passed through unchanged.
humanevalx_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='{prompt}'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=1024))

# One evaluator per language; execution happens on an external service.
humanevalx_eval_cfg_dict = {
    lang: dict(
        evaluator=dict(
            type=HumanevalXEvaluator,
            language=lang,
            ip_address="localhost",  # replace to your code_eval_server ip_address, port
            port=5000),  # refer to https://github.com/Ezra-Yu/code-evaluator to launch a server
        pred_role='BOT')
    for lang in _HUMANEVALX_LANGS
}

humanevalx_datasets = [
    dict(
        type=HumanevalXDataset,
        abbr=f'humanevalx-{lang}',
        language=lang,
        path='./data/humanevalx',
        reader_cfg=humanevalx_reader_cfg,
        infer_cfg=humanevalx_infer_cfg,
        eval_cfg=humanevalx_eval_cfg_dict[lang])
    for lang in _HUMANEVALX_LANGS
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from mmengine.config import read_base

# Entry config: evaluate CodeGeeX2-6B on the HumanevalX benchmark.
with read_base():
    from .datasets.humanevalx.humanevalx_gen import humanevalx_datasets
    from .models.hf_codegeex2_6b import models

# Expose the dataset list under the top-level name the runner expects.
datasets = humanevalx_datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from opencompass.models import HuggingFace

# Sampling setup reference: https://github.com/THUDM/CodeGeeX2/tree/main
#   pass@1  : n=20,  temperature=0.2, top_p=0.95
#   pass@10 : n=200, temperature=0.8, top_p=0.95
#   pass@100: n=200, temperature=0.8, top_p=0.95

# Model and tokenizer come from the same HF repo; name it once.
_HF_REPO = 'THUDM/codegeex2-6b'

models = [
    dict(
        type=HuggingFace,
        abbr='codegeex2-6b',
        path=_HF_REPO,
        tokenizer_path=_HF_REPO,
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Code Evaluation Service | ||
|
||
We support evaluating datasets of multiple programming languages, similar to [humaneval-x](https://huggingface.co/datasets/THUDM/humaneval-x). Before starting, make sure that you have started the code evaluation service. You can refer to the [code-evaluator](https://github.com/Ezra-Yu/code-evaluator) project for the code evaluation service. | ||
|
||
## Launching the Code Evaluation Service | ||
|
||
Make sure you have installed Docker, then build an image and run a container service. | ||
|
||
Build the Docker image: | ||
|
||
```shell | ||
git clone https://github.com/Ezra-Yu/code-evaluator.git | ||
cd code-evaluator/docker | ||
sudo docker build -t code-eval:latest . | ||
``` | ||
|
||
After obtaining the image, create a container with the following commands: | ||
|
||
```shell | ||
# Log output format | ||
sudo docker run -it -p 5000:5000 code-eval:latest python server.py | ||
|
||
# Run the program in the background | ||
# sudo docker run -itd -p 5000:5000 code-eval:latest python server.py | ||
|
||
# Using different ports | ||
# sudo docker run -itd -p 5001:5001 code-eval:latest python server.py --port 5001 | ||
``` | ||
|
||
Ensure that you can access the service and check the following commands (skip this step if you are running the service on a local host): | ||
|
||
```shell | ||
ping your_service_ip_address | ||
telnet your_service_ip_address your_service_port | ||
``` | ||
|
||
```note | ||
If computing nodes cannot connect to the evaluation service, you can directly run `python run.py xxx...`. The resulting code will be saved in the 'outputs' folder. After migration, use [code-evaluator](https://github.com/Ezra-Yu/code-evaluator) directly to get the results (no need to consider the eval_cfg configuration later). | ||
``` | ||
|
||
## Configuration File | ||
|
||
We have provided the [configuration file](https://github.com/InternLM/opencompass/blob/main/configs/eval_codegeex2.py) for evaluating humaneval-x on codegeex2.
|
||
The dataset and related post-processing configuration files can be found at this [link](https://github.com/InternLM/opencompass/tree/main/configs/datasets/humanevalx). Note the `evaluator` field in `humanevalx_eval_cfg_dict`. | ||
|
||
```python | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator | ||
|
||
humanevalx_reader_cfg = dict( | ||
input_columns=['prompt'], output_column='task_id', train_split='test') | ||
|
||
humanevalx_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template='{prompt}'), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=1024)) | ||
|
||
humanevalx_eval_cfg_dict = { | ||
lang : dict( | ||
evaluator=dict( | ||
type=HumanevalXEvaluator, | ||
language=lang, | ||
ip_address="localhost", # replace to your code_eval_server ip_address, port | ||
port=5000), # refer to https://github.com/Ezra-Yu/code-evaluator to launch a server | ||
pred_role='BOT') | ||
for lang in ['python', 'cpp', 'go', 'java', 'js'] # do not support rust now | ||
} | ||
|
||
humanevalx_datasets = [ | ||
dict( | ||
type=HumanevalXDataset, | ||
abbr=f'humanevalx-{lang}', | ||
language=lang, | ||
path='./data/humanevalx', | ||
reader_cfg=humanevalx_reader_cfg, | ||
infer_cfg=humanevalx_infer_cfg, | ||
eval_cfg=humanevalx_eval_cfg_dict[lang]) | ||
for lang in ['python', 'cpp', 'go', 'java', 'js'] | ||
] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# 代码评测服务 | ||
|
||
我们支持评测多编程语言的数据集,类似 [humaneval-x](https://huggingface.co/datasets/THUDM/humaneval-x). 在启动之前需要确保你已经启动了代码评测服务,代码评测服务可参考[code-evaluator](https://github.com/Ezra-Yu/code-evaluator)项目。 | ||
|
||
## 启动代码评测服务 | ||
|
||
确保您已经安装了 docker,然后构建一个镜像并运行一个容器服务。 | ||
|
||
构建 Docker 镜像: | ||
|
||
```shell | ||
git clone https://github.com/Ezra-Yu/code-evaluator.git | ||
cd code-evaluator/docker | ||
sudo docker build -t code-eval:latest . | ||
``` | ||
|
||
获取镜像后,使用以下命令创建容器: | ||
|
||
```shell | ||
# 输出日志格式 | ||
sudo docker run -it -p 5000:5000 code-eval:latest python server.py | ||
|
||
# 在后台运行程序 | ||
# sudo docker run -itd -p 5000:5000 code-eval:latest python server.py | ||
|
||
# 使用不同的端口 | ||
# sudo docker run -itd -p 5001:5001 code-eval:latest python server.py --port 5001 | ||
``` | ||
|
||
确保您能够访问服务,检查以下命令(如果在本地主机中运行服务,就跳过这个操作): | ||
|
||
```shell | ||
ping your_service_ip_address | ||
telnet your_service_ip_address your_service_port | ||
``` | ||
|
||
```note | ||
如果运算节点不能连接到评估服务,也可直接运行 `python run.py xxx...`,代码生成结果会保存在 'outputs' 文件夹下,迁移后直接使用 [code-evaluator](https://github.com/Ezra-Yu/code-evaluator) 评测得到结果(不需要考虑后面 eval_cfg 的配置)。 | ||
``` | ||
|
||
## 配置文件 | ||
|
||
我们已经提供了 humaneval-x 在 codegeex2 上评估的[配置文件](https://github.com/InternLM/opencompass/blob/main/configs/eval_codegeex2.py)。
|
||
其中数据集以及相关后处理的配置文件为这个[链接](https://github.com/InternLM/opencompass/tree/main/configs/datasets/humanevalx), 需要注意 `humanevalx_eval_cfg_dict` 中的 | ||
`evaluator` 字段。 | ||
|
||
```python | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator | ||
|
||
humanevalx_reader_cfg = dict( | ||
input_columns=['prompt'], output_column='task_id', train_split='test') | ||
|
||
humanevalx_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template='{prompt}'), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=1024)) | ||
|
||
humanevalx_eval_cfg_dict = { | ||
lang : dict( | ||
evaluator=dict( | ||
type=HumanevalXEvaluator, | ||
language=lang, | ||
ip_address="localhost", # replace to your code_eval_server ip_address, port | ||
port=5000), # refer to https://github.com/Ezra-Yu/code-evaluator to launch a server | ||
pred_role='BOT') | ||
for lang in ['python', 'cpp', 'go', 'java', 'js'] # do not support rust now | ||
} | ||
|
||
humanevalx_datasets = [ | ||
dict( | ||
type=HumanevalXDataset, | ||
abbr=f'humanevalx-{lang}', | ||
language=lang, | ||
path='./data/humanevalx', | ||
reader_cfg=humanevalx_reader_cfg, | ||
infer_cfg=humanevalx_infer_cfg, | ||
eval_cfg=humanevalx_eval_cfg_dict[lang]) | ||
for lang in ['python', 'cpp', 'go', 'java', 'js'] | ||
] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.