Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dataset] LongBench #236

Merged
merged 1 commit into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned 2wikimqa variant so that importing
# `longbench_2wikimqa_gen` resolves to the hashed config below.
with read_base():
    from .longbench_2wikimqa_gen_6b3efc import LongBench_2wikimqa_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchF1Evaluator, LongBench2wikimqaDataset

# LongBench 2wikimqa: multi-hop QA over Wikipedia passages, scored with F1.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_2wikimqa_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; answers are short, so cap generation at 32 tokens.
LongBench_2wikimqa_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 32},
}

LongBench_2wikimqa_eval_cfg = {
    'evaluator': {'type': LongBenchF1Evaluator},
    'pred_role': 'BOT',
}

LongBench_2wikimqa_datasets = [
    {
        'type': LongBench2wikimqaDataset,
        'abbr': 'LongBench_2wikimqa',
        'path': 'THUDM/LongBench',
        'name': '2wikimqa',
        'reader_cfg': LongBench_2wikimqa_reader_cfg,
        'infer_cfg': LongBench_2wikimqa_infer_cfg,
        'eval_cfg': LongBench_2wikimqa_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned dureader variant so that importing
# `longbench_dureader_gen` resolves to the hashed config below.
with read_base():
    from .longbench_dureader_gen_c6c7e4 import LongBench_dureader_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchRougeEvaluator, LongBenchdureaderDataset

# LongBench dureader: Chinese reading-comprehension QA, scored with ROUGE (zh).
# The subset ships a single 'test' split, so both splits point at it.
LongBench_dureader_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; free-form Chinese answers capped at 128 tokens.
LongBench_dureader_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 128},
}

LongBench_dureader_eval_cfg = {
    'evaluator': {'type': LongBenchRougeEvaluator, 'language': 'zh'},
    'pred_role': 'BOT',
}

LongBench_dureader_datasets = [
    {
        'type': LongBenchdureaderDataset,
        'abbr': 'LongBench_dureader',
        'path': 'THUDM/LongBench',
        'name': 'dureader',
        'reader_cfg': LongBench_dureader_reader_cfg,
        'infer_cfg': LongBench_dureader_infer_cfg,
        'eval_cfg': LongBench_dureader_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned gov_report variant so that importing
# `longbench_gov_report_gen` resolves to the hashed config below.
with read_base():
    from .longbench_gov_report_gen_54c5b0 import LongBench_gov_report_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchRougeEvaluator, LongBenchgov_reportDataset

# LongBench gov_report: summarization of government reports, scored with ROUGE.
# Summarization takes only the document ('context'); there is no question field.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_gov_report_reader_cfg = {
    'input_columns': ['context'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; one-page summaries need a larger budget (512 tokens).
LongBench_gov_report_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

LongBench_gov_report_eval_cfg = {
    'evaluator': {'type': LongBenchRougeEvaluator},
    'pred_role': 'BOT',
}

LongBench_gov_report_datasets = [
    {
        'type': LongBenchgov_reportDataset,
        'abbr': 'LongBench_gov_report',
        'path': 'THUDM/LongBench',
        'name': 'gov_report',
        'reader_cfg': LongBench_gov_report_reader_cfg,
        'infer_cfg': LongBench_gov_report_infer_cfg,
        'eval_cfg': LongBench_gov_report_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned hotpotqa variant so that importing
# `longbench_hotpotqa_gen` resolves to the hashed config below.
with read_base():
    from .longbench_hotpotqa_gen_6b3efc import LongBench_hotpotqa_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchF1Evaluator, LongBenchhotpotqaDataset

# LongBench hotpotqa: multi-hop QA over given passages, scored with F1.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_hotpotqa_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; answers are short, so cap generation at 32 tokens.
LongBench_hotpotqa_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 32},
}

LongBench_hotpotqa_eval_cfg = {
    'evaluator': {'type': LongBenchF1Evaluator},
    'pred_role': 'BOT',
}

LongBench_hotpotqa_datasets = [
    {
        'type': LongBenchhotpotqaDataset,
        'abbr': 'LongBench_hotpotqa',
        'path': 'THUDM/LongBench',
        'name': 'hotpotqa',
        'reader_cfg': LongBench_hotpotqa_reader_cfg,
        'infer_cfg': LongBench_hotpotqa_infer_cfg,
        'eval_cfg': LongBench_hotpotqa_eval_cfg,
    },
]
4 changes: 4 additions & 0 deletions configs/datasets/longbench/longbenchlcc/longbench_lcc_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned lcc variant so that importing
# `longbench_lcc_gen` resolves to the hashed config below.
with read_base():
    from .longbench_lcc_gen_6ba507 import LongBench_lcc_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchCodeSimEvaluator, LongBenchlccDataset

# LongBench lcc: next-line code completion, scored with edit similarity.
# Completion takes only the code prefix ('context'); there is no question field.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_lcc_reader_cfg = {
    'input_columns': ['context'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; a single code line fits well within 64 tokens.
LongBench_lcc_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Please complete the code given below. \n{context}Next line of code:\n',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 64},
}

LongBench_lcc_eval_cfg = {
    'evaluator': {'type': LongBenchCodeSimEvaluator},
    'pred_role': 'BOT',
}

LongBench_lcc_datasets = [
    {
        'type': LongBenchlccDataset,
        'abbr': 'LongBench_lcc',
        'path': 'THUDM/LongBench',
        'name': 'lcc',
        'reader_cfg': LongBench_lcc_reader_cfg,
        'infer_cfg': LongBench_lcc_infer_cfg,
        'eval_cfg': LongBench_lcc_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned lsht variant so that importing
# `longbench_lsht_gen` resolves to the hashed config below.
with read_base():
    from .longbench_lsht_gen_e8a339 import LongBench_lsht_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchlshtDataset

# LongBench lsht: Chinese news classification with in-context examples,
# scored with classification accuracy against 'all_labels'.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_lsht_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'all_labels',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; the predicted category name fits within 64 tokens.
LongBench_lsht_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 64},
}

LongBench_lsht_eval_cfg = {
    'evaluator': {'type': LongBenchClassificationEvaluator},
    'pred_role': 'BOT',
}

LongBench_lsht_datasets = [
    {
        'type': LongBenchlshtDataset,
        'abbr': 'LongBench_lsht',
        'path': 'THUDM/LongBench',
        'name': 'lsht',
        'reader_cfg': LongBench_lsht_reader_cfg,
        'infer_cfg': LongBench_lsht_infer_cfg,
        'eval_cfg': LongBench_lsht_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned multifieldqa_en variant so that
# importing `longbench_multifieldqa_en_gen` resolves to the hashed config below.
with read_base():
    from .longbench_multifieldqa_en_gen_d3838e import LongBench_multifieldqa_en_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchF1Evaluator, LongBenchmultifieldqa_enDataset

# LongBench multifieldqa_en: single-doc QA over multi-field English documents,
# scored with F1. The subset ships a single 'test' split, so both splits point at it.
LongBench_multifieldqa_en_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; brief answers capped at 64 tokens.
LongBench_multifieldqa_en_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 64},
}

LongBench_multifieldqa_en_eval_cfg = {
    'evaluator': {'type': LongBenchF1Evaluator},
    'pred_role': 'BOT',
}

LongBench_multifieldqa_en_datasets = [
    {
        'type': LongBenchmultifieldqa_enDataset,
        'abbr': 'LongBench_multifieldqa_en',
        'path': 'THUDM/LongBench',
        'name': 'multifieldqa_en',
        'reader_cfg': LongBench_multifieldqa_en_reader_cfg,
        'infer_cfg': LongBench_multifieldqa_en_infer_cfg,
        'eval_cfg': LongBench_multifieldqa_en_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned multifieldqa_zh variant so that
# importing `longbench_multifieldqa_zh_gen` resolves to the hashed config below.
with read_base():
    from .longbench_multifieldqa_zh_gen_e9a7ef import LongBench_multifieldqa_zh_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchF1Evaluator, LongBenchmultifieldqa_zhDataset

# LongBench multifieldqa_zh: single-doc QA over multi-field Chinese documents,
# scored with F1 (zh tokenization). The subset ships a single 'test' split,
# so both splits point at it.
LongBench_multifieldqa_zh_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; brief answers capped at 64 tokens.
LongBench_multifieldqa_zh_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 64},
}

LongBench_multifieldqa_zh_eval_cfg = {
    'evaluator': {'type': LongBenchF1Evaluator, 'language': 'zh'},
    'pred_role': 'BOT',
}

LongBench_multifieldqa_zh_datasets = [
    {
        'type': LongBenchmultifieldqa_zhDataset,
        'abbr': 'LongBench_multifieldqa_zh',
        'path': 'THUDM/LongBench',
        'name': 'multifieldqa_zh',
        'reader_cfg': LongBench_multifieldqa_zh_reader_cfg,
        'infer_cfg': LongBench_multifieldqa_zh_infer_cfg,
        'eval_cfg': LongBench_multifieldqa_zh_eval_cfg,
    },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point: alias the pinned musique variant so that importing
# `longbench_musique_gen` resolves to the hashed config below.
with read_base():
    from .longbench_musique_gen_6b3efc import LongBench_musique_datasets  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LongBenchF1Evaluator, LongBenchmusiqueDataset

# LongBench musique: multi-hop QA over given passages, scored with F1.
# The subset ships a single 'test' split, so both splits point at it.
LongBench_musique_reader_cfg = {
    'input_columns': ['context', 'input'],
    'output_column': 'answers',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot generation; answers are short, so cap generation at 32 tokens.
LongBench_musique_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 32},
}

LongBench_musique_eval_cfg = {
    'evaluator': {'type': LongBenchF1Evaluator},
    'pred_role': 'BOT',
}

LongBench_musique_datasets = [
    {
        'type': LongBenchmusiqueDataset,
        'abbr': 'LongBench_musique',
        'path': 'THUDM/LongBench',
        'name': 'musique',
        'reader_cfg': LongBench_musique_reader_cfg,
        'infer_cfg': LongBench_musique_infer_cfg,
        'eval_cfg': LongBench_musique_eval_cfg,
    },
]
Loading