[Feature] Add Xiezhi SQuAD2.0 ANLI #101

Merged (4 commits, Aug 10, 2023)
2 changes: 1 addition & 1 deletion configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl.py
@@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .SuperGLUE_WSC_ppl_d0f531 import WSC_datasets  # noqa: F401, F403
+    from .SuperGLUE_WSC_ppl_cbf31c import WSC_datasets  # noqa: F401, F403
49 changes: 49 additions & 0 deletions configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_cbf31c.py
@@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset_V3

WSC_reader_cfg = dict(
    input_columns=["span1", "span2", "text"],
    output_column="label",
)

WSC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            'A':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt="Passage: {text}\nDoes the pronoun # {span2} # refer to * {span1} *?\nA. Yes\nB. No\nAnswer: "
                ),
                dict(role='BOT', prompt='A'),
            ]),
            'B':
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt="Passage: {text}\nDoes the pronoun # {span2} # refer to * {span1} *?\nA. Yes\nB. No\nAnswer: "
                ),
                dict(role='BOT', prompt='B'),
            ]),
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)

WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )

WSC_datasets = [
    dict(
        abbr="WSC",
        type=WSCDataset_V3,
        path="./data/SuperGLUE/WSC/val.jsonl",
        reader_cfg=WSC_reader_cfg,
        infer_cfg=WSC_infer_cfg,
        eval_cfg=WSC_eval_cfg,
    )
]
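Note: the 'A'/'B' template pair above is scored by perplexity rather than generated. A minimal sketch of that idea, assuming a causal Hugging Face model; this is not the actual PPLInferencer implementation, and the model name is a placeholder:

```python
# Illustrative sketch only - not OpenCompass internals. PPL-style selection
# scores each candidate continuation and keeps the one with the lowest loss.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder model for the sketch
tok = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).eval()

def avg_loss(text: str) -> float:
    # Mean token-level cross-entropy of the prompt plus candidate answer.
    ids = tok(text, return_tensors="pt").input_ids
    with torch.no_grad():
        return model(ids, labels=ids).loss.item()

prompt = ("Passage: The trophy doesn't fit in the suitcase because it is too large.\n"
          "Does the pronoun # it # refer to * the trophy *?\nA. Yes\nB. No\nAnswer: ")
scores = {label: avg_loss(prompt + label) for label in ("A", "B")}
print(min(scores, key=scores.get))  # lower loss == lower perplexity == predicted label
```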
4 changes: 4 additions & 0 deletions configs/datasets/anli/anli_gen.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .anli_gen_fc7328 import anli_datasets  # noqa: F401, F403
42 changes: 42 additions & 0 deletions configs/datasets/anli/anli_gen_fc7328.py
@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AnliDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

anli_datasets = []
for _split in ['R1', 'R2', 'R3']:
    anli_reader_cfg = dict(
        input_columns=["context", "hypothesis"],
        output_column="label",
    )

    anli_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role="HUMAN", prompt="{context}\n{hypothesis}\nQuestion: What is the relation between the two sentences?\nA. Contradiction\nB. Entailment\nC. Neutral\nAnswer: "),
                    dict(role="BOT", prompt="{label}"),
                ]
            ),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    anli_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
                         pred_role="BOT",
                         pred_postprocessor=dict(type=first_capital_postprocess))

    anli_datasets.append(
        dict(
            type=AnliDataset,
            abbr=f"anli-{_split}",
            path=f"data/anli/anli_v1.0/{_split}/dev.jsonl",
            reader_cfg=anli_reader_cfg,
            infer_cfg=anli_infer_cfg,
            eval_cfg=anli_eval_cfg,
        )
    )
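For reference, the new dataset configs can be combined into a top-level evaluation config in the usual way. A sketch under the standard read_base convention; the model import path `hf_llama_7b` is a placeholder and not part of this PR:

```python
# Hypothetical configs/eval_new_datasets.py - a sketch, not part of this PR.
from mmengine.config import read_base

with read_base():
    from .datasets.anli.anli_gen import anli_datasets
    from .datasets.squad20.squad20_gen import squad20_datasets
    from .datasets.xiezhi.xiezhi_gen import xiezhi_datasets
    from .models.hf_llama_7b import models  # placeholder model config

datasets = [*anli_datasets, *squad20_datasets, *xiezhi_datasets]
```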
4 changes: 4 additions & 0 deletions configs/datasets/anli/anli_ppl.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .anli_ppl_1d290e import anli_datasets  # noqa: F401, F403
50 changes: 50 additions & 0 deletions configs/datasets/anli/anli_ppl_1d290e.py
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AnliDataset

anli_datasets = []
for _split in ['R1', 'R2', 'R3']:
    anli_reader_cfg = dict(
        input_columns=["context", "hypothesis"],
        output_column="label",
    )

    anli_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template={
                "A":
                dict(round=[
                    dict(role="HUMAN", prompt="{context}\n{hypothesis}\nWhat is the relation between the two sentences?"),
                    dict(role="BOT", prompt="Contradiction"),
                ]),
                "B":
                dict(round=[
                    dict(role="HUMAN", prompt="{context}\n{hypothesis}\nWhat is the relation between the two sentences?"),
                    dict(role="BOT", prompt="Entailment"),
                ]),
                "C":
                dict(round=[
                    dict(role="HUMAN", prompt="{context}\n{hypothesis}\nWhat is the relation between the two sentences?"),
                    dict(role="BOT", prompt="Neutral"),
                ]),
            },
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer),
    )

    anli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )

    anli_datasets.append(
        dict(
            type=AnliDataset,
            abbr=f"anli-{_split}",
            path=f"data/anli/anli_v1.0/{_split}/dev.jsonl",
            reader_cfg=anli_reader_cfg,
            infer_cfg=anli_infer_cfg,
            eval_cfg=anli_eval_cfg,
        )
    )
4 changes: 4 additions & 0 deletions configs/datasets/squad20/squad20_gen.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .squad20_gen_1710bc import squad20_datasets  # noqa: F401, F403
32 changes: 32 additions & 0 deletions configs/datasets/squad20/squad20_gen_1710bc.py
@@ -0,0 +1,32 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import SQuAD20Dataset, SQuAD20Evaluator

squad20_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answers')

squad20_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nAccording to the above passage, answer the following question. If it is impossible to answer according to the passage, answer `impossible to answer`:\nQuestion: {question}'),
                dict(role='BOT', prompt='Answer:'),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50))

squad20_eval_cfg = dict(
    evaluator=dict(type=SQuAD20Evaluator), pred_role='BOT')

squad20_datasets = [
    dict(
        type=SQuAD20Dataset,
        abbr='squad2.0',
        path='./data/SQuAD2.0/dev-v2.0.json',
        reader_cfg=squad20_reader_cfg,
        infer_cfg=squad20_infer_cfg,
        eval_cfg=squad20_eval_cfg)
]
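The prompt above asks the model to reply `impossible to answer` for unanswerable questions, so scoring has to normalize strings before comparing them with the gold answers. A rough sketch of SQuAD-style normalization and exact match; the actual SQuAD20Evaluator logic may differ:

```python
# Illustrative sketch of SQuAD-style answer matching; not the SQuAD20Evaluator code.
import re
import string

def normalize(text: str) -> str:
    """Lowercase, drop punctuation and articles, collapse whitespace."""
    text = text.lower()
    text = "".join(ch for ch in text if ch not in set(string.punctuation))
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())

def exact_match(prediction: str, gold_answers: list) -> bool:
    # Unanswerable questions have no gold span; they are matched against the
    # refusal phrase requested in the prompt above.
    golds = gold_answers or ["impossible to answer"]
    return any(normalize(prediction) == normalize(g) for g in golds)

print(exact_match("The Norman conquest.", ["the Norman Conquest"]))  # True
```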
4 changes: 4 additions & 0 deletions configs/datasets/xiezhi/xiezhi_gen.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .xiezhi_gen_b86cf5 import xiezhi_datasets  # noqa: F401, F403
50 changes: 50 additions & 0 deletions configs/datasets/xiezhi/xiezhi_gen_b86cf5.py
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import XiezhiDataset, XiezhiRetriever
from opencompass.utils.text_postprocessors import first_capital_postprocess

xiezhi_datasets = []

for split in ["spec_eng", "spec_chn", "inter_eng", "inter_chn"]:
    if 'chn' in split:
        q_hint, a_hint = "题目", "答案"
    else:
        q_hint, a_hint = "Question", "Answer"

    xiezhi_reader_cfg = dict(
        input_columns=["question", "A", "B", "C", "D", "labels"],
        output_column="answer",
        train_split="train",
        test_split='test',
    )
    xiezhi_infer_cfg = dict(
        ice_template=dict(
            type=PromptTemplate,
            template=dict(
                begin="</E>",
                round=[
                    dict(role="HUMAN", prompt=f"{q_hint}: {{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n{a_hint}: "),
                    dict(role="BOT", prompt="{answer}"),
                ]
            ),
            ice_token="</E>",
        ),
        retriever=dict(type=XiezhiRetriever, ice_num=3),
        inferencer=dict(type=GenInferencer),
    )

    xiezhi_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
                           pred_role="BOT",
                           pred_postprocessor=dict(type=first_capital_postprocess))

    xiezhi_datasets.append(
        dict(
            type=XiezhiDataset,
            abbr=f"xiezhi-{split}",
            path="./data/xiezhi/",
            name="xiezhi_" + split,
            reader_cfg=xiezhi_reader_cfg,
            infer_cfg=xiezhi_infer_cfg,
            eval_cfg=xiezhi_eval_cfg,
        ))
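The `ice_token="</E>"` marker is where the retrieved in-context examples (ice_num=3 from XiezhiRetriever) are spliced in before inference. A simplified illustration of that substitution, using placeholder demo strings rather than real retriever output:

```python
# Illustrative sketch of how the "</E>" placeholder is replaced by in-context
# examples before the prompt reaches the model; not the actual retriever code.
ice_examples = [
    "Question: <demo question 1>\nA. ...\nB. ...\nC. ...\nD. ...\nAnswer: B",
    "Question: <demo question 2>\nA. ...\nB. ...\nC. ...\nD. ...\nAnswer: D",
    "Question: <demo question 3>\nA. ...\nB. ...\nC. ...\nD. ...\nAnswer: A",
]
test_item = "</E>Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: "
full_prompt = test_item.replace("</E>", "\n".join(ice_examples) + "\n")
print(full_prompt)
```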
4 changes: 4 additions & 0 deletions configs/datasets/xiezhi/xiezhi_ppl.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .xiezhi_ppl_ea6bd7 import xiezhi_datasets  # noqa: F401, F403
49 changes: 49 additions & 0 deletions configs/datasets/xiezhi/xiezhi_ppl_ea6bd7.py
@@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import XiezhiDataset, XiezhiRetriever

xiezhi_datasets = []

for split in ["spec_eng", "spec_chn", "inter_eng", "inter_chn"]:
    if 'chn' in split:
        q_hint, a_hint = "题目", "答案"
    else:
        q_hint, a_hint = "Question", "Answer"

    xiezhi_reader_cfg = dict(
        input_columns=["question", "A", "B", "C", "D", "labels"],
        output_column="answer",
        train_split="train",
        test_split='test',
    )
    xiezhi_infer_cfg = dict(
        ice_template=dict(
            type=PromptTemplate,
            template={
                answer: dict(
                    begin="</E>",
                    round=[
                        dict(role="HUMAN", prompt=f"{q_hint}: {{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}"),
                        dict(role="BOT", prompt=f"{a_hint}: {answer}"),
                    ])
                for answer in ["A", "B", "C", "D"]
            },
            ice_token="</E>",
        ),
        retriever=dict(type=XiezhiRetriever, ice_num=3),
        inferencer=dict(type=PPLInferencer),
    )

    xiezhi_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

    xiezhi_datasets.append(
        dict(
            type=XiezhiDataset,
            abbr=f"xiezhi-{split}",
            path="./data/xiezhi/",
            name="xiezhi_" + split,
            reader_cfg=xiezhi_reader_cfg,
            infer_cfg=xiezhi_infer_cfg,
            eval_cfg=xiezhi_eval_cfg,
        ))
3 changes: 3 additions & 0 deletions opencompass/datasets/__init__.py
@@ -1,5 +1,6 @@
from .afqmcd import * # noqa: F401, F403
from .agieval import * # noqa: F401, F403
from .anli import AnliDataset # noqa: F401, F403
from .arc import * # noqa: F401, F403
from .ax import * # noqa: F401, F403
from .bbh import * # noqa: F401, F403
@@ -48,6 +49,7 @@
from .record import * # noqa: F401, F403
from .safety import * # noqa: F401, F403
from .siqa import * # noqa: F401, F403
from .squad20 import SQuAD20Dataset, SQuAD20Evaluator # noqa: F401, F403
from .storycloze import * # noqa: F401, F403
from .strategyqa import * # noqa: F401, F403
from .summedits import * # noqa: F401, F403
@@ -63,5 +65,6 @@
from .winogrande import * # noqa: F401, F403
from .wsc import * # noqa: F401, F403
from .xcopa import * # noqa: F401, F403
from .xiezhi import XiezhiDataset, XiezhiRetriever # noqa: F401, F403
from .xlsum import * # noqa: F401, F403
from .xsum import * # noqa: F401, F403
18 changes: 18 additions & 0 deletions opencompass/datasets/anli.py
@@ -0,0 +1,18 @@
import json

from datasets import Dataset

from .base import BaseDataset


class AnliDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        dataset = []
        with open(path, 'r') as f:
            for line in f:
                line = json.loads(line)
                line['label'] = {'c': 'A', 'e': 'B', 'n': 'C'}[line['label']]
                dataset.append(line)
        return Dataset.from_list(dataset)
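The loader assumes ANLI-style JSONL rows (`context`, `hypothesis`, and a `label` in {c, e, n}) and remaps each gold label onto the A/B/C choices used by the configs above, so 'c' lines up with "A. Contradiction", 'e' with "B. Entailment", and 'n' with "C. Neutral". A minimal usage sketch:

```python
# Sketch: loading one ANLI split directly; the path follows the configs above.
from opencompass.datasets import AnliDataset

ds = AnliDataset.load(path="data/anli/anli_v1.0/R1/dev.jsonl")
print(ds[0]["label"])  # one of "A", "B", "C"
```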