Skip to content

Commit

Permalink
[Feature] Add FinanceIQ dataset (#596)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangRaymond authored Nov 16, 2023
1 parent 8160cb8 commit c0acd06
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 0 deletions.
4 changes: 4 additions & 0 deletions configs/datasets/FinanceIQ/FinanceIQ_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    # The base config names its list ``financeIQ_datasets`` (lowercase "f");
    # the imported name must match it exactly.
    from .FinanceIQ_gen_e0e6b5 import financeIQ_datasets  # noqa: F401, F403
77 changes: 77 additions & 0 deletions configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import FinanceIQDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

# English identifier -> Chinese subject title. The dataset loop in this config
# iterates the Chinese-keyed mapping below, not this one.
financeIQ_subject_mapping_en = dict(
    certified_public_accountant='注册会计师(CPA)',
    banking_qualification='银行从业资格',
    securities_qualification='证券从业资格',
    fund_qualification='基金从业资格',
    insurance_qualification='保险从业资格CICE',
    economic_analyst='经济师',
    taxation_practitioner='税务师',
    futures_qualification='期货从业资格',
    certified_fin_planner='理财规划师',
    actuary_fin_math='精算师-金融数学',
)

# Identity mapping: each Chinese subject title maps to itself, so the title
# serves both as the dataset name and as the text shown in the prompt.
financeIQ_subject_mapping = {
    _subject: _subject
    for _subject in (
        '注册会计师(CPA)',
        '银行从业资格',
        '证券从业资格',
        '基金从业资格',
        '保险从业资格CICE',
        '经济师',
        '税务师',
        '期货从业资格',
        '理财规划师',
        '精算师-金融数学',
    )
}

# Subject names in declaration order.
financeIQ_all_sets = [*financeIQ_subject_mapping]

# Build one generation-style dataset config per FinanceIQ subject.
financeIQ_datasets = []
for _name in financeIQ_all_sets:
    _ch_name = financeIQ_subject_mapping[_name]

    # The two chat turns of a single example: the question with its four
    # options, followed by the answer line.
    _question_turn = {
        'role': 'HUMAN',
        'prompt': f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n题目:{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}",
    }
    _answer_turn = {'role': 'BOT', 'prompt': '答案是: {answer}'}

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': {
                'begin': '</E>',
                'round': [_question_turn, _answer_turn],
            },
            'ice_token': '</E>',
        },
        # Fixed example ids 0-4 (presumably drawn from the "dev" split named
        # as train_split below -- confirm against FixKRetriever).
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': GenInferencer},
    }

    financeIQ_eval_cfg = {
        'evaluator': {'type': AccEvaluator},
        'pred_postprocessor': {'type': first_capital_postprocess},
    }

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': './data/FinanceIQ/',
        'name': _name,
        'abbr': f'FinanceIQ-{_name}',
        'reader_cfg': {
            'input_columns': ['question', 'A', 'B', 'C', 'D'],
            'output_column': 'answer',
            'train_split': 'dev',
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Remove the loop temporaries from the module namespace.
del _name, _ch_name, _question_turn, _answer_turn
4 changes: 4 additions & 0 deletions configs/datasets/FinanceIQ/FinanceIQ_ppl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    # The base config names its list ``financeIQ_datasets`` (lowercase "f");
    # the imported name must match it exactly.
    from .FinanceIQ_ppl_42b9bd import financeIQ_datasets  # noqa: F401, F403
76 changes: 76 additions & 0 deletions configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import FinanceIQDataset

# English identifier -> Chinese subject title. The dataset loop in this config
# iterates the Chinese-keyed mapping below, not this one.
financeIQ_subject_mapping_en = dict(
    certified_public_accountant='注册会计师(CPA)',
    banking_qualification='银行从业资格',
    securities_qualification='证券从业资格',
    fund_qualification='基金从业资格',
    insurance_qualification='保险从业资格CICE',
    economic_analyst='经济师',
    taxation_practitioner='税务师',
    futures_qualification='期货从业资格',
    certified_fin_planner='理财规划师',
    actuary_fin_math='精算师-金融数学',
)

# Identity mapping: each Chinese subject title maps to itself, so the title
# serves both as the dataset name and as the text shown in the prompt.
financeIQ_subject_mapping = {
    _subject: _subject
    for _subject in (
        '注册会计师(CPA)',
        '银行从业资格',
        '证券从业资格',
        '基金从业资格',
        '保险从业资格CICE',
        '经济师',
        '税务师',
        '期货从业资格',
        '理财规划师',
        '精算师-金融数学',
    )
}

# Subject names in declaration order.
financeIQ_all_sets = [*financeIQ_subject_mapping]

# Build one perplexity-style dataset config per FinanceIQ subject.
financeIQ_datasets = []
for _name in financeIQ_all_sets:
    _ch_name = financeIQ_subject_mapping[_name]

    # The question text is the same for every candidate answer; only the
    # answer line differs between the per-option templates.
    _question_prompt = f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n题目:{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}"

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            # One template per option letter, keyed by that letter.
            'template': {
                _option: {
                    'begin': '</E>',
                    'round': [
                        {'role': 'HUMAN', 'prompt': _question_prompt},
                        {'role': 'BOT', 'prompt': f'答案是: {_option}'},
                    ],
                }
                for _option in ('A', 'B', 'C', 'D')
            },
            'ice_token': '</E>',
        },
        # Fixed example ids 0-4 (presumably drawn from the "dev" split named
        # as train_split below -- confirm against FixKRetriever).
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': PPLInferencer},
    }

    financeIQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': './data/FinanceIQ/',
        'name': _name,
        'abbr': f'FinanceIQ-{_name}',
        'reader_cfg': {
            'input_columns': ['question', 'A', 'B', 'C', 'D'],
            'output_column': 'answer',
            'train_split': 'dev',
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Remove the loop temporaries from the module namespace.
del _name, _ch_name, _question_prompt
39 changes: 39 additions & 0 deletions opencompass/datasets/FinanceIQ.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import csv
import os.path as osp

from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class FinanceIQDataset(BaseDataset):
    """Loader for the per-subject FinanceIQ multiple-choice CSV files."""

    @staticmethod
    def load(path: str, name: str):
        """Read the ``dev`` and ``test`` CSV files of one subject.

        Args:
            path (str): Dataset root directory. Files are read from
                ``<path>/<split>/<name>.csv``.
            name (str): Subject name, used as the CSV file stem.

        Returns:
            DatasetDict: ``dev`` and ``test`` splits. Every record has the
            keys ``question``, ``A``, ``B``, ``C``, ``D`` and ``answer``.

        Raises:
            AssertionError: If a data row does not have exactly 7 columns.
        """
        dataset = DatasetDict()
        for split in ['dev', 'test']:
            raw_data = []
            filename = osp.join(path, split, f'{name}.csv')
            # newline='' is what the csv module requires of its input file:
            # without it, newlines embedded in quoted fields may be
            # misinterpreted.
            with open(filename, encoding='utf-8', newline='') as f:
                reader = csv.reader(f)
                next(reader)  # skip the header
                for row in reader:
                    assert len(row) == 7, (
                        f'{filename}, line {reader.line_num}: expected 7 '
                        f'columns, got {len(row)}')
                    # row[0] is not used (presumably a row index -- TODO
                    # confirm against the data files).
                    raw_data.append({
                        'question': row[1],
                        'A': row[2],
                        'B': row[3],
                        'C': row[4],
                        'D': row[5],
                        'answer': row[6],
                    })
            dataset[split] = Dataset.from_list(raw_data)
        return dataset
1 change: 1 addition & 0 deletions opencompass/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .ds1000 import * # noqa: F401, F403
from .ds1000_interpreter import * # noqa: F401, F403
from .eprstmt import * # noqa: F401, F403
from .FinanceIQ import * # noqa: F401, F403
from .flores import * # noqa: F401, F403
from .game24 import * # noqa: F401, F403
from .GaokaoBench import * # noqa: F401, F403
Expand Down

0 comments on commit c0acd06

Please sign in to comment.