From 7505b3cadf51ab7c8596c79316c008b7b748f5b4 Mon Sep 17 00:00:00 2001 From: Fengzhe Zhou Date: Tue, 14 May 2024 14:50:16 +0800 Subject: [PATCH] [Feature] Add huggingface apply_chat_template (#1098) * add TheoremQA with 5-shot * add huggingface_above_v4_33 classes * use num_worker partitioner in cli * update theoremqa * update TheoremQA * add TheoremQA * rename theoremqa -> TheoremQA * update TheoremQA output path * rewrite many model configs * update huggingface * further update * refine configs * update configs * update configs * add configs/eval_llama3_instruct.py * add summarizer multi faceted * update bbh datasets * update configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py * rename class * update readme * update hf above v4.33 --- README.md | 11 +- README_zh-CN.md | 13 +- configs/dataset_collections/chat_OC15.py | 22 + .../TheoremQA/TheoremQA_5shot_gen_a4f581.py | 46 -- configs/datasets/bbh/bbh_gen_2879b0.py | 56 +++ configs/datasets/bbh/bbh_subset_settings.py | 29 ++ configs/datasets/collections/chat_medium.py | 2 +- configs/datasets/collections/chat_small.py | 2 +- ...py => deprecated_winogrande_gen_a9ede5.py} | 0 .../winogrande/winogrande_5shot_gen_b36770.py | 46 ++ configs/datasets/winogrande/winogrande_gen.py | 2 +- .../winogrande/winogrande_gen_458220.py | 41 ++ configs/eval_llama3_instruct.py | 52 +++ configs/models/aquila/hf_aquila2_34b.py | 22 +- configs/models/aquila/hf_aquila2_7b.py | 22 +- configs/models/aquila/hf_aquilachat2_34b.py | 1 - .../models/aquila/hf_aquilachat2_34b_16k.py | 1 - configs/models/aquila/hf_aquilachat2_7b.py | 1 - .../models/aquila/hf_aquilachat2_7b_16k.py | 1 - .../models/baichuan/hf_baichuan2_13b_base.py | 19 +- .../models/baichuan/hf_baichuan2_7b_base.py | 19 +- configs/models/bluelm/hf_bluelm_7b_base.py | 22 +- .../models/bluelm/hf_bluelm_7b_base_32k.py | 22 +- configs/models/chatglm/hf_chatglm3_6b.py | 31 +- configs/models/chatglm/hf_chatglm3_6b_32k.py | 29 +- configs/models/chatglm/hf_chatglm3_6b_base.py | 20 +- configs/models/codellama/hf_codellama_13b.py | 19 +- .../codellama/hf_codellama_13b_instruct.py | 19 +- .../codellama/hf_codellama_13b_python.py | 19 +- configs/models/codellama/hf_codellama_34b.py | 19 +- .../codellama/hf_codellama_34b_instruct.py | 19 +- .../codellama/hf_codellama_34b_python.py | 19 +- configs/models/codellama/hf_codellama_70b.py | 12 + .../codellama/hf_codellama_70b_instruct.py | 12 + .../codellama/hf_codellama_70b_python.py | 12 + configs/models/codellama/hf_codellama_7b.py | 19 +- .../codellama/hf_codellama_7b_instruct.py | 19 +- .../codellama/hf_codellama_7b_python.py | 19 +- .../models/deepseek/hf_deepseek_67b_base.py | 22 +- .../models/deepseek/hf_deepseek_67b_chat.py | 31 +- .../models/deepseek/hf_deepseek_7b_base.py | 22 +- .../models/deepseek/hf_deepseek_7b_chat.py | 31 +- .../hf_deepseek_coder_1_3b_instruct.py | 34 +- .../hf_deepseek_coder_33b_instruct.py | 34 +- .../hf_deepseek_coder_6_7b_instruct.py | 34 +- .../deepseek/hf_deepseek_moe_16b_base.py | 22 +- .../deepseek/hf_deepseek_moe_16b_chat.py | 30 +- configs/models/falcon/hf_falcon_40b.py | 17 +- configs/models/falcon/hf_falcon_7b.py | 17 +- configs/models/gemma/hf_gemma_2b.py | 21 +- configs/models/gemma/hf_gemma_2b_it.py | 31 +- configs/models/gemma/hf_gemma_7b.py | 21 +- configs/models/gemma/hf_gemma_7b_it.py | 31 +- .../models/hf_internlm/hf_internlm2_1_8b.py | 22 +- .../models/hf_internlm/hf_internlm2_20b.py | 21 +- configs/models/hf_internlm/hf_internlm2_7b.py | 21 +- .../hf_internlm/hf_internlm2_base_20b.py | 21 +- 
.../hf_internlm/hf_internlm2_base_7b.py | 21 +- .../hf_internlm/hf_internlm2_chat_1_8b.py | 35 +- .../hf_internlm/hf_internlm2_chat_1_8b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_20b.py | 35 +- .../hf_internlm/hf_internlm2_chat_20b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_7b.py | 35 +- .../hf_internlm/hf_internlm2_chat_7b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_math_20b.py | 34 +- .../hf_internlm2_chat_math_20b_with_system.py | 1 - .../hf_internlm/hf_internlm2_chat_math_7b.py | 34 +- .../hf_internlm2_chat_math_7b_with_system.py | 1 - .../hf_internlm/hf_internlm2_math_20b.py | 13 + .../hf_internlm/hf_internlm2_math_7b.py | 13 + configs/models/hf_internlm/hf_internlm_20b.py | 17 +- configs/models/hf_internlm/hf_internlm_7b.py | 20 +- .../hf_internlm/hf_internlm_chat_7b_8k.py | 34 -- .../hf_internlm/hf_internlm_chat_7b_v1_1.py | 34 -- .../hf_internlm/lmdeploy_internlm2_20b.py | 27 ++ .../lmdeploy_internlm2_chat_20b.py | 9 +- .../hf_internlm/lmdeploy_internlm2_chat_7b.py | 9 +- configs/models/hf_llama/hf_llama2_13b.py | 19 +- configs/models/hf_llama/hf_llama2_13b_chat.py | 30 +- configs/models/hf_llama/hf_llama2_70b.py | 19 +- configs/models/hf_llama/hf_llama2_70b_chat.py | 30 +- configs/models/hf_llama/hf_llama2_7b.py | 19 +- configs/models/hf_llama/hf_llama2_7b_chat.py | 30 +- configs/models/hf_llama/hf_llama3_70b.py | 21 +- .../models/hf_llama/hf_llama3_70b_instruct.py | 30 +- configs/models/hf_llama/hf_llama3_8b.py | 21 +- .../models/hf_llama/hf_llama3_8b_instruct.py | 30 +- configs/models/hf_llama/hf_llama_13b.py | 19 +- configs/models/hf_llama/hf_llama_30b.py | 19 +- configs/models/hf_llama/hf_llama_65b.py | 19 +- configs/models/hf_llama/hf_llama_7b.py | 19 +- .../hf_llama/lmdeploy_llama3_70b_instruct.py | 24 + .../hf_llama/lmdeploy_llama3_8b_instruct.py | 24 + .../mistral/hf_mistral_7b_instruct_v0_1.py | 30 +- .../mistral/hf_mistral_7b_instruct_v0_2.py | 30 +- configs/models/mistral/hf_mistral_7b_v0_1.py | 19 +- configs/models/mistral/hf_mistral_7b_v0_2.py | 20 +- .../mistral/hf_mixtral_8x22b_instruct_v0_1.py | 12 + .../models/mistral/hf_mixtral_8x22b_v0_1.py | 12 + .../mistral/hf_mixtral_8x7b_instruct_v0_1.py | 12 + .../models/mistral/hf_mixtral_8x7b_v0_1.py | 12 + .../{mixtral => mistral}/mixtral_8x7b_32k.py | 0 .../mistral/vllm_mistral_7b_instruct_v0_1.py | 1 - .../mistral/vllm_mistral_7b_instruct_v0_2.py | 1 - .../models/mistral/vllm_mistral_7b_v0_1.py | 17 + .../models/mistral/vllm_mistral_7b_v0_2.py | 17 + .../vllm_mixtral_8x7b_instruct_v0_1.py | 1 - .../mixtral/hf_mixtral_8x22b_instruct_v0_1.py | 34 -- .../models/mixtral/hf_mixtral_8x22b_v0_1.py | 24 - .../mixtral/hf_mixtral_8x7b_instruct_v0_1.py | 34 -- .../models/mixtral/hf_mixtral_8x7b_v0_1.py | 24 - .../models/nanbeige/hf_nanbeige2_8b_chat.py | 34 +- .../models/nanbeige/hf_nanbeige_16b_base.py | 33 -- .../nanbeige/hf_nanbeige_16b_base_32k.py | 34 -- .../nanbeige/hf_nanbeige_16b_chat_32k.py | 34 -- .../models/openbmb/hf_minicpm_2b_dpo_fp32.py | 29 +- .../models/openbmb/hf_minicpm_2b_sft_bf16.py | 12 + .../models/openbmb/hf_minicpm_2b_sft_fp32.py | 29 +- configs/models/opt/hf_opt_125m.py | 31 +- configs/models/opt/hf_opt_350m.py | 31 +- configs/models/others/hf_command_r_plus.py | 23 +- configs/models/others/hf_dbrx_base.py | 12 + configs/models/others/hf_dbrx_instruct.py | 32 +- .../models/others/hf_dolphin_21_mistral_7b.py | 1 - .../models/others/hf_fashiongpt_70b_v11.py | 1 - .../models/others/hf_orionstar_yi_34b_chat.py | 1 - configs/models/others/hf_telechat_7b_chat.py | 1 - 
.../others/vllm_orionstar_14b_longchat.py | 1 - configs/models/qwen/hf_qwen1_5_0_5b.py | 23 +- configs/models/qwen/hf_qwen1_5_0_5b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_14b.py | 23 +- configs/models/qwen/hf_qwen1_5_14b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_1_8b.py | 23 +- configs/models/qwen/hf_qwen1_5_1_8b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_32b.py | 23 +- configs/models/qwen/hf_qwen1_5_32b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_4b.py | 23 +- configs/models/qwen/hf_qwen1_5_4b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_72b.py | 23 +- configs/models/qwen/hf_qwen1_5_72b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_7b.py | 23 +- configs/models/qwen/hf_qwen1_5_7b_chat.py | 31 +- configs/models/qwen/hf_qwen_14b.py | 24 +- configs/models/qwen/hf_qwen_14b_chat.py | 35 +- configs/models/qwen/hf_qwen_1_8b.py | 24 +- configs/models/qwen/hf_qwen_1_8b_chat.py | 33 +- configs/models/qwen/hf_qwen_72b.py | 24 +- configs/models/qwen/hf_qwen_72b_chat.py | 32 +- configs/models/qwen/hf_qwen_7b.py | 24 +- configs/models/qwen/hf_qwen_7b_chat.py | 35 +- configs/models/qwen/vllm_qwen1_5_14b_chat.py | 1 - configs/models/qwen/vllm_qwen1_5_72b_chat.py | 1 - configs/models/skywork/hf_skywork_13b.py | 22 +- configs/models/vicuna/hf_vicuna_13b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_13b_v15.py | 22 +- .../models/vicuna/hf_vicuna_13b_v15_16k.py | 29 +- configs/models/vicuna/hf_vicuna_33b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v15.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v15_16k.py | 29 +- configs/models/yi/hf_yi_34b.py | 20 +- configs/models/yi/hf_yi_34b_200k.py | 24 - configs/models/yi/hf_yi_34b_chat.py | 28 +- configs/models/yi/hf_yi_6b.py | 20 +- configs/models/yi/hf_yi_6b_200k.py | 23 - configs/models/yi/hf_yi_6b_chat.py | 29 +- configs/models/zephyr/hf_zephyr_7b_beta.py | 28 +- configs/summarizers/chat_OC15.py | 81 ++++ .../summarizers/chat_OC15_multi_faceted.py | 130 ++++++ docs/en/get_started/quick_start.md | 47 +- docs/zh_cn/get_started/quick_start.md | 47 +- opencompass/cli/main.py | 55 +-- opencompass/datasets/winogrande.py | 9 + opencompass/models/__init__.py | 22 +- opencompass/models/huggingface_above_v4_33.py | 414 ++++++++++++++++++ opencompass/models/turbomind.py | 43 +- opencompass/models/vllm.py | 14 +- .../icl_inferencer/icl_ll_inferencer.py | 76 ++-- .../icl_inferencer/icl_ppl_inferencer.py | 102 ++--- opencompass/partitioners/num_worker.py | 8 +- opencompass/summarizers/__init__.py | 3 +- opencompass/summarizers/default.py | 12 +- opencompass/summarizers/multi_faceted.py | 46 ++ opencompass/utils/build.py | 1 - opencompass/utils/run.py | 65 +-- tools/prompt_viewer.py | 2 +- 186 files changed, 1949 insertions(+), 2912 deletions(-) create mode 100644 configs/dataset_collections/chat_OC15.py delete mode 100644 configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py create mode 100644 configs/datasets/bbh/bbh_gen_2879b0.py create mode 100644 configs/datasets/bbh/bbh_subset_settings.py rename configs/datasets/winogrande/{winogrande_gen_a9ede5.py => deprecated_winogrande_gen_a9ede5.py} (100%) create mode 100644 configs/datasets/winogrande/winogrande_5shot_gen_b36770.py create mode 100644 configs/datasets/winogrande/winogrande_gen_458220.py create mode 100644 configs/eval_llama3_instruct.py create mode 100644 configs/models/codellama/hf_codellama_70b.py create mode 100644 configs/models/codellama/hf_codellama_70b_instruct.py create mode 100644 
configs/models/codellama/hf_codellama_70b_python.py create mode 100644 configs/models/hf_internlm/hf_internlm2_math_20b.py create mode 100644 configs/models/hf_internlm/hf_internlm2_math_7b.py delete mode 100644 configs/models/hf_internlm/hf_internlm_chat_7b_8k.py delete mode 100644 configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py create mode 100644 configs/models/hf_internlm/lmdeploy_internlm2_20b.py create mode 100644 configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py create mode 100644 configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py create mode 100644 configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x22b_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x7b_v0_1.py rename configs/models/{mixtral => mistral}/mixtral_8x7b_32k.py (100%) create mode 100644 configs/models/mistral/vllm_mistral_7b_v0_1.py create mode 100644 configs/models/mistral/vllm_mistral_7b_v0_2.py rename configs/models/{mixtral => mistral}/vllm_mixtral_8x7b_instruct_v0_1.py (97%) delete mode 100644 configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x22b_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x7b_v0_1.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base_32k.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py create mode 100644 configs/models/openbmb/hf_minicpm_2b_sft_bf16.py create mode 100644 configs/models/others/hf_dbrx_base.py delete mode 100644 configs/models/yi/hf_yi_34b_200k.py delete mode 100644 configs/models/yi/hf_yi_6b_200k.py create mode 100644 configs/summarizers/chat_OC15.py create mode 100644 configs/summarizers/chat_OC15_multi_faceted.py create mode 100644 opencompass/models/huggingface_above_v4_33.py create mode 100644 opencompass/summarizers/multi_faceted.py diff --git a/README.md b/README.md index eaac33b3b..484b49f5f 100644 --- a/README.md +++ b/README.md @@ -162,20 +162,11 @@ python tools/list_configs.py llama mmlu You can also evaluate other HuggingFace models via command line. Taking LLaMA-7b as an example: ```bash -python run.py --datasets ceval_ppl mmlu_ppl \ ---hf-path huggyllama/llama-7b \ # HuggingFace model path ---model-kwargs device_map='auto' \ # Arguments for model construction ---tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # Arguments for tokenizer construction ---max-out-len 100 \ # Maximum number of tokens generated ---max-seq-len 2048 \ # Maximum sequence length the model can accept ---batch-size 8 \ # Batch size ---no-batch-padding \ # Don't enable batch padding, infer through for loop to avoid performance loss ---num-gpus 1 # Number of minimum required GPUs +python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b ``` > \[!TIP\] > -> To run the command above, you will need to remove the comments starting from `# ` first. > configuration with `_ppl` is designed for base model typically. > configuration with `_gen` can be used for both base model and chat model. 
diff --git a/README_zh-CN.md b/README_zh-CN.md index 8adb68a03..b53cf89df 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -163,20 +163,9 @@ python tools/list_configs.py llama mmlu 你也可以通过命令行去评测其它 HuggingFace 模型。同样以 LLaMA-7b 为例: ```bash -python run.py --datasets ceval_ppl mmlu_ppl \ ---hf-path huggyllama/llama-7b \ # HuggingFace 模型地址 ---model-kwargs device_map='auto' \ # 构造 model 的参数 ---tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # 构造 tokenizer 的参数 ---max-out-len 100 \ # 最长生成 token 数 ---max-seq-len 2048 \ # 模型能接受的最大序列长度 ---batch-size 8 \ # 批次大小 ---no-batch-padding \ # 不打开 batch padding,通过 for loop 推理,避免精度损失 ---num-gpus 1 # 运行该模型所需的最少 gpu 数 +python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b ``` -> **注意**
-> 若需要运行上述命令,你需要删除所有从 `# ` 开始的注释。 - 通过命令行或配置文件,OpenCompass 还支持评测 API 或自定义模型,以及更多样化的评测策略。请阅读[快速开始](https://opencompass.readthedocs.io/zh_CN/latest/get_started/quick_start.html)了解如何运行一个评测任务。 更多教程请查看我们的[文档](https://opencompass.readthedocs.io/zh_CN/latest/index.html)。 diff --git a/configs/dataset_collections/chat_OC15.py b/configs/dataset_collections/chat_OC15.py new file mode 100644 index 000000000..a7ef80859 --- /dev/null +++ b/configs/dataset_collections/chat_OC15.py @@ -0,0 +1,22 @@ +from mmengine.config import read_base + +with read_base(): + from ..datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets + from ..datasets.cmmlu.cmmlu_gen_c13365 import cmmlu_datasets + from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets + from ..datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets + from ..datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import triviaqa_datasets + from ..datasets.nq.nq_open_1shot_gen_01cf41 import nq_datasets + from ..datasets.race.race_gen_69ee4f import race_datasets + from ..datasets.winogrande.winogrande_5shot_gen_b36770 import winogrande_datasets + from ..datasets.hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets + from ..datasets.bbh.bbh_gen_2879b0 import bbh_datasets + from ..datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets + from ..datasets.math.math_0shot_gen_393424 import math_datasets + from ..datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import TheoremQA_datasets + from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets + from ..datasets.mbpp.sanitized_mbpp_gen_830460 import sanitized_mbpp_datasets + from ..datasets.gpqa.gpqa_gen_4baadb import gpqa_datasets + from ..datasets.IFEval.IFEval_gen_3321a3 import ifeval_datasets + +datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), []) diff --git a/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py b/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py deleted file mode 100644 index 007400946..000000000 --- a/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py +++ /dev/null @@ -1,46 +0,0 @@ -from opencompass.openicl.icl_prompt_template import PromptTemplate -from opencompass.openicl.icl_retriever import ZeroRetriever -from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import HFDataset, TheoremQA_postprocess_v3, TheoremQAEvaluatorV3 - -TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test", test_split="test") - -TheoremQA_infer_cfg = dict( - prompt_template=dict( - type=PromptTemplate, - template=dict( - round=[ - dict(role='HUMAN', prompt='You are supposed to provide a solution to a given problem.\n\n\nProblem:\nIn a 10 Gigabit Ethernet network, the average size of a frame is 1500 bytes. 
If a burst of noise lasting 1ms interrupts the network, how many frames are lost?'), - dict(role='BOT', prompt='Solution:\nFirst, calculate the data rate in bytes/s:\n\n10 Gigabit/s * (1 Byte / 8 bits) = 1.25 * 10^9 Bytes/s\n\nNext, calculate the data loss in bytes due to the noise:\n\n1 ms * 1.25 * 10^9 Bytes/s = 1.25 * 10^6 Bytes\n\nFinally, divide the data loss by the average frame size to get the number of frames lost:\n\n1.25 * 10^6 Bytes / 1500 Bytes/frame ≈ 833.33 frames\nThe answer is 833.33'), - dict(role='HUMAN', prompt='\nProblem:\nGiven x = 0.157, what is the value of x \\times \\frac{\\prod_{n=1}^\\infty (1 - \\frac{x^2}{n^2 \\pi^2})}{\\sin(x)}?'), - dict(role='BOT', prompt="Solution:\nTo evaluate the expression $x \\times \\frac{\\prod_{n=1}^{\\infty} (1 - \\frac{x^2}{n^2 \\pi^2})}{\\sin(x)}$ given x = 0.157, we first recognize that the product in the numerator is related to the sine function through the Euler's reflection formula for the sine function, which can be expressed as:\n\n$$\\sin(x) = x \\prod_{n=1}^{\\infty} \\left(1 - \\frac{x^2}{n^2 \\pi^2}\\right)$$\n\nTherefore, the given expression simplifies to: $x \\times \\frac{\\sin(x)}{\\sin(x)}$\n\nBecause sin(x) in the numerator and denominator cancels out, the expression simplifies further to just x.\n\nSo, given x = 0.157, the value of the expression is 0.157. This result is derived from the properties of the sine function and does not require computational evaluation.\nThe answer is 0.157"), - dict(role='HUMAN', prompt='\nProblem:\nConsider the basis C of \\mathbb{R}^2 consisting of vectors u_1 = [2, 4] and u_2 = [1, -1]. If y = [8, 12], find the C-coordinate vector of y.'), - dict(role='BOT', prompt="Solution:\nThe goal is to express y as a linear comPbination of the basis vectors of C, i.e., $y = a\\cdot u_1 + b\\cdot u_2$, where a and b are the scalar coefficients that we want to find. These coefficients will form the C-coordinate vector of y, which we'll denote as $[a, b]_C$.\n\nGiven:\n- $u_1 = [2, 4]$,\n- $u_2 = [1, -1]$,\n- $y = [8, 12]$.\n\nWe need to solve the system of linear equations:\n2a + 1b = 8\n4a - 1b = 12\n\nLet's solve this system of equations to find a and b.\n\nThe solution to the system of equations is $a = \\frac{10}{3} and b = \\frac{4}{3}$. Therefore, the C-coordinate vector of y in the basis consisting of vectors u_1 = [2, 4] and u_2 = [1, -1] is $\\left[\\frac{10}{3}, \\frac{4}{3}\\right]_C$. \nLet's calculate the numerical value of $\\left[\x0crac{10}{3}, \x0crac{4}{3}\right]_C$ as [3.33, 1.33].\nThe answer is [3.33, 1.33]"), - dict(role='HUMAN', prompt='\nProblem:\nOne can draw a simple, connected planar graph with 200 vertices and 397 edges. Is this statement Trur or False?'), - dict(role='BOT', prompt="Solution:\nTo determine the answer, we can use Euler's formula for planar graphs, which states that for any finite, connected, planar graph, $V - E + F = 2$, where V is the number of vertices, E is the number of edges, and F is the number of faces.\n\nGiven the modified question, we have V = 200 vertices and E = 397 edges. We want to find if we can have a graph that satisfies these conditions, adhering to Euler's formula.\n\nFirst, let's rearrange Euler's formula to solve for F: F = E - V + 2\n\nSubstituting the given values: F = 397 - 200 + 2, F = 199\n\nThis means a graph with 200 vertices and 397 edges would have 199 faces. 
However, to determine the truth of this possibility, we should check if this graph doesn't violate any other planar graph constraints, particularly regarding the number of edges.\n\nFor a simple, connected planar graph, there's also a relationship between vertices, edges, and faces given by the inequality: $E \\leq 3V - 6$\n\nSubstituting V = 200 gives: $E \\leq 3*200 - 6 = 594$\n\nWith E = 397, the condition $E \\leq 594$ is satisfied, meaning it's theoretically possible in terms of the edge condition for a planar graph.\n\nTherefore, one can draw a simple, connected planar graph with 200 vertices and 397 edges, resulting in 199 faces, without violating the conditions for it to be planar according to both Euler's formula and the constraint on the maximum number of edges.\nThe answer is True"), - dict(role='HUMAN', prompt='\nProblem:\nGiven a finite group G, and a collection of permutations H on a set. Then (a) there always exists H such that G is isomorphic to H; (b) for any H, G is isomorphic to H; (c) G can never be isomorphic to H; (d) none of the above. Which option is correct?'), - dict(role='BOT', prompt="Solution:\nThis is based on Cayley's theorem, which states that every group G is isomorphic to a subgroup of the symmetric group acting on G. \nIn other words, for every finite group G, there exists a collection of permutations H (which in this context, can be thought of as the set of permutations representing the action of G on itself) such that G is isomorphic to H.\n\nTherefore, there always exists H such that G is isomorphic to H.\nThe answer is (a)"), - dict(role='HUMAN', prompt='\nProblem:\n{Question}'), - dict(role='BOT', prompt='Solution:\n{Answer}'), - ] - ), - ), - retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=1024, stopping_criteria=["USER:", "ASSISTANT:", "### Instruction:", "Response:", "", "[INST]", "Problem:"]), -) - -TheoremQA_eval_cfg = dict( - evaluator=dict(type=TheoremQAEvaluatorV3), - pred_postprocessor=dict(type=TheoremQA_postprocess_v3) -) - -TheoremQA_datasets = [ - dict( - abbr="TheoremQA", - type=HFDataset, - path="TIGER-Lab/TheoremQA", - reader_cfg=TheoremQA_reader_cfg, - infer_cfg=TheoremQA_infer_cfg, - eval_cfg=TheoremQA_eval_cfg, - ) -] diff --git a/configs/datasets/bbh/bbh_gen_2879b0.py b/configs/datasets/bbh/bbh_gen_2879b0.py new file mode 100644 index 000000000..d962939f4 --- /dev/null +++ b/configs/datasets/bbh/bbh_gen_2879b0.py @@ -0,0 +1,56 @@ +import os +from mmengine.config import read_base +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import BBHDataset, bbh_mcq_postprocess, BBHEvaluator, BBHEvaluator_mcq + +with read_base(): + from .bbh_subset_settings import settings + +bbh_datasets = [] +for name, test_type in settings: + with open(os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{name}.txt'), 'r') as f: + hint = f.read() + + task_prompt, body = hint.split('\n\nQ:', 1) + sections = ('Q:' + body).split('\n\n') + prompt_rounds = [] + for index, section in enumerate(sections): + question, answer = section.split('\nA:') + answer = 'A:' + answer + if index == 0: + desc = task_prompt.strip() + '\n' + else: + desc = '' + prompt_rounds.append(dict(role="HUMAN", prompt=f"{desc}{question.strip()}")) + prompt_rounds.append(dict(role="BOT", prompt=answer.strip())) + prompt_rounds.append(dict(role="HUMAN", prompt="Q: {input}")) + + 
bbh_reader_cfg = dict(input_columns=["input"], output_column="target") + + bbh_infer_cfg = dict( + prompt_template=dict(type=PromptTemplate, template=dict(round=prompt_rounds)), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=512)) + + if test_type == 'mcq': + bbh_eval_cfg = dict( + evaluator=dict(type=BBHEvaluator_mcq), + pred_role="BOT", + pred_postprocessor=dict(type=bbh_mcq_postprocess), + dataset_postprocessor=dict(type=bbh_mcq_postprocess)) + else: + bbh_eval_cfg = dict( + evaluator=dict(type=BBHEvaluator), + pred_role="BOT") + + bbh_datasets.append( + dict( + type=BBHDataset, + path="./data/BBH/data", + name=name, + abbr='bbh-' + name, + reader_cfg=bbh_reader_cfg.copy(), + infer_cfg=bbh_infer_cfg.copy(), + eval_cfg=bbh_eval_cfg.copy())) diff --git a/configs/datasets/bbh/bbh_subset_settings.py b/configs/datasets/bbh/bbh_subset_settings.py new file mode 100644 index 000000000..fbed75110 --- /dev/null +++ b/configs/datasets/bbh/bbh_subset_settings.py @@ -0,0 +1,29 @@ +settings = [ + ('temporal_sequences', 'mcq'), + ('disambiguation_qa', 'mcq'), + ('date_understanding', 'mcq'), + ('tracking_shuffled_objects_three_objects', 'mcq'), + ('penguins_in_a_table', 'mcq'), + ('geometric_shapes', 'mcq'), + ('snarks', 'mcq'), + ('ruin_names', 'mcq'), + ('tracking_shuffled_objects_seven_objects', 'mcq'), + ('tracking_shuffled_objects_five_objects', 'mcq'), + ('logical_deduction_three_objects', 'mcq'), + ('hyperbaton', 'mcq'), + ('logical_deduction_five_objects', 'mcq'), + ('logical_deduction_seven_objects', 'mcq'), + ('movie_recommendation', 'mcq'), + ('salient_translation_error_detection', 'mcq'), + ('reasoning_about_colored_objects', 'mcq'), + ('multistep_arithmetic_two', 'free_form'), + ('navigate', 'free_form'), + ('dyck_languages', 'free_form'), + ('word_sorting', 'free_form'), + ('sports_understanding', 'free_form'), + ('boolean_expressions', 'free_form'), + ('object_counting', 'free_form'), + ('formal_fallacies', 'free_form'), + ('causal_judgement', 'free_form'), + ('web_of_lies', 'free_form'), +] diff --git a/configs/datasets/collections/chat_medium.py b/configs/datasets/collections/chat_medium.py index bf2fef58e..c2874bc52 100644 --- a/configs/datasets/collections/chat_medium.py +++ b/configs/datasets/collections/chat_medium.py @@ -47,7 +47,7 @@ from ..piqa.piqa_gen_1194eb import piqa_datasets from ..siqa.siqa_gen_e78df3 import siqa_datasets from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets - from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets + from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets from ..obqa.obqa_gen_9069e4 import obqa_datasets from ..nq.nq_gen_c788f6 import nq_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets diff --git a/configs/datasets/collections/chat_small.py b/configs/datasets/collections/chat_small.py index dce154200..476480217 100644 --- a/configs/datasets/collections/chat_small.py +++ b/configs/datasets/collections/chat_small.py @@ -31,7 +31,7 @@ from ..summedits.summedits_gen_315438 import summedits_datasets from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets from ..piqa.piqa_gen_1194eb import piqa_datasets - from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets + from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets from ..obqa.obqa_gen_9069e4 import obqa_datasets from ..nq.nq_gen_c788f6 import nq_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets diff --git 
a/configs/datasets/winogrande/winogrande_gen_a9ede5.py b/configs/datasets/winogrande/deprecated_winogrande_gen_a9ede5.py similarity index 100% rename from configs/datasets/winogrande/winogrande_gen_a9ede5.py rename to configs/datasets/winogrande/deprecated_winogrande_gen_a9ede5.py diff --git a/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py b/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py new file mode 100644 index 000000000..60dca51e8 --- /dev/null +++ b/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py @@ -0,0 +1,46 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winograndeDataset_V3 +from opencompass.utils.text_postprocessors import first_option_postprocess + +winogrande_reader_cfg = dict( + input_columns=["prompt", "only_option1", "only_option2"], + output_column="answer", + train_split="train_xs", + test_split="dev", +) + +winogrande_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin="</E>", + round=[ + dict(role="HUMAN", prompt="Question: {prompt}\nA. {only_option1}\nB. {only_option2}\nAnswer:"), + dict(role="BOT", prompt="{answer}"), + ] + ), + ice_token="</E>", + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer), +) + +winogrande_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type=first_option_postprocess, options="AB"), +) + +winogrande_datasets = [ + dict( + abbr="winogrande", + type=winograndeDataset_V3, + path="./data/winogrande", + reader_cfg=winogrande_reader_cfg, + infer_cfg=winogrande_infer_cfg, + eval_cfg=winogrande_eval_cfg, + ) +] diff --git a/configs/datasets/winogrande/winogrande_gen.py b/configs/datasets/winogrande/winogrande_gen.py index ddf8330d6..0aca8fe90 100644 --- a/configs/datasets/winogrande/winogrande_gen.py +++ b/configs/datasets/winogrande/winogrande_gen.py @@ -1,4 +1,4 @@ from mmengine.config import read_base with read_base(): - from .winogrande_gen_a9ede5 import winogrande_datasets # noqa: F401, F403 + from .winogrande_gen_458220 import winogrande_datasets # noqa: F401, F403 diff --git a/configs/datasets/winogrande/winogrande_gen_458220.py b/configs/datasets/winogrande/winogrande_gen_458220.py new file mode 100644 index 000000000..0a83c0297 --- /dev/null +++ b/configs/datasets/winogrande/winogrande_gen_458220.py @@ -0,0 +1,41 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winograndeDataset_V2 +from opencompass.utils.text_postprocessors import first_option_postprocess + +winogrande_reader_cfg = dict( + input_columns=["prompt", "only_option1", "only_option2"], + output_column="answer", +) + +winogrande_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role="HUMAN", prompt="Question: {prompt}\nA. {only_option1}\nB. 
{only_option2}\nAnswer:"), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +winogrande_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), +) + +winogrande_datasets = [ + dict( + abbr="winogrande", + type=winograndeDataset_V2, + path='./data/winogrande', + reader_cfg=winogrande_reader_cfg, + infer_cfg=winogrande_infer_cfg, + eval_cfg=winogrande_eval_cfg, + ) +] diff --git a/configs/eval_llama3_instruct.py b/configs/eval_llama3_instruct.py new file mode 100644 index 000000000..9645c2b6a --- /dev/null +++ b/configs/eval_llama3_instruct.py @@ -0,0 +1,52 @@ +from mmengine.config import read_base + +with read_base(): + from .dataset_collections.chat_OC15 import datasets + + from .models.hf_llama.hf_llama3_8b_instruct import models as hf_llama3_8b_instruct_model + + from .summarizers.chat_OC15 import summarizer + + +work_dir = 'outputs/debug/llama3-instruct' + +models = sum([v for k, v in locals().items() if k.endswith("_model")], []) + +# dataset version metric mode llama-3-8b-instruct-hf +# -------------------- --------- ---------------------------- ------ ------------------------ +# average - naive_average gen 55.64 +# mmlu - naive_average gen 68.30 +# cmmlu - naive_average gen 53.29 +# ceval - naive_average gen 52.32 +# GaokaoBench - weighted_average gen 45.91 +# triviaqa_wiki_1shot eaf81e score gen 79.01 +# nq_open_1shot 01cf41 score gen 30.25 +# race-high 9a54b6 accuracy gen 81.22 +# winogrande b36770 accuracy gen 66.46 +# hellaswag e42710 accuracy gen 74.33 +# bbh - naive_average gen 67.25 +# gsm8k 1d7fe4 accuracy gen 79.08 +# math 393424 accuracy gen 27.78 +# TheoremQA 6f0af8 score gen 19.50 +# openai_humaneval 8e312c humaneval_pass@1 gen 55.49 +# sanitized_mbpp 830460 score gen 66.54 +# GPQA_diamond 4baadb accuracy gen 25.76 +# IFEval 3321a3 Prompt-level-strict-accuracy gen 67.84 +# - - - - +# mmlu - naive_average gen 68.30 +# mmlu-stem - naive_average gen 57.92 +# mmlu-social-science - naive_average gen 77.83 +# mmlu-humanities - naive_average gen 71.20 +# mmlu-other - naive_average gen 71.79 +# cmmlu - naive_average gen 53.29 +# cmmlu-stem - naive_average gen 45.40 +# cmmlu-social-science - naive_average gen 54.63 +# cmmlu-humanities - naive_average gen 54.14 +# cmmlu-other - naive_average gen 59.52 +# cmmlu-china-specific - naive_average gen 49.33 +# ceval - naive_average gen 52.32 +# ceval-stem - naive_average gen 48.16 +# ceval-social-science - naive_average gen 57.50 +# ceval-humanities - naive_average gen 53.26 +# ceval-other - naive_average gen 54.26 +# ceval-hard - naive_average gen 35.59 diff --git a/configs/models/aquila/hf_aquila2_34b.py b/configs/models/aquila/hf_aquila2_34b.py index e0194a5ad..bb84b5499 100644 --- a/configs/models/aquila/hf_aquila2_34b.py +++ b/configs/models/aquila/hf_aquila2_34b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='aquila2-34b-hf', - path="BAAI/Aquila2-34B", - tokenizer_path='BAAI/Aquila2-34B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='BAAI/Aquila2-34B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + 
run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/aquila/hf_aquila2_7b.py b/configs/models/aquila/hf_aquila2_7b.py index 95af1f7de..0dab25381 100644 --- a/configs/models/aquila/hf_aquila2_7b.py +++ b/configs/models/aquila/hf_aquila2_7b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='aquila2-7b-hf', - path="BAAI/Aquila2-7B", - tokenizer_path='BAAI/Aquila2-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='BAAI/Aquila2-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/aquila/hf_aquilachat2_34b.py b/configs/models/aquila/hf_aquilachat2_34b.py index 112b39dfd..b096239e8 100644 --- a/configs/models/aquila/hf_aquilachat2_34b.py +++ b/configs/models/aquila/hf_aquilachat2_34b.py @@ -5,7 +5,6 @@ dict(role='HUMAN', begin='### Human: ', end='\n'), dict(role='BOT', begin='### Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_34b_16k.py b/configs/models/aquila/hf_aquilachat2_34b_16k.py index ccf28dde1..b3c6b6c2a 100644 --- a/configs/models/aquila/hf_aquilachat2_34b_16k.py +++ b/configs/models/aquila/hf_aquilachat2_34b_16k.py @@ -6,7 +6,6 @@ dict(role='HUMAN', begin='Human: ', end='###'), dict(role='BOT', begin='Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_7b.py b/configs/models/aquila/hf_aquilachat2_7b.py index ff964d05a..3b318c3c9 100644 --- a/configs/models/aquila/hf_aquilachat2_7b.py +++ b/configs/models/aquila/hf_aquilachat2_7b.py @@ -5,7 +5,6 @@ dict(role='HUMAN', begin='<|startofpiece|>', end=''), dict(role='BOT', begin='<|endofpiece|>', end='', generate=True), ], - eos_token_id=2, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_7b_16k.py b/configs/models/aquila/hf_aquilachat2_7b_16k.py index 557942593..40b8619e7 100644 --- a/configs/models/aquila/hf_aquilachat2_7b_16k.py +++ b/configs/models/aquila/hf_aquilachat2_7b_16k.py @@ -6,7 +6,6 @@ dict(role='HUMAN', begin='Human: ', end='###'), dict(role='BOT', begin='Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/baichuan/hf_baichuan2_13b_base.py b/configs/models/baichuan/hf_baichuan2_13b_base.py index 39651a24e..3d00925b0 100644 --- a/configs/models/baichuan/hf_baichuan2_13b_base.py +++ b/configs/models/baichuan/hf_baichuan2_13b_base.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='baichuan2-13b-base-hf', - path="baichuan-inc/Baichuan2-13B-Base", - tokenizer_path='baichuan-inc/Baichuan2-13B-Base', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='baichuan-inc/Baichuan2-13B-Base', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto', trust_remote_code=True), - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/baichuan/hf_baichuan2_7b_base.py 
b/configs/models/baichuan/hf_baichuan2_7b_base.py index f351c870a..88ebfde45 100644 --- a/configs/models/baichuan/hf_baichuan2_7b_base.py +++ b/configs/models/baichuan/hf_baichuan2_7b_base.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='baichuan2-7b-base-hf', - path="baichuan-inc/Baichuan2-7B-Base", - tokenizer_path='baichuan-inc/Baichuan2-7B-Base', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='baichuan-inc/Baichuan2-7B-Base', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto', trust_remote_code=True), - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/bluelm/hf_bluelm_7b_base.py b/configs/models/bluelm/hf_bluelm_7b_base.py index d7689864c..98ff33bae 100644 --- a/configs/models/bluelm/hf_bluelm_7b_base.py +++ b/configs/models/bluelm/hf_bluelm_7b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='bluelm-7b-base-hf', - path="vivo-ai/BlueLM-7B-Base", - tokenizer_path='vivo-ai/BlueLM-7B-Base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='vivo-ai/BlueLM-7B-Base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/bluelm/hf_bluelm_7b_base_32k.py b/configs/models/bluelm/hf_bluelm_7b_base_32k.py index f319456f6..755dc25de 100644 --- a/configs/models/bluelm/hf_bluelm_7b_base_32k.py +++ b/configs/models/bluelm/hf_bluelm_7b_base_32k.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='bluelm-7b-base-32k-hf', - path="vivo-ai/BlueLM-7B-Base-32K", - tokenizer_path='vivo-ai/BlueLM-7B-Base-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=4096, + path='vivo-ai/BlueLM-7B-Base-32K', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/chatglm/hf_chatglm3_6b.py b/configs/models/chatglm/hf_chatglm3_6b.py index c7182e1d2..449409638 100644 --- a/configs/models/chatglm/hf_chatglm3_6b.py +++ b/configs/models/chatglm/hf_chatglm3_6b.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFaceChatGLM3 - -api_meta_template = dict( - round=[ - dict(role='HUMAN', api_role='HUMAN'), - dict(role='BOT', api_role='BOT', generate=True), - ] -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceChatGLM3, + type=HuggingFacewithChatTemplate, abbr='chatglm3-6b-hf', path='THUDM/chatglm3-6b', - tokenizer_path='THUDM/chatglm3-6b', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - 
trust_remote_code=True, - ), - meta_template=api_meta_template, - max_out_len=100, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1) + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/chatglm/hf_chatglm3_6b_32k.py b/configs/models/chatglm/hf_chatglm3_6b_32k.py index 26fc9b49e..2badff913 100644 --- a/configs/models/chatglm/hf_chatglm3_6b_32k.py +++ b/configs/models/chatglm/hf_chatglm3_6b_32k.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFaceChatGLM3 - -api_meta_template = dict( - round=[ - dict(role='HUMAN', api_role='HUMAN'), - dict(role='BOT', api_role='BOT', generate=True), - ] -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceChatGLM3, + type=HuggingFacewithChatTemplate, abbr='chatglm3-6b-32k-hf', path='THUDM/chatglm3-6b-32k', - tokenizer_path='THUDM/chatglm3-6b-32k', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=api_meta_template, - max_out_len=100, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1) + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/chatglm/hf_chatglm3_6b_base.py b/configs/models/chatglm/hf_chatglm3_6b_base.py index 17f5d5ba0..f88c0aac4 100644 --- a/configs/models/chatglm/hf_chatglm3_6b_base.py +++ b/configs/models/chatglm/hf_chatglm3_6b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='chatglm3-6b-base-hf', path='THUDM/chatglm3-6b-base', - tokenizer_path='THUDM/chatglm3-6b-base', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=4096, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/codellama/hf_codellama_13b.py b/configs/models/codellama/hf_codellama_13b.py index 2267f923c..4596c9dd1 100644 --- a/configs/models/codellama/hf_codellama_13b.py +++ b/configs/models/codellama/hf_codellama_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 13B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-13b', - path="codellama/CodeLlama-13b-hf", - tokenizer_path='codellama/CodeLlama-13b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_13b_instruct.py b/configs/models/codellama/hf_codellama_13b_instruct.py index 01830015e..a636c0fd6 100644 --- a/configs/models/codellama/hf_codellama_13b_instruct.py +++ b/configs/models/codellama/hf_codellama_13b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 13B Instruct dict( - 
type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-13b-Instruct', - path="codellama/CodeLlama-13b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-13b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_13b_python.py b/configs/models/codellama/hf_codellama_13b_python.py index 4c5ea0d17..07f44aa8d 100644 --- a/configs/models/codellama/hf_codellama_13b_python.py +++ b/configs/models/codellama/hf_codellama_13b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 13B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-13b-Python', - path="codellama/CodeLlama-13b-Python-hf", - tokenizer_path='codellama/CodeLlama-13b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b.py b/configs/models/codellama/hf_codellama_34b.py index e6dbef890..599fadda8 100644 --- a/configs/models/codellama/hf_codellama_34b.py +++ b/configs/models/codellama/hf_codellama_34b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 34B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-34b', - path="codellama/CodeLlama-34b-hf", - tokenizer_path='codellama/CodeLlama-34b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b_instruct.py b/configs/models/codellama/hf_codellama_34b_instruct.py index 63894fd2f..ae2ae79be 100644 --- a/configs/models/codellama/hf_codellama_34b_instruct.py +++ b/configs/models/codellama/hf_codellama_34b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 34B Instruct dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-34b-Instruct', - path="codellama/CodeLlama-34b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-34b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b_python.py b/configs/models/codellama/hf_codellama_34b_python.py index 4ac82de8d..a58d61f87 100644 
--- a/configs/models/codellama/hf_codellama_34b_python.py +++ b/configs/models/codellama/hf_codellama_34b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 34B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-34b-Python', - path="codellama/CodeLlama-34b-Python-hf", - tokenizer_path='codellama/CodeLlama-34b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_70b.py b/configs/models/codellama/hf_codellama_70b.py new file mode 100644 index 000000000..7fd8e8667 --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='CodeLlama-70b', + path='codellama/CodeLlama-70b-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_70b_instruct.py b/configs/models/codellama/hf_codellama_70b_instruct.py new file mode 100644 index 000000000..38c158711 --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='CodeLlama-70b-Instruct', + path='codellama/CodeLlama-70b-Instruct-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_70b_python.py b/configs/models/codellama/hf_codellama_70b_python.py new file mode 100644 index 000000000..83d62f2de --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b_python.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='CodeLlama-70b-Python', + path='codellama/CodeLlama-70b-Python-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_7b.py b/configs/models/codellama/hf_codellama_7b.py index b66f50957..a4c57dfdc 100644 --- a/configs/models/codellama/hf_codellama_7b.py +++ b/configs/models/codellama/hf_codellama_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 7B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-7b', - path="codellama/CodeLlama-7b-hf", - tokenizer_path='codellama/CodeLlama-7b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_7b_instruct.py b/configs/models/codellama/hf_codellama_7b_instruct.py index 1ae4ef846..aadb87e20 100644 --- a/configs/models/codellama/hf_codellama_7b_instruct.py +++ b/configs/models/codellama/hf_codellama_7b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM 
+from opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 7B Instruct dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-7b-Instruct', - path="codellama/CodeLlama-7b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-7b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_7b_python.py b/configs/models/codellama/hf_codellama_7b_python.py index b0cae6da5..6504fa36e 100644 --- a/configs/models/codellama/hf_codellama_7b_python.py +++ b/configs/models/codellama/hf_codellama_7b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 7B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-7b-Python', - path="codellama/CodeLlama-7b-Python-hf", - tokenizer_path='codellama/CodeLlama-7b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/deepseek/hf_deepseek_67b_base.py b/configs/models/deepseek/hf_deepseek_67b_base.py index c7378daeb..1ec3e2113 100644 --- a/configs/models/deepseek/hf_deepseek_67b_base.py +++ b/configs/models/deepseek/hf_deepseek_67b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-67b-base-hf', - path="deepseek-ai/deepseek-llm-67b-base", - tokenizer_path='deepseek-ai/deepseek-llm-67b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-67b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/deepseek/hf_deepseek_67b_chat.py b/configs/models/deepseek/hf_deepseek_67b_chat.py index 8eaadfc7b..d56d567e4 100644 --- a/configs/models/deepseek/hf_deepseek_67b_chat.py +++ b/configs/models/deepseek/hf_deepseek_67b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-67b-chat-hf', - path="deepseek-ai/deepseek-llm-67b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, 
- max_seq_len=2048, + path='deepseek-ai/deepseek-llm-67b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/deepseek/hf_deepseek_7b_base.py b/configs/models/deepseek/hf_deepseek_7b_base.py index 9985932f3..0aea3b621 100644 --- a/configs/models/deepseek/hf_deepseek_7b_base.py +++ b/configs/models/deepseek/hf_deepseek_7b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-7b-base-hf', - path="deepseek-ai/deepseek-llm-7b-base", - tokenizer_path='deepseek-ai/deepseek-llm-7b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-7b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_7b_chat.py b/configs/models/deepseek/hf_deepseek_7b_chat.py index 2531961b4..3ed5044af 100644 --- a/configs/models/deepseek/hf_deepseek_7b_chat.py +++ b/configs/models/deepseek/hf_deepseek_7b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-7b-chat-hf', - path="deepseek-ai/deepseek-llm-7b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-7b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py index c53b07b95..85cafd9b9 100644 --- a/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-1.3b-hf', - path="deepseek-ai/deepseek-coder-1.3b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-1.3b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-1.3b-instruct', + max_out_len=1024, batch_size=8, - 
run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py index 87bdf3caf..3e3e21dde 100644 --- a/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-33b-hf', - path="deepseek-ai/deepseek-coder-33b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-33b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-33b-instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=2), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py index 59669a9a0..110e77ec3 100644 --- a/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-6.7b-hf', - path="deepseek-ai/deepseek-coder-6.7b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-6.7b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-6.7b-instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_moe_16b_base.py b/configs/models/deepseek/hf_deepseek_moe_16b_base.py index 0c7f75c8e..4f299d74f 100644 --- a/configs/models/deepseek/hf_deepseek_moe_16b_base.py +++ b/configs/models/deepseek/hf_deepseek_moe_16b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-moe-16b-base-hf', - path="deepseek-ai/deepseek-moe-16b-base", - tokenizer_path='deepseek-ai/deepseek-moe-16b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), 
- max_out_len=100, - min_out_len=3, - max_seq_len=2048, + path='deepseek-ai/deepseek-moe-16b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_moe_16b_chat.py b/configs/models/deepseek/hf_deepseek_moe_16b_chat.py index a009ff0c0..85460f653 100644 --- a/configs/models/deepseek/hf_deepseek_moe_16b_chat.py +++ b/configs/models/deepseek/hf_deepseek_moe_16b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-moe-16b-chat-hf', - path="deepseek-ai/deepseek-moe-16b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-moe-16b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/falcon/hf_falcon_40b.py b/configs/models/falcon/hf_falcon_40b.py index da089dfc4..d5cf827ba 100644 --- a/configs/models/falcon/hf_falcon_40b.py +++ b/configs/models/falcon/hf_falcon_40b.py @@ -1,21 +1,12 @@ -# Only torch >=2.0 is supported for falcon-40b -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='falcon-40b-hf', path='tiiuae/falcon-40b', - tokenizer_path='tiiuae/falcon-40b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='561820f7eef0cc56a31ea38af15ca1acb07fab5d'), - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/falcon/hf_falcon_7b.py b/configs/models/falcon/hf_falcon_7b.py index cab4f61ac..6c42355cd 100644 --- a/configs/models/falcon/hf_falcon_7b.py +++ b/configs/models/falcon/hf_falcon_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='falcon-7b-hf', path='tiiuae/falcon-7b', - tokenizer_path='tiiuae/falcon-7b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5'), - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_2b.py b/configs/models/gemma/hf_gemma_2b.py index ec731c481..966ff397f 100644 --- a/configs/models/gemma/hf_gemma_2b.py +++ b/configs/models/gemma/hf_gemma_2b.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + 
type=HuggingFaceBaseModel, abbr='gemma-2b-hf', - path="google/gemma-2b", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='google/gemma-2b', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_2b_it.py b/configs/models/gemma/hf_gemma_2b_it.py index 0075484b0..32f89e7d6 100644 --- a/configs/models/gemma/hf_gemma_2b_it.py +++ b/configs/models/gemma/hf_gemma_2b_it.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='user\n', end='\n'), - dict(role="BOT", begin="model\n", end='\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='gemma-2b-it-hf', - path="google/gemma-2b-it", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='google/gemma-2b-it', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_7b.py b/configs/models/gemma/hf_gemma_7b.py index 842ea263c..d7c3b6d48 100644 --- a/configs/models/gemma/hf_gemma_7b.py +++ b/configs/models/gemma/hf_gemma_7b.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='gemma-7b-hf', - path="google/gemma-7b", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='google/gemma-7b', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_7b_it.py b/configs/models/gemma/hf_gemma_7b_it.py index b913db6e0..4699a17b2 100644 --- a/configs/models/gemma/hf_gemma_7b_it.py +++ b/configs/models/gemma/hf_gemma_7b_it.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='user\n', end='\n'), - dict(role="BOT", begin="model\n", end='\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='gemma-7b-it-hf', - path="google/gemma-7b-it", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='google/gemma-7b-it', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_1_8b.py b/configs/models/hf_internlm/hf_internlm2_1_8b.py index 7f0ae9cd3..3494c8a6f 100644 --- 
a/configs/models/hf_internlm/hf_internlm2_1_8b.py +++ b/configs/models/hf_internlm/hf_internlm2_1_8b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-1.8b-hf', path="internlm/internlm2-1_8b", - tokenizer_path='internlm/internlm2-1_8b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_20b.py b/configs/models/hf_internlm/hf_internlm2_20b.py index 10ec2e411..f99518f97 100644 --- a/configs/models/hf_internlm/hf_internlm2_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_20b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-20b-hf', path="internlm/internlm2-20b", - tokenizer_path='internlm/internlm2-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_7b.py b/configs/models/hf_internlm/hf_internlm2_7b.py index 990d48f02..054f5e968 100644 --- a/configs/models/hf_internlm/hf_internlm2_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_7b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-7b-hf', path="internlm/internlm2-7b", - tokenizer_path='internlm/internlm2-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_base_20b.py b/configs/models/hf_internlm/hf_internlm2_base_20b.py index 1b8250046..cb8ff73ed 100644 --- a/configs/models/hf_internlm/hf_internlm2_base_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_base_20b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-base-20b-hf', path="internlm/internlm2-base-20b", - tokenizer_path='internlm/internlm2-base-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git 
a/configs/models/hf_internlm/hf_internlm2_base_7b.py b/configs/models/hf_internlm/hf_internlm2_base_7b.py index ae43fe847..d47cf3e0e 100644 --- a/configs/models/hf_internlm/hf_internlm2_base_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_base_7b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-base-7b-hf', path="internlm/internlm2-base-7b", - tokenizer_path='internlm/internlm2-base-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py b/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py index 81c7d35d1..18a707141 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-1.8b-hf', - path="internlm/internlm2-chat-1_8b", - tokenizer_path='internlm/internlm2-chat-1_8b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-1_8b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py index 6228ea570..aee7d21fd 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-1.8b-sft-hf', - path="internlm/internlm2-chat-1_8b-sft", - tokenizer_path='internlm/internlm2-chat-1_8b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-1_8b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": 
[2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_20b.py b/configs/models/hf_internlm/hf_internlm2_chat_20b.py index c35e1701b..69af8f885 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_20b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-20b-hf', - path="internlm/internlm2-chat-20b", - tokenizer_path='internlm/internlm2-chat-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-20b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py index 53844f5c8..94a0b8e9a 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-20b-sft-hf', - path="internlm/internlm2-chat-20b-sft", - tokenizer_path='internlm/internlm2-chat-20b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-20b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_7b.py b/configs/models/hf_internlm/hf_internlm2_chat_7b.py index 3e0b349da..0fd754555 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_7b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-7b-hf', - 
path="internlm/internlm2-chat-7b", - tokenizer_path='internlm/internlm2-chat-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-7b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py index 07164a675..242ba3605 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-7b-sft-hf', - path="internlm/internlm2-chat-7b-sft", - tokenizer_path='internlm/internlm2-chat-7b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-7b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py b/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py index 3e77198d0..77a9bd05e 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py @@ -1,35 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'), - dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), - ], - eos_token_id=92542 -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-math-20b-hf', - path="internlm/internlm2-math-20b", - tokenizer_path='internlm/internlm2-math-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-math-20b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='[UNUSED_TOKEN_145]', + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py b/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py index 80e12a18a..bd01fffa8 100644 --- 
a/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py @@ -7,7 +7,6 @@ dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'), dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), ], - eos_token_id=92542 ) models = [ diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py b/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py index afc9a0981..181e8e1c1 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py @@ -1,35 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'), - dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), - ], - eos_token_id=92542 -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-math-7b-hf', - path="internlm/internlm2-math-7b", - tokenizer_path='internlm/internlm2-math-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-math-7b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='[UNUSED_TOKEN_145]', + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py b/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py index aa9b5d09a..545ab627f 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py @@ -7,7 +7,6 @@ dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'), dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), ], - eos_token_id=92542 ) models = [ diff --git a/configs/models/hf_internlm/hf_internlm2_math_20b.py b/configs/models/hf_internlm/hf_internlm2_math_20b.py new file mode 100644 index 000000000..c0d68572a --- /dev/null +++ b/configs/models/hf_internlm/hf_internlm2_math_20b.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFaceBaseModel + + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='internlm2-math-20b-hf', + path="internlm/internlm2-math-20b", + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/configs/models/hf_internlm/hf_internlm2_math_7b.py b/configs/models/hf_internlm/hf_internlm2_math_7b.py new file mode 100644 index 000000000..bf103d640 --- /dev/null +++ b/configs/models/hf_internlm/hf_internlm2_math_7b.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFaceBaseModel + + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='internlm2-math-7b-hf', + path="internlm/internlm2-math-7b", + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/hf_internlm/hf_internlm_20b.py b/configs/models/hf_internlm/hf_internlm_20b.py index 9af675338..e112f85b4 100644 --- a/configs/models/hf_internlm/hf_internlm_20b.py +++ b/configs/models/hf_internlm/hf_internlm_20b.py @@ -1,22 
+1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm-20b-hf', path="internlm/internlm-20b", - tokenizer_path='internlm/internlm-20b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_internlm/hf_internlm_7b.py b/configs/models/hf_internlm/hf_internlm_7b.py index 649e0c756..15a2294dc 100644 --- a/configs/models/hf_internlm/hf_internlm_7b.py +++ b/configs/models/hf_internlm/hf_internlm_7b.py @@ -1,25 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm-7b-hf', path="internlm/internlm-7b", - tokenizer_path='internlm/internlm-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py b/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py deleted file mode 100644 index 5e0152d56..000000000 --- a/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|User|>:', end='\n'), - dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='internlm-chat-7b-8k-hf', - path="internlm/internlm-chat-7b-8k", - tokenizer_path='internlm/internlm-chat-7b-8k', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', - ) -] diff --git a/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py b/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py deleted file mode 100644 index 7471e68c3..000000000 --- a/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|User|>:', end='\n'), - dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='internlm-chat-7b-v1.1-hf', - path="internlm/internlm-chat-7b-v1_1", - tokenizer_path='internlm/internlm-chat-7b-v1_1', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', - ) -] diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_20b.py 
b/configs/models/hf_internlm/lmdeploy_internlm2_20b.py new file mode 100644 index 000000000..730cb7645 --- /dev/null +++ b/configs/models/hf_internlm/lmdeploy_internlm2_20b.py @@ -0,0 +1,27 @@ +from opencompass.models.turbomind import TurboMindModel + + +models = [ + dict( + type=TurboMindModel, + abbr="internlm2-20b-turbomind", + path="internlm/internlm2-20b", + engine_config=dict( + session_len=32768, + max_batch_size=32, + model_name="internlm2-20b", + tp=2, + ), + gen_config=dict( + top_k=1, + top_p=0.8, + temperature=1.0, + max_new_tokens=2000, + ), + max_out_len=2000, + max_seq_len=32768, + batch_size=32, + concurrency=8, + run_cfg=dict(num_gpus=2, num_procs=1), + ) +] diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py index fcad86d97..0e84ff8bc 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py @@ -15,9 +15,8 @@ path="internlm/internlm2-chat-20b", meta_template=_meta_template, engine_config=dict( - session_len=210000, - max_batch_size=8, - rope_scaling_factor=3.0, + session_len=32768, + max_batch_size=32, model_name="internlm2-chat-20b", tp=2, stop_words=[2, 92542], @@ -29,8 +28,8 @@ max_new_tokens=2000, ), max_out_len=2000, - max_seq_len=210000, - batch_size=1, + max_seq_len=32768, + batch_size=32, concurrency=8, run_cfg=dict(num_gpus=2, num_procs=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py index 424fc1c97..cb192e092 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py @@ -15,9 +15,8 @@ path="internlm/internlm2-chat-7b", meta_template=_meta_template, engine_config=dict( - session_len=210000, - max_batch_size=8, - rope_scaling_factor=2.0, + session_len=32768, + max_batch_size=32, model_name="internlm2-chat-7b", tp=1, stop_words=[2, 92542], @@ -29,8 +28,8 @@ max_new_tokens=2000, ), max_out_len=2000, - max_seq_len=210000, - batch_size=1, + max_seq_len=32768, + batch_size=32, concurrency=8, run_cfg=dict(num_gpus=1, num_procs=1), ) diff --git a/configs/models/hf_llama/hf_llama2_13b.py b/configs/models/hf_llama/hf_llama2_13b.py index 4103c874e..4044f87e1 100644 --- a/configs/models/hf_llama/hf_llama2_13b.py +++ b/configs/models/hf_llama/hf_llama2_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-13b-hf', - path="meta-llama/Llama-2-13b-hf", - tokenizer_path='meta-llama/Llama-2-13b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-13b-hf', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_13b_chat.py b/configs/models/hf_llama/hf_llama2_13b_chat.py index ef85562e6..8460ad422 100644 --- a/configs/models/hf_llama/hf_llama2_13b_chat.py +++ b/configs/models/hf_llama/hf_llama2_13b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", 
begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-13b-chat-hf', - path="meta-llama/Llama-2-13b-chat-hf", - tokenizer_path='meta-llama/Llama-2-13b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-13b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_70b.py b/configs/models/hf_llama/hf_llama2_70b.py index 9bc12a2ad..97d28a4b4 100644 --- a/configs/models/hf_llama/hf_llama2_70b.py +++ b/configs/models/hf_llama/hf_llama2_70b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-70b-hf', - path="meta-llama/Llama-2-70b-hf", - tokenizer_path='meta-llama/Llama-2-70b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-70b-hf', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama2_70b_chat.py b/configs/models/hf_llama/hf_llama2_70b_chat.py index ff25d27d1..6f6351fa3 100644 --- a/configs/models/hf_llama/hf_llama2_70b_chat.py +++ b/configs/models/hf_llama/hf_llama2_70b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-70b-chat-hf', - path="meta-llama/Llama-2-70b-chat-hf", - tokenizer_path='meta-llama/Llama-2-70b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-70b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama2_7b.py b/configs/models/hf_llama/hf_llama2_7b.py index 3d00990e6..beb4d667d 100644 --- a/configs/models/hf_llama/hf_llama2_7b.py +++ b/configs/models/hf_llama/hf_llama2_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-7b-hf', - path="meta-llama/Llama-2-7b-hf", - tokenizer_path='meta-llama/Llama-2-7b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-7b-hf', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - 
batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_7b_chat.py b/configs/models/hf_llama/hf_llama2_7b_chat.py index 4c8807297..e1f953310 100644 --- a/configs/models/hf_llama/hf_llama2_7b_chat.py +++ b/configs/models/hf_llama/hf_llama2_7b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-7b-chat-hf', - path="meta-llama/Llama-2-7b-chat-hf", - tokenizer_path='meta-llama/Llama-2-7b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-7b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_70b.py b/configs/models/hf_llama/hf_llama3_70b.py index f35c18ade..b3cce9504 100644 --- a/configs/models/hf_llama/hf_llama3_70b.py +++ b/configs/models/hf_llama/hf_llama3_70b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-70b-hf", - path="meta-llama/Meta-Llama-3-70B", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + type=HuggingFaceBaseModel, + abbr='llama-3-70b-hf', + path='meta-llama/Meta-Llama-3-70B', + max_out_len=1024, batch_size=8, - batch_padding=True, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_70b_instruct.py b/configs/models/hf_llama/hf_llama3_70b_instruct.py index c19c66157..cb7e85545 100644 --- a/configs/models/hf_llama/hf_llama3_70b_instruct.py +++ b/configs/models/hf_llama/hf_llama3_70b_instruct.py @@ -1,29 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin="<|start_header_id|>user<|end_header_id|>\n\n", end="<|eot_id|>"), - dict(role="BOT", begin="<|start_header_id|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-70b-instruct-hf", - path="meta-llama/Meta-Llama-3-70B-Instruct", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + type=HuggingFacewithChatTemplate, + abbr='llama-3-70b-instruct-hf', + path='meta-llama/Meta-Llama-3-70B-Instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - generation_kwargs={"eos_token_id": [128001, 128009]}, - batch_padding=True, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], ) ] diff --git a/configs/models/hf_llama/hf_llama3_8b.py b/configs/models/hf_llama/hf_llama3_8b.py 
index cbf2a9dab..3ae9f2c36 100644 --- a/configs/models/hf_llama/hf_llama3_8b.py +++ b/configs/models/hf_llama/hf_llama3_8b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-8b-hf", - path="meta-llama/Meta-Llama-3-8B", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + type=HuggingFaceBaseModel, + abbr='llama-3-8b-hf', + path='meta-llama/Meta-Llama-3-8B', + max_out_len=1024, batch_size=8, - batch_padding=True, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_8b_instruct.py b/configs/models/hf_llama/hf_llama3_8b_instruct.py index e0b439d95..1e2fd8f0a 100644 --- a/configs/models/hf_llama/hf_llama3_8b_instruct.py +++ b/configs/models/hf_llama/hf_llama3_8b_instruct.py @@ -1,29 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin="<|start_header_id|>user<|end_header_id|>\n\n", end="<|eot_id|>"), - dict(role="BOT", begin="<|start_header_id|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-8b-instruct-hf", - path="meta-llama/Meta-Llama-3-8B-Instruct", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + type=HuggingFacewithChatTemplate, + abbr='llama-3-8b-instruct-hf', + path='meta-llama/Meta-Llama-3-8B-Instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - generation_kwargs={"eos_token_id": [128001, 128009]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], ) ] diff --git a/configs/models/hf_llama/hf_llama_13b.py b/configs/models/hf_llama/hf_llama_13b.py index 40389b7c8..70d1b9cb8 100644 --- a/configs/models/hf_llama/hf_llama_13b.py +++ b/configs/models/hf_llama/hf_llama_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 13B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-13b-hf', - path="huggyllama/llama-13b", - tokenizer_path='huggyllama/llama-13b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-13b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama_30b.py b/configs/models/hf_llama/hf_llama_30b.py index 493923bbd..063a69275 100644 --- a/configs/models/hf_llama/hf_llama_30b.py +++ b/configs/models/hf_llama/hf_llama_30b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 30B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-30b-hf', - path="huggyllama/llama-30b", - tokenizer_path='huggyllama/llama-30b', - 
tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-30b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_llama/hf_llama_65b.py b/configs/models/hf_llama/hf_llama_65b.py index 1b26f26f2..9db5fcc95 100644 --- a/configs/models/hf_llama/hf_llama_65b.py +++ b/configs/models/hf_llama/hf_llama_65b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 65B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-65b-hf', - path="huggyllama/llama-65b", - tokenizer_path='huggyllama/llama-65b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-65b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama_7b.py b/configs/models/hf_llama/hf_llama_7b.py index 4e09dd748..1100f1196 100644 --- a/configs/models/hf_llama/hf_llama_7b.py +++ b/configs/models/hf_llama/hf_llama_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 7B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-7b-hf', - path="huggyllama/llama-7b", - tokenizer_path='huggyllama/llama-7b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-7b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py new file mode 100644 index 000000000..4d93d6c83 --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py @@ -0,0 +1,24 @@ +from opencompass.models import TurboMindModel + +_meta_template = dict( + round=[ + dict(role="HUMAN", begin='<|begin_of_text|>user<|end_header_id|>\n\n', end='<|eot_id|>'), + dict(role="BOT", begin='<|begin_of_text|>assistant<|end_header_id|>\n\n', end='<|eot_id|>', generate=True), + ], +) + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3-70b-instruct-lmdeploy', + path='meta-llama/Meta-Llama-3-70B-Instruct', + engine_config=dict(session_len=4096, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024, stop_words=[128001, 128009]), + max_out_len=1024, + max_seq_len=4096, + batch_size=16, + concurrency=16, + meta_template=_meta_template, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py new file mode 100644 index 000000000..b393072bd --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py @@ -0,0 +1,24 @@ +from opencompass.models import TurboMindModel + +_meta_template = 
dict( + round=[ + dict(role="HUMAN", begin='<|begin_of_text|>user<|end_header_id|>\n\n', end='<|eot_id|>'), + dict(role="BOT", begin='<|begin_of_text|>assistant<|end_header_id|>\n\n', end='<|eot_id|>', generate=True), + ], +) + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3-8b-instruct-lmdeploy', + path='meta-llama/Meta-Llama-3-8B-Instruct', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024, stop_words=[128001, 128009]), + max_out_len=1024, + max_seq_len=4096, + batch_size=16, + concurrency=16, + meta_template=_meta_template, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py b/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py index b8149a51d..cb0f1a65a 100644 --- a/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py +++ b/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( + type=HuggingFacewithChatTemplate, abbr='mistral-7b-instruct-v0.1-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-Instruct-v0.1', - tokenizer_path='mistralai/Mistral-7B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py b/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py index e109ca58e..188698c77 100644 --- a/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py +++ b/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( + type=HuggingFacewithChatTemplate, abbr='mistral-7b-instruct-v0.2-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-Instruct-v0.2', - tokenizer_path='mistralai/Mistral-7B-Instruct-v0.2', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_v0_1.py b/configs/models/mistral/hf_mistral_7b_v0_1.py index bae2ce32a..3446cf37c 100644 --- a/configs/models/mistral/hf_mistral_7b_v0_1.py +++ b/configs/models/mistral/hf_mistral_7b_v0_1.py @@ -1,24 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( + type=HuggingFaceBaseModel, abbr='mistral-7b-v0.1-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-v0.1', - tokenizer_path='mistralai/Mistral-7B-v0.1', - 
model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_v0_2.py b/configs/models/mistral/hf_mistral_7b_v0_2.py index 02a0a0886..df696e428 100644 --- a/configs/models/mistral/hf_mistral_7b_v0_2.py +++ b/configs/models/mistral/hf_mistral_7b_v0_2.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( + type=HuggingFaceBaseModel, abbr='mistral-7b-v0.2-hf', - type=HuggingFaceCausalLM, - path='alpindale/Mistral-7B-v0.2-hf', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='mistral-community/Mistral-7B-v0.2', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py b/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py new file mode 100644 index 000000000..588c18a70 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='mixtral-8x22b-instruct-v0.1-hf', + path='mistralai/Mixtral-8x22B-Instruct-v0.1', + max_out_len=1024, + batch_size=4, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x22b_v0_1.py b/configs/models/mistral/hf_mixtral_8x22b_v0_1.py new file mode 100644 index 000000000..d84021ab0 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x22b_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='mixtral-8x22b-v0.1-hf', + path='mistralai/Mixtral-8x22B-v0.1', + max_out_len=1024, + batch_size=4, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py b/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py new file mode 100644 index 000000000..c910d2100 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='mixtral-8x7b-instruct-v0.1-hf', + path='mistralai/Mixtral-8x7B-Instruct-v0.1', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x7b_v0_1.py b/configs/models/mistral/hf_mixtral_8x7b_v0_1.py new file mode 100644 index 000000000..252042264 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x7b_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='mixtral-8x7b-v0.1-hf', + path='mistralai/Mixtral-8x7B-v0.1', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/mixtral/mixtral_8x7b_32k.py b/configs/models/mistral/mixtral_8x7b_32k.py similarity index 100% rename from configs/models/mixtral/mixtral_8x7b_32k.py rename to configs/models/mistral/mixtral_8x7b_32k.py diff --git 
a/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py b/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py index 9c9ab08f1..4454c32f5 100644 --- a/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py +++ b/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py @@ -7,7 +7,6 @@ dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py b/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py index b6c565c25..010f9bf12 100644 --- a/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py +++ b/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py @@ -7,7 +7,6 @@ dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mistral/vllm_mistral_7b_v0_1.py b/configs/models/mistral/vllm_mistral_7b_v0_1.py new file mode 100644 index 000000000..32486ce2d --- /dev/null +++ b/configs/models/mistral/vllm_mistral_7b_v0_1.py @@ -0,0 +1,17 @@ +from opencompass.models import VLLM + + +models = [ + dict( + type=VLLM, + abbr='mistral-7b-v0.1-vllm', + path='mistralai/Mistral-7B-v0.1', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + model_kwargs=dict(dtype='bfloat16'), + generation_kwargs=dict(temperature=0, top_p=1, max_tokens=2048, stop_token_ids=[2]), + run_cfg=dict(num_gpus=1, num_procs=1), + stop_words=['[INST]'], + ) +] diff --git a/configs/models/mistral/vllm_mistral_7b_v0_2.py b/configs/models/mistral/vllm_mistral_7b_v0_2.py new file mode 100644 index 000000000..22931d9ad --- /dev/null +++ b/configs/models/mistral/vllm_mistral_7b_v0_2.py @@ -0,0 +1,17 @@ +from opencompass.models import VLLM + + +models = [ + dict( + type=VLLM, + abbr='mistral-7b-v0.2-vllm', + path='mistral-community/Mistral-7B-v0.2', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + model_kwargs=dict(dtype='bfloat16'), + generation_kwargs=dict(temperature=0, top_p=1, max_tokens=2048, stop_token_ids=[2]), + run_cfg=dict(num_gpus=1, num_procs=1), + stop_words=['[INST]'], + ) +] diff --git a/configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py b/configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py similarity index 97% rename from configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py rename to configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py index 6f26822f4..894be13ca 100644 --- a/configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py +++ b/configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py @@ -7,7 +7,6 @@ dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py b/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py deleted file mode 100644 index 89283ef49..000000000 --- a/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) - -models = [ - dict( - abbr='mixtral-8x22b-instruct-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x22B-Instruct-v0.1', - tokenizer_path='mistralai/Mixtral-8x22B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', 
- truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - batch_padding=True, - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py b/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py deleted file mode 100644 index aa60b4089..000000000 --- a/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -models = [ - dict( - abbr='mixtral-8x22b-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x22B-v0.1', - tokenizer_path='mistralai/Mixtral-8x22B-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py b/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py deleted file mode 100644 index 0c31f3c80..000000000 --- a/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) - -models = [ - dict( - abbr='mixtral-8x7b-instruct-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x7B-Instruct-v0.1', - tokenizer_path='mistralai/Mixtral-8x7B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - batch_padding=True, - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py b/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py deleted file mode 100644 index 71d6489e5..000000000 --- a/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -models = [ - dict( - abbr='mixtral-8x7b-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x7B-v0.1', - tokenizer_path='mistralai/Mixtral-8x7B-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige2_8b_chat.py b/configs/models/nanbeige/hf_nanbeige2_8b_chat.py index a399a5d49..cb9dd4649 100644 --- a/configs/models/nanbeige/hf_nanbeige2_8b_chat.py +++ b/configs/models/nanbeige/hf_nanbeige2_8b_chat.py @@ -1,36 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin="<|im_start|>system\n你是一个名为\"南北阁\"的人工智能助手,正在与人类用户进行交谈。你的目标是以最有帮助和最逻辑的方式回答问题,同时确保内容的安全性。你的回答中不应包含任何有害、政治化、宗教化、不道德、种族主义、非法的内容。请确保你的回答不带有社会偏见,符合社会主义价值观。如果遇到的问题无意义或事实上不连贯,请不要回答错误的内容,而是解释问题为何无效或不连贯。如果你不知道问题的答案,也请勿提供错误的信息。<|im_end|>\n", - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate 
models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='nanbeige2-8b-chat-hf', - path="Nanbeige/Nanbeige2-8B-Chat", - tokenizer_path='Nanbeige/Nanbeige2-8B-Chat', - model_kwargs=dict( - device_map='auto', - torch_dtype='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=100, - max_seq_len=4096, + path='Nanbeige/Nanbeige2-8B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base.py b/configs/models/nanbeige/hf_nanbeige_16b_base.py deleted file mode 100644 index 322f18a4e..000000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_base.py +++ /dev/null @@ -1,33 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='', end=''), - dict(role='BOT', begin='', end='\n\n', generate=True), - ], -) - -models = [ - dict( - abbr='nanbeige-16b-base-hf', - type=HuggingFaceCausalLM, - path='Nanbeige/Nanbeige-16B-Base', - tokenizer_path='Nanbeige/Nanbeige-16B-Base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=1024, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py deleted file mode 100644 index d0c1c2eab..000000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='', end=''), - dict(role='BOT', begin='', end='\n\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='nanbeige-16b-base-32k-hf', - path="Nanbeige/Nanbeige-16B-Base-32K", - tokenizer_path='Nanbeige/Nanbeige-16B-Base-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=1024, - max_seq_len=8192, - batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py deleted file mode 100644 index 8363ae961..000000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='### Human: \n', end='\n\n'), - dict(role='BOT', begin='### Assistant: ', end='', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='nanbeige-16b-chat-32k-hf', - path="Nanbeige/Nanbeige-16B-Chat-32K", - tokenizer_path='Nanbeige/Nanbeige-16B-Chat-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - 
meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='', - ) -] diff --git a/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py b/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py index 1b40ef2a2..d09690290 100644 --- a/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py +++ b/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<用户>'), - dict(role="BOT", begin="", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, - abbr='minicpm-2b-dpo-hf', + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-dpo-fp32-hf', path='openbmb/MiniCPM-2B-dpo-fp32', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py b/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py new file mode 100644 index 000000000..43303b246 --- /dev/null +++ b/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-sft-bf16-hf', + path='openbmb/MiniCPM-2B-sft-bf16', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py b/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py index b8ea8c325..a13fbcd6d 100644 --- a/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py +++ b/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<用户>'), - dict(role="BOT", begin="", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, - abbr='minicpm-2b-sft-hf', + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-sft-fp32-hf', path='openbmb/MiniCPM-2B-sft-fp32', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/opt/hf_opt_125m.py b/configs/models/opt/hf_opt_125m.py index 760e65b02..ec0c68d36 100644 --- a/configs/models/opt/hf_opt_125m.py +++ b/configs/models/opt/hf_opt_125m.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel -# OPT-125M -opt125m = dict( - type=HuggingFaceCausalLM, - # the folowing are HuggingFaceCausalLM init parameters - path='facebook/opt-125m', - tokenizer_path='facebook/opt-125m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - max_seq_len=2048, - # the folowing are not HuggingFaceCausalLM init parameters - abbr='opt125m', # Model abbreviation - max_out_len=100, 
# Maximum number of generated tokens - batch_size=128, - run_cfg=dict(num_gpus=1), # Run configuration for specifying resource requirements +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='opt-125m-hf', + path='facebook/opt-125m', + max_out_len=1024, + batch_size=64, + run_cfg=dict(num_gpus=1), ) - -models = [opt125m] +] diff --git a/configs/models/opt/hf_opt_350m.py b/configs/models/opt/hf_opt_350m.py index 33cbacc00..6a25db0a7 100644 --- a/configs/models/opt/hf_opt_350m.py +++ b/configs/models/opt/hf_opt_350m.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # the folowing are HuggingFaceCausalLM init parameters - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - max_seq_len=2048, - # the folowing are not HuggingFaceCausalLM init parameters - abbr='opt350m', # Model abbreviation - max_out_len=100, # Maximum number of generated tokens - batch_size=64, - run_cfg=dict(num_gpus=1), # Run configuration for specifying resource requirements +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='opt-350m-hf', + path='facebook/opt-350m', + max_out_len=1024, + batch_size=32, + run_cfg=dict(num_gpus=1), ) - -models = [opt350m] +] diff --git a/configs/models/others/hf_command_r_plus.py b/configs/models/others/hf_command_r_plus.py index ce41ab3d6..bdbf924f6 100644 --- a/configs/models/others/hf_command_r_plus.py +++ b/configs/models/others/hf_command_r_plus.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|START_OF_TURN_TOKEN|><|USER_TOKEN|>', end='<|END_OF_TURN_TOKEN|>'), - dict(role="BOT", begin="<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", end='<|END_OF_TURN_TOKEN|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='command-r-plus-hf', - path="CohereForAI/c4ai-command-r-plus", - model_kwargs=dict(device_map='auto', trust_remote_code=True), - tokenizer_kwargs=dict(padding_side='left', truncation_side='left', trust_remote_code=True), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='CohereForAI/c4ai-command-r-plus', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=8, num_procs=1), - end_str='<|END_OF_TURN_TOKEN|>', - batch_padding=True, + run_cfg=dict(num_gpus=8), ) ] diff --git a/configs/models/others/hf_dbrx_base.py b/configs/models/others/hf_dbrx_base.py new file mode 100644 index 000000000..985e6add3 --- /dev/null +++ b/configs/models/others/hf_dbrx_base.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='dbrx-base-hf', + path='databricks/dbrx-base', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/others/hf_dbrx_instruct.py b/configs/models/others/hf_dbrx_instruct.py index af0a54b74..a207f3bb9 100644 --- a/configs/models/others/hf_dbrx_instruct.py +++ b/configs/models/others/hf_dbrx_instruct.py @@ -1,34 +1,12 @@ - -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', 
begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='dbrx-instruct-hf', - path="databricks/dbrx-instruct", - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='databricks/dbrx-instruct', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=8, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=8), ) ] diff --git a/configs/models/others/hf_dolphin_21_mistral_7b.py b/configs/models/others/hf_dolphin_21_mistral_7b.py index ecc0b1963..89da9f5fb 100644 --- a/configs/models/others/hf_dolphin_21_mistral_7b.py +++ b/configs/models/others/hf_dolphin_21_mistral_7b.py @@ -6,7 +6,6 @@ dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_fashiongpt_70b_v11.py b/configs/models/others/hf_fashiongpt_70b_v11.py index dbb2d7e4e..f4ddcaab2 100644 --- a/configs/models/others/hf_fashiongpt_70b_v11.py +++ b/configs/models/others/hf_fashiongpt_70b_v11.py @@ -6,7 +6,6 @@ dict(role="HUMAN", begin='### User:\n', end='\n'), dict(role="BOT", begin="### Assistant:\n", generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_orionstar_yi_34b_chat.py b/configs/models/others/hf_orionstar_yi_34b_chat.py index 9fba307b8..ab8928dbf 100644 --- a/configs/models/others/hf_orionstar_yi_34b_chat.py +++ b/configs/models/others/hf_orionstar_yi_34b_chat.py @@ -7,7 +7,6 @@ dict(role="HUMAN", begin='Human: ', end='\n\n'), dict(role="BOT", begin="Assistant: <|endoftext|>", end='<|endoftext|>', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_telechat_7b_chat.py b/configs/models/others/hf_telechat_7b_chat.py index 58c496e39..60dbe28ea 100644 --- a/configs/models/others/hf_telechat_7b_chat.py +++ b/configs/models/others/hf_telechat_7b_chat.py @@ -6,7 +6,6 @@ dict(role="HUMAN", begin='<_user>'), dict(role="BOT", begin="<_bot>", end='<_end>', generate=True), ], - eos_token_id=160133 ) models = [ diff --git a/configs/models/others/vllm_orionstar_14b_longchat.py b/configs/models/others/vllm_orionstar_14b_longchat.py index 67ca61d03..873f31e26 100644 --- a/configs/models/others/vllm_orionstar_14b_longchat.py +++ b/configs/models/others/vllm_orionstar_14b_longchat.py @@ -7,7 +7,6 @@ dict(role="HUMAN", begin='Human: ', end='\n'), dict(role="BOT", begin="Assistant: ", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/qwen/hf_qwen1_5_0_5b.py b/configs/models/qwen/hf_qwen1_5_0_5b.py index 62a219f08..60014be65 100644 --- a/configs/models/qwen/hf_qwen1_5_0_5b.py +++ b/configs/models/qwen/hf_qwen1_5_0_5b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-0.5b-hf', - path="Qwen/Qwen1.5-0.5B", - tokenizer_path='Qwen/Qwen1.5-0.5B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - 
pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-0.5B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_0_5b_chat.py b/configs/models/qwen/hf_qwen1_5_0_5b_chat.py index c7413332d..eb5c22dad 100644 --- a/configs/models/qwen/hf_qwen1_5_0_5b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_0_5b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-0.5b-chat-hf', - path="Qwen/Qwen1.5-0.5B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-0.5B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_14b.py b/configs/models/qwen/hf_qwen1_5_14b.py index 1f6d17097..c338d1b99 100644 --- a/configs/models/qwen/hf_qwen1_5_14b.py +++ b/configs/models/qwen/hf_qwen1_5_14b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-14b-hf', - path="Qwen/Qwen1.5-14B", - tokenizer_path='Qwen/Qwen1.5-14B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-14B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_14b_chat.py b/configs/models/qwen/hf_qwen1_5_14b_chat.py index f6bff1f95..81efa2a39 100644 --- a/configs/models/qwen/hf_qwen1_5_14b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_14b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-14b-chat-hf', - path="Qwen/Qwen1.5-14B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-14B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_1_8b.py b/configs/models/qwen/hf_qwen1_5_1_8b.py index 71492cf80..5caf3efab 100644 --- 
a/configs/models/qwen/hf_qwen1_5_1_8b.py +++ b/configs/models/qwen/hf_qwen1_5_1_8b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-1.8b-hf', - path="Qwen/Qwen1.5-1.8B", - tokenizer_path='Qwen/Qwen1.5-1.8B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-1.8B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_1_8b_chat.py b/configs/models/qwen/hf_qwen1_5_1_8b_chat.py index 4e090de04..e1682e070 100644 --- a/configs/models/qwen/hf_qwen1_5_1_8b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_1_8b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-1.8b-chat-hf', - path="Qwen/Qwen1.5-1.8B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-1.8B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_32b.py b/configs/models/qwen/hf_qwen1_5_32b.py index 9ad947aff..e886873da 100644 --- a/configs/models/qwen/hf_qwen1_5_32b.py +++ b/configs/models/qwen/hf_qwen1_5_32b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-32b-hf', - path="Qwen/Qwen1.5-32B", - tokenizer_path='Qwen/Qwen1.5-32B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-32B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_32b_chat.py b/configs/models/qwen/hf_qwen1_5_32b_chat.py index 1e215ff61..03506d13e 100644 --- a/configs/models/qwen/hf_qwen1_5_32b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_32b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-32b-chat-hf', - path="Qwen/Qwen1.5-32B-Chat", - model_kwargs=dict( - 
device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-32B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_4b.py b/configs/models/qwen/hf_qwen1_5_4b.py index 6aa57263e..e63eaec42 100644 --- a/configs/models/qwen/hf_qwen1_5_4b.py +++ b/configs/models/qwen/hf_qwen1_5_4b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-4b-hf', - path="Qwen/Qwen1.5-4B", - tokenizer_path='Qwen/Qwen1.5-4B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-4B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_4b_chat.py b/configs/models/qwen/hf_qwen1_5_4b_chat.py index 427c78494..32475a5ff 100644 --- a/configs/models/qwen/hf_qwen1_5_4b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_4b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-4b-chat-hf', - path="Qwen/Qwen1.5-4B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-4B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_72b.py b/configs/models/qwen/hf_qwen1_5_72b.py index 3dd6e6380..d850b2364 100644 --- a/configs/models/qwen/hf_qwen1_5_72b.py +++ b/configs/models/qwen/hf_qwen1_5_72b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-72b-hf', - path="Qwen/Qwen1.5-72B", - tokenizer_path='Qwen/Qwen1.5-72B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-72B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_72b_chat.py b/configs/models/qwen/hf_qwen1_5_72b_chat.py index f02794427..1ff66255b 100644 --- a/configs/models/qwen/hf_qwen1_5_72b_chat.py +++ 
b/configs/models/qwen/hf_qwen1_5_72b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-72b-chat-hf', - path="Qwen/Qwen1.5-72B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-72B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_7b.py b/configs/models/qwen/hf_qwen1_5_7b.py index d9df3031b..2649ffff2 100644 --- a/configs/models/qwen/hf_qwen1_5_7b.py +++ b/configs/models/qwen/hf_qwen1_5_7b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-7b-hf', - path="Qwen/Qwen1.5-7B", - tokenizer_path='Qwen/Qwen1.5-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_7b_chat.py b/configs/models/qwen/hf_qwen1_5_7b_chat.py index 43825c225..b62c3bed2 100644 --- a/configs/models/qwen/hf_qwen1_5_7b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_7b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-7b-chat-hf', - path="Qwen/Qwen1.5-7B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-7B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_14b.py b/configs/models/qwen/hf_qwen_14b.py index 83c628670..8c15c0329 100644 --- a/configs/models/qwen/hf_qwen_14b.py +++ b/configs/models/qwen/hf_qwen_14b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-14b-hf', - path="Qwen/Qwen-14B", - tokenizer_path='Qwen/Qwen-14B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - 
tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-14B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_14b_chat.py b/configs/models/qwen/hf_qwen_14b_chat.py index 47a609bc0..4d9cccee6 100644 --- a/configs/models/qwen/hf_qwen_14b_chat.py +++ b/configs/models/qwen/hf_qwen_14b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-14b-chat-hf', - path="Qwen/Qwen-14B-Chat", - tokenizer_path='Qwen/Qwen-14B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-14B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/qwen/hf_qwen_1_8b.py b/configs/models/qwen/hf_qwen_1_8b.py index 7ba7ddba1..f82d3db70 100644 --- a/configs/models/qwen/hf_qwen_1_8b.py +++ b/configs/models/qwen/hf_qwen_1_8b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-1.8b-hf', - path="Qwen/Qwen-1_8B", - tokenizer_path='Qwen/Qwen-1_8B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-1_8B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_1_8b_chat.py b/configs/models/qwen/hf_qwen_1_8b_chat.py index fb4f488aa..1838a04aa 100644 --- a/configs/models/qwen/hf_qwen_1_8b_chat.py +++ b/configs/models/qwen/hf_qwen_1_8b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], - eos_token_id=151645, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-1.8b-chat-hf', - path="Qwen/Qwen-1_8B-Chat", - tokenizer_path='Qwen/Qwen-1_8B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-1_8B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - 
run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_72b.py b/configs/models/qwen/hf_qwen_72b.py index 686a435d1..325315b8e 100644 --- a/configs/models/qwen/hf_qwen_72b.py +++ b/configs/models/qwen/hf_qwen_72b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-72b-hf', - path="Qwen/Qwen-72B", - tokenizer_path='Qwen/Qwen-72B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-72B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen_72b_chat.py b/configs/models/qwen/hf_qwen_72b_chat.py index 83da466f3..255aeb5db 100644 --- a/configs/models/qwen/hf_qwen_72b_chat.py +++ b/configs/models/qwen/hf_qwen_72b_chat.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-72b-chat-hf', - path="Qwen/Qwen-72B-Chat", - tokenizer_path='Qwen/Qwen-72B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False,), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-72B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen_7b.py b/configs/models/qwen/hf_qwen_7b.py index cb60c1562..17ba5b47b 100644 --- a/configs/models/qwen/hf_qwen_7b.py +++ b/configs/models/qwen/hf_qwen_7b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-7b-hf', - path="Qwen/Qwen-7B", - tokenizer_path='Qwen/Qwen-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_7b_chat.py b/configs/models/qwen/hf_qwen_7b_chat.py index 88dda4f0d..e5479fb6e 100644 --- a/configs/models/qwen/hf_qwen_7b_chat.py +++ b/configs/models/qwen/hf_qwen_7b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import 
HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-7b-chat-hf', - path="Qwen/Qwen-7B-Chat", - tokenizer_path='Qwen/Qwen-7B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-7B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/qwen/vllm_qwen1_5_14b_chat.py b/configs/models/qwen/vllm_qwen1_5_14b_chat.py index 15cd97bbb..4af727293 100644 --- a/configs/models/qwen/vllm_qwen1_5_14b_chat.py +++ b/configs/models/qwen/vllm_qwen1_5_14b_chat.py @@ -6,7 +6,6 @@ dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=151645, ) models = [ diff --git a/configs/models/qwen/vllm_qwen1_5_72b_chat.py b/configs/models/qwen/vllm_qwen1_5_72b_chat.py index 035c7a8a6..68f1e73c4 100644 --- a/configs/models/qwen/vllm_qwen1_5_72b_chat.py +++ b/configs/models/qwen/vllm_qwen1_5_72b_chat.py @@ -6,7 +6,6 @@ dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=151645, ) models = [ diff --git a/configs/models/skywork/hf_skywork_13b.py b/configs/models/skywork/hf_skywork_13b.py index 495a33922..1b56c3a6e 100644 --- a/configs/models/skywork/hf_skywork_13b.py +++ b/configs/models/skywork/hf_skywork_13b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='skywork-13b-hf', - path="Skywork/Skywork-13B-base", - tokenizer_path='Skywork/Skywork-13B-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='Skywork/Skywork-13B-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v13.py b/configs/models/vicuna/hf_vicuna_13b_v13.py index 6a04a3c46..74f4e147c 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v13.py +++ b/configs/models/vicuna/hf_vicuna_13b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.3-hf', - path="lmsys/vicuna-13b-v1.3", - tokenizer_path='lmsys/vicuna-13b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-13b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=2, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v15.py 
b/configs/models/vicuna/hf_vicuna_13b_v15.py index c87b9dc79..28366ea9b 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v15.py +++ b/configs/models/vicuna/hf_vicuna_13b_v15.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.5-hf', - path="lmsys/vicuna-13b-v1.5", - tokenizer_path='lmsys/vicuna-13b-v1.5', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-13b-v1.5', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v15_16k.py b/configs/models/vicuna/hf_vicuna_13b_v15_16k.py index a8e2aa5fc..3caf3f571 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v15_16k.py +++ b/configs/models/vicuna/hf_vicuna_13b_v15_16k.py @@ -1,30 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='USER: '), - dict(role="BOT", begin=" ASSISTANT:", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.5-16k-hf', - path="lmsys/vicuna-13b-v1.5-16k", - tokenizer_path='lmsys/vicuna-13b-v1.5-16k', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=8192, + path='lmsys/vicuna-13b-v1.5-16k', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_33b_v13.py b/configs/models/vicuna/hf_vicuna_33b_v13.py index 0f280e636..036cbc638 100644 --- a/configs/models/vicuna/hf_vicuna_33b_v13.py +++ b/configs/models/vicuna/hf_vicuna_33b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-33b-v1.3-hf', - path="lmsys/vicuna-33b-v1.3", - tokenizer_path='lmsys/vicuna-33b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-33b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=4, num_procs=1) + run_cfg=dict(num_gpus=2), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v13.py b/configs/models/vicuna/hf_vicuna_7b_v13.py index 67e1c79be..396264554 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v13.py +++ b/configs/models/vicuna/hf_vicuna_7b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + 
type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.3-hf', - path="lmsys/vicuna-7b-v1.3", - tokenizer_path='lmsys/vicuna-7b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-7b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v15.py b/configs/models/vicuna/hf_vicuna_7b_v15.py index 06f3ef73f..b7888f04c 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v15.py +++ b/configs/models/vicuna/hf_vicuna_7b_v15.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.5-hf', - path="lmsys/vicuna-7b-v1.5", - tokenizer_path='lmsys/vicuna-7b-v1.5', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-7b-v1.5', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v15_16k.py b/configs/models/vicuna/hf_vicuna_7b_v15_16k.py index e8ad47df0..c8b557ab5 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v15_16k.py +++ b/configs/models/vicuna/hf_vicuna_7b_v15_16k.py @@ -1,30 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='USER: '), - dict(role="BOT", begin=" ASSISTANT:", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.5-16k-hf', - path="lmsys/vicuna-7b-v1.5-16k", - tokenizer_path='lmsys/vicuna-7b-v1.5-16k', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=8192, + path='lmsys/vicuna-7b-v1.5-16k', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/yi/hf_yi_34b.py b/configs/models/yi/hf_yi_34b.py index 3f20f4167..7fc59d464 100644 --- a/configs/models/yi/hf_yi_34b.py +++ b/configs/models/yi/hf_yi_34b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='yi-34b-hf', path='01-ai/Yi-34B', - tokenizer_path='01-ai/Yi-34B', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git 
a/configs/models/yi/hf_yi_34b_200k.py b/configs/models/yi/hf_yi_34b_200k.py deleted file mode 100644 index a8e207bf8..000000000 --- a/configs/models/yi/hf_yi_34b_200k.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFace - - -models = [ - dict( - type=HuggingFace, - abbr='yi-34b-200k-hf', - path='01-ai/Yi-34B-200K', - tokenizer_path='01-ai/Yi-34B-200K', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - ) -] diff --git a/configs/models/yi/hf_yi_34b_chat.py b/configs/models/yi/hf_yi_34b_chat.py index 352c58bfa..635ee3b8d 100644 --- a/configs/models/yi/hf_yi_34b_chat.py +++ b/configs/models/yi/hf_yi_34b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='yi-34b-chat-hf', path='01-ai/Yi-34B-Chat', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/yi/hf_yi_6b.py b/configs/models/yi/hf_yi_6b.py index c376d8680..6ce3b1347 100644 --- a/configs/models/yi/hf_yi_6b.py +++ b/configs/models/yi/hf_yi_6b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='yi-6b-hf', path='01-ai/Yi-6B', - tokenizer_path='01-ai/Yi-6B', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/yi/hf_yi_6b_200k.py b/configs/models/yi/hf_yi_6b_200k.py deleted file mode 100644 index bc2cd1254..000000000 --- a/configs/models/yi/hf_yi_6b_200k.py +++ /dev/null @@ -1,23 +0,0 @@ -from opencompass.models import HuggingFace - -models = [ - dict( - type=HuggingFace, - abbr='yi-6b-200k-hf', - path='01-ai/Yi-6B-200K', - tokenizer_path='01-ai/Yi-6B-200K', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/yi/hf_yi_6b_chat.py b/configs/models/yi/hf_yi_6b_chat.py index 92a46e693..f04f11029 100644 --- a/configs/models/yi/hf_yi_6b_chat.py +++ b/configs/models/yi/hf_yi_6b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", 
begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='yi-6b-chat-hf', path='01-ai/Yi-6B-Chat', - tokenizer_path='01-ai/Yi-6B-Chat', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/zephyr/hf_zephyr_7b_beta.py b/configs/models/zephyr/hf_zephyr_7b_beta.py index 916ebe2cc..da58c31ee 100644 --- a/configs/models/zephyr/hf_zephyr_7b_beta.py +++ b/configs/models/zephyr/hf_zephyr_7b_beta.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|user|>\n', end=''), - dict(role="BOT", begin="<|assistant|>\n", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='zephyr-7b-beta-hf', path='HuggingFaceH4/zephyr-7b-beta', - tokenizer_path='HuggingFaceH4/zephyr-7b-beta', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/summarizers/chat_OC15.py b/configs/summarizers/chat_OC15.py new file mode 100644 index 000000000..6379a1157 --- /dev/null +++ b/configs/summarizers/chat_OC15.py @@ -0,0 +1,81 @@ +from mmengine.config import read_base + +with read_base(): + from .groups.mmlu import mmlu_summary_groups + from .groups.cmmlu import cmmlu_summary_groups + from .groups.ceval import ceval_summary_groups + from .groups.bbh import bbh_summary_groups + from .groups.GaokaoBench import GaokaoBench_summary_groups + from .groups.lcbench import lcbench_summary_groups + +other_summary_groups = [ + { + 'name': 'average', + 'subsets': [ + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], + ], + }, +] + +summarizer = dict( + dataset_abbrs=[ + ['average', 'naive_average'], + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 
'Prompt-level-strict-accuracy'], + + '', + + 'mmlu', + 'mmlu-stem', + 'mmlu-social-science', + 'mmlu-humanities', + 'mmlu-other', + + 'cmmlu', + 'cmmlu-stem', + 'cmmlu-social-science', + 'cmmlu-humanities', + 'cmmlu-other', + 'cmmlu-china-specific', + + 'ceval', + 'ceval-stem', + 'ceval-social-science', + 'ceval-humanities', + 'ceval-other', + 'ceval-hard', + ], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith("_summary_groups")], []), +) diff --git a/configs/summarizers/chat_OC15_multi_faceted.py b/configs/summarizers/chat_OC15_multi_faceted.py new file mode 100644 index 000000000..badeac518 --- /dev/null +++ b/configs/summarizers/chat_OC15_multi_faceted.py @@ -0,0 +1,130 @@ +from mmengine.config import read_base +from opencompass.summarizers import MultiFacetedSummarizer + +with read_base(): + from .groups.mmlu import mmlu_summary_groups + from .groups.cmmlu import cmmlu_summary_groups + from .groups.ceval import ceval_summary_groups + from .groups.bbh import bbh_summary_groups + from .groups.GaokaoBench import GaokaoBench_summary_groups + +other_summary_groups = [ + { + 'name': 'average', + 'subsets': [ + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], + ], + }, +] + +overall_dataset_abbrs = [ + ['average', 'naive_average'], + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], +] + +mmlu_summary_groups_dict = {g['name']: g['subsets'] for g in mmlu_summary_groups} +mmlu_dataset_abbrs = [ + ['mmlu', 'naive_average'], + ['mmlu-stem', 'naive_average'], + ['mmlu-social-science', 'naive_average'], + ['mmlu-humanities', 'naive_average'], + ['mmlu-other', 'naive_average'], + *mmlu_summary_groups_dict['mmlu-stem'], + *mmlu_summary_groups_dict['mmlu-social-science'], + *mmlu_summary_groups_dict['mmlu-humanities'], + *mmlu_summary_groups_dict['mmlu-other'], +] + +cmmlu_summary_groups_dict = {g['name']: g['subsets'] for g in cmmlu_summary_groups} +cmmlu_dataset_abbrs = [ + ['cmmlu', 'naive_average'], + ['cmmlu-stem', 'naive_average'], + ['cmmlu-social-science', 'naive_average'], + ['cmmlu-humanities', 'naive_average'], + ['cmmlu-other', 'naive_average'], + ['cmmlu-china-specific', 'naive_average'], + *cmmlu_summary_groups_dict['cmmlu-stem'], + *cmmlu_summary_groups_dict['cmmlu-social-science'], + *cmmlu_summary_groups_dict['cmmlu-humanities'], + *cmmlu_summary_groups_dict['cmmlu-other'], +] + +ceval_summary_groups_dict = {g['name']: g['subsets'] for g in ceval_summary_groups} +ceval_dataset_abbrs = [ + ['ceval', 'naive_average'], + ['ceval-stem', 'naive_average'], + ['ceval-social-science', 'naive_average'], + 
['ceval-humanities', 'naive_average'], + ['ceval-other', 'naive_average'], + ['ceval-hard', 'naive_average'], + *ceval_summary_groups_dict['ceval-stem'], + *ceval_summary_groups_dict['ceval-social-science'], + *ceval_summary_groups_dict['ceval-humanities'], + *ceval_summary_groups_dict['ceval-other'], +] + +bbh_summary_groups_dict = {g['name']: g['subsets'] for g in bbh_summary_groups} +bbh_dataset_abbrs = [ + ['bbh', 'naive_average'], + *bbh_summary_groups_dict['bbh'], +] + +GaokaoBench_summary_groups_dict = {g['name']: g['subsets'] for g in GaokaoBench_summary_groups} +GaokaoBench_dataset_abbrs = [ + ['GaokaoBench', 'weighted_average'], + *GaokaoBench_summary_groups_dict['GaokaoBench'], +] + +sanitized_mbpp_dataset_abbrs = [ + ['sanitized_mbpp', 'score'], + ['sanitized_mbpp', 'pass'], + ['sanitized_mbpp', 'failed'], + ['sanitized_mbpp', 'wrong_answer'], + ['sanitized_mbpp', 'timeout'], +] + +summarizer = dict( + type=MultiFacetedSummarizer, + dataset_abbrs_list=[ + {'name': 'mmlu', 'dataset_abbrs': mmlu_dataset_abbrs}, + {'name': 'cmmlu', 'dataset_abbrs': cmmlu_dataset_abbrs}, + {'name': 'ceval', 'dataset_abbrs': ceval_dataset_abbrs}, + {'name': 'bbh', 'dataset_abbrs': bbh_dataset_abbrs}, + {'name': 'GaokaoBench', 'dataset_abbrs': GaokaoBench_dataset_abbrs}, + {'name': 'sanitized_mbpp', 'dataset_abbrs': sanitized_mbpp_dataset_abbrs}, + {'name': 'overall', 'dataset_abbrs': overall_dataset_abbrs}, + ], + summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []), +) diff --git a/docs/en/get_started/quick_start.md b/docs/en/get_started/quick_start.md index d9e5bc071..caba742c9 100644 --- a/docs/en/get_started/quick_start.md +++ b/docs/en/get_started/quick_start.md @@ -80,13 +80,8 @@ For HuggingFace models, users can set model parameters directly through the comm ```bash python run.py --datasets siqa_gen winograd_ppl \ ---hf-path facebook/opt-125m \ ---model-kwargs device_map='auto' \ ---tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ ---max-seq-len 2048 \ ---max-out-len 100 \ ---batch-size 128 \ ---num-gpus 1 # Number of minimum required GPUs +--hf-type base \ +--hf-path facebook/opt-125m ``` Note that in this way, OpenCompass only evaluates one model at a time, while other ways can evaluate multiple models at once. @@ -99,12 +94,14 @@ Note that in this way, OpenCompass only evaluates one model at a time, while oth :animate: fade-in-slide-down ```bash python run.py --datasets siqa_gen winograd_ppl \ +--hf-type base \ # HuggingFace model type, base or chat --hf-path facebook/opt-125m \ # HuggingFace model path --tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer path (if the same as the model path, can be omitted) --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # Arguments to construct the tokenizer --model-kwargs device_map='auto' \ # Arguments to construct the model --max-seq-len 2048 \ # Maximum sequence length the model can accept --max-out-len 100 \ # Maximum number of tokens to generate +--min-out-len 100 \ # Minimum number of tokens to generate --batch-size 64 \ # Batch size --num-gpus 1 # Number of GPUs required to run the model ``` @@ -146,28 +143,22 @@ python run.py configs/eval_demo.py OpenCompass provides a series of pre-defined model configurations under `configs/models`. 
Below is the configuration snippet related to [opt-350m](https://github.com/open-compass/opencompass/blob/main/configs/models/opt/hf_opt_350m.py) (`configs/models/opt/hf_opt_350m.py`): ```python -# Evaluate models supported by HuggingFace's `AutoModelForCausalLM` using `HuggingFaceCausalLM` -from opencompass.models import HuggingFaceCausalLM - -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # Initialization parameters for `HuggingFaceCausalLM` - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - # Below are common parameters for all models, not specific to HuggingFaceCausalLM - abbr='opt350m', # Model abbreviation for result display - max_seq_len=2048, # The maximum length of the entire sequence - max_out_len=100, # Maximum number of generated tokens - batch_size=64, # batchsize - run_cfg=dict(num_gpus=1), # The required GPU numbers for this model +# Evaluate models supported by HuggingFace's `AutoModelForCausalLM` using `HuggingFaceBaseModel` +from opencompass.models import HuggingFaceBaseModel + +models = [ + # OPT-350M + dict( + type=HuggingFaceBaseModel, + # Initialization parameters for `HuggingFaceBaseModel` + path='facebook/opt-350m', + # Below are common parameters for all models, not specific to HuggingFaceBaseModel + abbr='opt-350m-hf', # Model abbreviation + max_out_len=1024, # Maximum number of generated tokens + batch_size=32, # Batch size + run_cfg=dict(num_gpus=1), # The required GPU numbers for this model ) +] ``` When using configurations, we can specify the relevant files through the command-line argument ` --models` or import the model configurations into the `models` list in the configuration file using the inheritance mechanism. 
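For reference, a minimal sketch of the inheritance mechanism mentioned above, pulling the predefined opt-350m config into an evaluation config via `read_base()` (the dataset import path and the `siqa_datasets` variable name are illustrative assumptions, not taken from this patch):

```python
# Minimal sketch: reuse the predefined opt-350m model config in an eval config.
# The dataset import (siqa_gen / siqa_datasets) is an assumed example.
from mmengine.config import read_base

with read_base():
    from .models.opt.hf_opt_350m import models as hf_opt_350m_models
    from .datasets.siqa.siqa_gen import siqa_datasets  # assumed dataset config

datasets = [*siqa_datasets]
models = [*hf_opt_350m_models]
```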
diff --git a/docs/zh_cn/get_started/quick_start.md b/docs/zh_cn/get_started/quick_start.md index e164349f3..2e092b160 100644 --- a/docs/zh_cn/get_started/quick_start.md +++ b/docs/zh_cn/get_started/quick_start.md @@ -79,13 +79,8 @@ python tools/list_configs.py llama mmlu ```bash python run.py --datasets siqa_gen winograd_ppl \ ---hf-path facebook/opt-125m \ ---model-kwargs device_map='auto' \ ---tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ ---max-seq-len 2048 \ ---max-out-len 100 \ ---batch-size 128 \ ---num-gpus 1 # 最少需要的 GPU 数量 +--hf-type base \ +--hf-path facebook/opt-125m ``` 请注意,通过这种方式,OpenCompass 一次只评估一个模型,而其他方式可以一次评估多个模型。 @@ -100,12 +95,14 @@ python run.py --datasets siqa_gen winograd_ppl \ :animate: fade-in-slide-down ```bash python run.py --datasets siqa_gen winograd_ppl \ +--hf-type base \ # HuggingFace 模型类型, base 或 chat --hf-path facebook/opt-125m \ # HuggingFace 模型路径 --tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer 路径(如果与模型路径相同,可以省略) --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # 构建 tokenizer 的参数 --model-kwargs device_map='auto' \ # 构建模型的参数 --max-seq-len 2048 \ # 模型可以接受的最大序列长度 --max-out-len 100 \ # 生成的最大 token 数 +--min-out-len 100 \ # 生成的最小 token 数 --batch-size 64 \ # 批量大小 --num-gpus 1 # 运行模型所需的 GPU 数量 ``` @@ -147,28 +144,22 @@ python run.py configs/eval_demo.py OpenCompass 提供了一系列预定义的模型配置,位于 `configs/models` 下。以下是与 [opt-350m](https://github.com/open-compass/opencompass/blob/main/configs/models/opt/hf_opt_350m.py)(`configs/models/opt/hf_opt_350m.py`)相关的配置片段: ```python -# 使用 `HuggingFaceCausalLM` 评估由 HuggingFace 的 `AutoModelForCausalLM` 支持的模型 -from opencompass.models import HuggingFaceCausalLM - -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # `HuggingFaceCausalLM` 的初始化参数 - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - # 下面是所有模型的共同参数,不特定于 HuggingFaceCausalLM - abbr='opt350m', # 结果显示的模型缩写 - max_seq_len=2048, # 整个序列的最大长度 - max_out_len=100, # 生成的最大 token 数 - batch_size=64, # 批量大小 - run_cfg=dict(num_gpus=1), # 该模型所需的 GPU 数量 +# 使用 `HuggingFaceBaseModel` 评估由 HuggingFace 的 `AutoModelForCausalLM` 支持的模型 +from opencompass.models import HuggingFaceBaseModel + +models = [ + # OPT-350M + dict( + type=HuggingFaceBaseModel, + # `HuggingFaceBaseModel` 的初始化参数 + path='facebook/opt-350m', + # 下面是所有模型的共同参数,不特定于 HuggingFaceBaseModel + abbr='opt-350m-hf', # 模型的缩写 + max_out_len=1024, # 生成的最大 token 数 + batch_size=32, # 批量大小 + run_cfg=dict(num_gpus=1), # 该模型所需的 GPU 数量 ) +] ``` 使用配置时,我们可以通过命令行参数 `--models` 指定相关文件,或使用继承机制将模型配置导入到配置文件中的 `models` 列表中。 diff --git a/opencompass/cli/main.py b/opencompass/cli/main.py index 2c06b9f3d..e9b5abb39 100644 --- a/opencompass/cli/main.py +++ b/opencompass/cli/main.py @@ -1,3 +1,5 @@ +# flake8: noqa +# yapf: disable import argparse import getpass import os @@ -51,7 +53,7 @@ def parse_args(): action='store_true', default=False) parser.add_argument( - '--accelerator', + '-a', '--accelerator', help='Infer accelerator, support vllm and lmdeploy now.', choices=['vllm', 'lmdeploy', 'hf'], default='hf', @@ -81,7 +83,7 @@ def parse_args(): 'saved in this path, including the slurm logs, ' 'the evaluation results, the summary results, etc.' 
'If not specified, the work_dir will be set to ' - './outputs/default.', + 'outputs/default.', default=None, type=str) parser.add_argument( @@ -95,23 +97,12 @@ def parse_args(): help='Report the running status to lark bot', action='store_true', default=False) - parser.add_argument('--max-partition-size', - help='The maximum size of an infer task. Only ' - 'effective when "infer" is missing from the config.', - type=int, - default=40000), - parser.add_argument( - '--gen-task-coef', - help='The dataset cost measurement coefficient for generation tasks, ' - 'Only effective when "infer" is missing from the config.', - type=int, - default=20) parser.add_argument('--max-num-workers', help='Max number of workers to run in parallel. ' 'Will be overrideen by the "max_num_workers" argument ' 'in the config.', type=int, - default=32) + default=1) parser.add_argument('--max-workers-per-gpu', help='Max task to run in parallel on one GPU. ' 'It will only be used in the local runner.', @@ -181,25 +172,21 @@ def parse_dlc_args(dlc_parser): def parse_hf_args(hf_parser): """These args are all for the quick construction of HuggingFace models.""" - hf_parser.add_argument('--hf-path', type=str) - hf_parser.add_argument('--peft-path', type=str) - hf_parser.add_argument('--tokenizer-path', type=str) - hf_parser.add_argument('--model-kwargs', - nargs='+', - action=DictAction, - default={}) - hf_parser.add_argument('--tokenizer-kwargs', - nargs='+', - action=DictAction, - default={}) - hf_parser.add_argument('--max-out-len', type=int) - hf_parser.add_argument('--max-seq-len', type=int) - hf_parser.add_argument('--no-batch-padding', - action='store_true', - default=False) - hf_parser.add_argument('--batch-size', type=int) - hf_parser.add_argument('--num-gpus', type=int) - hf_parser.add_argument('--pad-token-id', type=int) + hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat') + hf_parser.add_argument('--hf-path', type=str, help='The path to the HuggingFace model, e.g. 
"facebook/opt-125m", required') + hf_parser.add_argument('--model-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the HuggingFace model') + hf_parser.add_argument('--tokenizer-path', type=str, help='The path to the HuggingFace tokenizer, same as --hf-path if not specified') + hf_parser.add_argument('--tokenizer-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the tokenizer') + hf_parser.add_argument('--peft-path', type=str, help='The path to the PEFT model') + hf_parser.add_argument('--peft-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the PEFT model') + hf_parser.add_argument('--generation-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the generation') + hf_parser.add_argument('--max-seq-len', type=int, help='The max sequence length for the HuggingFace model') + hf_parser.add_argument('--max-out-len', type=int, default=256, help='The max output length for the HuggingFace model') + hf_parser.add_argument('--min-out-len', type=int, default=1, help='The min output length for the HuggingFace model') + hf_parser.add_argument('--batch-size', type=int, default=8, help='The batch size for the HuggingFace model') + hf_parser.add_argument('--num-gpus', type=int, default=1, help='The number of GPUs for **the HuggingFace model passed via cli**') + hf_parser.add_argument('--pad-token-id', type=int, help='The pad token id for the HuggingFace model') + hf_parser.add_argument('--stop-words', nargs='+', default=[], help='The stop words for the HuggingFace model') def parse_custom_dataset_args(custom_dataset_parser): @@ -225,7 +212,7 @@ def main(): if args.work_dir is not None: cfg['work_dir'] = args.work_dir else: - cfg.setdefault('work_dir', './outputs/default/') + cfg.setdefault('work_dir', osp.join('outputs', 'default')) # cfg_time_str defaults to the current time cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S') diff --git a/opencompass/datasets/winogrande.py b/opencompass/datasets/winogrande.py index 0e897ee5f..8ea8d27bd 100644 --- a/opencompass/datasets/winogrande.py +++ b/opencompass/datasets/winogrande.py @@ -22,6 +22,9 @@ def load(path): prompt = line['sentence'] continue_prompt = prompt.split('_')[1] data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': line['answer'], @@ -48,6 +51,9 @@ def load(path): answer = line['answer'] answer = ' AB'[int(answer)] if answer != '' else 'NULL' data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': answer, @@ -76,6 +82,9 @@ def load(path): answer = line['answer'] answer = ' AB'[int(answer)] if answer != '' else 'NULL' data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': answer, diff --git a/opencompass/models/__init__.py b/opencompass/models/__init__.py index 273ae3678..fab0824b1 100644 --- a/opencompass/models/__init__.py +++ b/opencompass/models/__init__.py @@ -3,26 +3,28 @@ from .alaya import AlayaLM # noqa: F401 from .baichuan_api import BaiChuan, BaiChuan3 # noqa: F401 from .baidu_api import ERNIEBot # noqa: F401 -from .base import BaseModel, LMTemplateParser # noqa -from .base_api import 
APITemplateParser, BaseAPIModel # noqa +from .base import BaseModel, LMTemplateParser # noqa: F401 +from .base_api import APITemplateParser, BaseAPIModel # noqa: F401 from .bytedance_api import ByteDance # noqa: F401 from .claude_api import Claude # noqa: F401 -from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401, F403 -from .glm import GLM130B # noqa: F401, F403 -from .huggingface import HuggingFace # noqa: F401, F403 -from .huggingface import HuggingFaceCausalLM # noqa: F401, F403 -from .huggingface import HuggingFaceChatGLM3 # noqa: F401, F403 +from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401 +from .glm import GLM130B # noqa: F401 +from .huggingface import HuggingFace # noqa: F401 +from .huggingface import HuggingFaceCausalLM # noqa: F401 +from .huggingface import HuggingFaceChatGLM3 # noqa: F401 +from .huggingface_above_v4_33 import HuggingFaceBaseModel # noqa: F401 +from .huggingface_above_v4_33 import HuggingFacewithChatTemplate # noqa: F401 from .hunyuan_api import Hunyuan # noqa: F401 -from .intern_model import InternLM # noqa: F401, F403 +from .intern_model import InternLM # noqa: F401 from .krgpt_api import KrGPT # noqa: F401 from .lightllm_api import LightllmAPI # noqa: F401 -from .llama2 import Llama2, Llama2Chat # noqa: F401, F403 +from .llama2 import Llama2, Llama2Chat # noqa: F401 from .lmdeploy_pytorch import LmdeployPytorchModel # noqa: F401 from .lmdeploy_tis import LmdeployTisModel # noqa: F401 from .minimax_api import MiniMax # noqa: F401 from .mistral_api import Mistral # noqa: F401 from .mixtral import Mixtral # noqa: F401 -from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401, F403 +from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401 from .moonshot_api import MoonShot # noqa: F401 from .nanbeige_api import Nanbeige # noqa: F401 from .openai_api import OpenAI # noqa: F401 diff --git a/opencompass/models/huggingface_above_v4_33.py b/opencompass/models/huggingface_above_v4_33.py new file mode 100644 index 000000000..f7ce622b7 --- /dev/null +++ b/opencompass/models/huggingface_above_v4_33.py @@ -0,0 +1,414 @@ +# flake8: noqa +# yapf: disable +from typing import Dict, List, Optional, Union + +from opencompass.models.base import BaseModel, LMTemplateParser +from opencompass.models.base_api import APITemplateParser +from opencompass.registry import MODELS +from opencompass.utils.logging import get_logger +from opencompass.utils.prompt import PromptList + +PromptType = Union[PromptList, str] + + +def _get_stopping_criteria(stop_words, tokenizer, batch_size): + from transformers import (PreTrainedTokenizer, StoppingCriteria, + StoppingCriteriaList) + + class MultiTokenEOSCriteria(StoppingCriteria): + """Criteria to stop on the specified multi-token sequence.""" + + def __init__(self, sequence: str, tokenizer: PreTrainedTokenizer, batch_size: int): + self.done_tracker = [False] * batch_size + self.sequence = sequence + self.sequence_ids = tokenizer.encode(sequence, add_special_tokens=False) + self.sequence_id_len = len(self.sequence_ids) + self.tokenizer = tokenizer + + def __call__(self, input_ids, scores, **kwargs) -> bool: + # compare the last len(stop) tokens + lookback_ids_batch = input_ids[:, -self.sequence_id_len:] + lookback_tokens_batch = self.tokenizer.batch_decode(lookback_ids_batch) + for i, done in enumerate(self.done_tracker): + if done: + continue + self.done_tracker[i] = self.sequence in lookback_tokens_batch[i] + return False not in self.done_tracker + + criteria = [] + for stop_word in stop_words: + c = 
MultiTokenEOSCriteria(stop_word, tokenizer, batch_size) + criteria.append(c) + criteria = StoppingCriteriaList(criteria) + return criteria + +def _get_possible_max_seq_len(max_seq_len, path): + if max_seq_len is not None: + return max_seq_len + + from transformers import AutoConfig + config = AutoConfig.from_pretrained(path, trust_remote_code=True) + possible_keys = [ + 'max_position_embeddings', + 'seq_length', + 'model_max_length', + ] + for k in possible_keys: + if hasattr(config, k): + return getattr(config, k) + raise ValueError('max_seq_len is not provided and cannot be inferred from the model config.') + + +def _convert_chat_messages(inputs): + outputs = [] + for _input in inputs: + messages = [] + if isinstance(_input, str): + messages.append({'role': 'HUMAN', 'prompt': _input}) + else: + for item in _input: + role = { + 'HUMAN': 'user', + 'BOT': 'assistant', + 'SYSTEM': 'system', + }[item['role']] + messages.append({'role': role, 'content': item['prompt']}) + outputs.append(messages) + return outputs + + +def _format_with_fast_chat_template(inputs: List[str], name: str='vicuna'): + try: + from fastchat.model import get_conversation_template + except ImportError: + raise ModuleNotFoundError('fastchat not found. Please install with\npip install "fschat[model_worker,webui]"') + + outputs = [] + for _input in inputs: + template = get_conversation_template(name) + for item in _input: + if item['role'] == 'user': + template.append_message(template.roles[0], item['content']) + elif item['role'] == 'assistant': + template.append_message(template.roles[1], item['content']) + elif item['role'] == 'system': + continue + else: + raise ValueError(f'Unknown role {item["role"]}') + template.append_message(template.roles[1], None) + outputs.append(template.get_prompt()) + return outputs + + +def _get_meta_template(meta_template): + default_meta_template = dict( + round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ] + ) + return APITemplateParser(meta_template or default_meta_template) + + +def _set_model_kwargs_torch_dtype(model_kwargs): + import torch + if 'torch_dtype' not in model_kwargs: + torch_dtype = torch.float16 + else: + torch_dtype = { + 'torch.float16': torch.float16, + 'torch.bfloat16': torch.bfloat16, + 'torch.float': torch.float, + 'auto': 'auto', + 'None': None, + }.get(model_kwargs['torch_dtype']) + if torch_dtype is not None: + model_kwargs['torch_dtype'] = torch_dtype + return model_kwargs + + +@MODELS.register_module() +class HuggingFacewithChatTemplate(BaseModel): + + def __init__(self, + path: str, + model_kwargs: dict = dict(), + tokenizer_path: Optional[str] = None, + tokenizer_kwargs: dict = dict(), + peft_path: Optional[str] = None, + peft_kwargs: dict = dict(), + tokenizer_only: bool = False, + generation_kwargs: dict = dict(), + max_seq_len: Optional[int] = None, + meta_template: Optional[Dict] = None, + pad_token_id: Optional[int] = None, + fastchat_template: Optional[str] = None, + stop_words: Optional[str] = [], + **other_kwargs): + + self.logger = get_logger() + self.path = path + self.tokenizer_only = tokenizer_only + self.template_parser = _get_meta_template(meta_template) + self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path) + self._load_tokenizer(tokenizer_path or path, tokenizer_kwargs, pad_token_id) + if not tokenizer_only: + self._load_model(path=path, kwargs=model_kwargs, peft_path=peft_path, peft_kwargs=peft_kwargs) + self.generation_kwargs = generation_kwargs + self.fastchat_template = 
fastchat_template + self.stop_words = stop_words + + for k, v in other_kwargs.items(): + if v is not None: + self.logger.warning(f'Unused argument {k}={v}') + + def _load_tokenizer(self, path: Optional[str], kwargs: dict, pad_token_id: Optional[int] = None): + from transformers import AutoTokenizer, GenerationConfig + + DEFAULT_TOKENIZER_KWARGS = dict(padding_side='left', truncation_side='left', use_fast=False, trust_remote_code=True) + tokenizer_kwargs = DEFAULT_TOKENIZER_KWARGS + tokenizer_kwargs.update(kwargs) + self.tokenizer = AutoTokenizer.from_pretrained(path, **tokenizer_kwargs) + + # A patch for some models without pad_token_id + if pad_token_id is not None: + if self.tokenizer.pad_token_id is None: + self.logger.debug(f'Using {pad_token_id} as pad_token_id') + elif self.tokenizer.pad_token_id != pad_token_id: + self.logger.warning(f'pad_token_id is not consistent. Using {pad_token_id} as pad_token_id') + self.tokenizer.pad_token_id = pad_token_id + return + if self.tokenizer.pad_token_id is not None: + return + self.logger.warning('pad_token_id is not set for the tokenizer.') + generation_config = GenerationConfig.from_pretrained(path) + if generation_config.pad_token_id is not None: + self.logger.warning(f'Using {generation_config.pad_token_id} as pad_token_id.') + self.tokenizer.pad_token_id = generation_config.pad_token_id + return + if self.tokenizer.eos_token_id is not None: + self.logger.warning(f'Using eos_token_id {self.tokenizer.eos_token_id} as pad_token_id.') + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + return + raise ValueError('pad_token_id is not set for this tokenizer. Please set `pad_token_id={PAD_TOKEN_ID}` in model_cfg.') + + def _load_model(self, path: str, kwargs: dict, peft_path: Optional[str] = None, peft_kwargs: dict = dict()): + from transformers import AutoModel, AutoModelForCausalLM + + DEFAULT_MODEL_KWARGS = dict(device_map='auto', trust_remote_code=True) + model_kwargs = DEFAULT_MODEL_KWARGS + model_kwargs.update(kwargs) + model_kwargs = _set_model_kwargs_torch_dtype(model_kwargs) + + try: + self.model = AutoModelForCausalLM.from_pretrained(path, **model_kwargs) + except ValueError: + self.model = AutoModel.from_pretrained(path, **model_kwargs) + + if peft_path is not None: + from peft import PeftModel + peft_kwargs['is_trainable'] = False + self.model = PeftModel.from_pretrained(self.model, peft_path, **peft_kwargs) + + self.model.eval() + self.model.generation_config.do_sample = False + + def generate(self, + inputs: List[str], + max_out_len: int, + min_out_len: Optional[int] = None, + stopping_criteria: List[str] = [], + **kwargs) -> List[str]: + messages = _convert_chat_messages(inputs) + batch_size = len(messages) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + if self.fastchat_template: + messages = _format_with_fast_chat_template(messages, self.fastchat_template) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + else: + messages = [self.tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False) for m in messages] + tokenize_kwargs['add_special_tokens'] = False + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + + generation_kwargs = self.generation_kwargs.copy() + generation_kwargs.update(kwargs) + stopping_criteria = list(set(stopping_criteria + self.stop_words)) + if stopping_criteria: + 
generation_kwargs['stopping_criteria'] = _get_stopping_criteria(stopping_criteria, self.tokenizer, batch_size) + if max_out_len is not None: + generation_kwargs['max_new_tokens'] = max_out_len + if min_out_len is not None: + generation_kwargs['min_new_tokens'] = min_out_len + generation_kwargs['pad_token_id'] = self.tokenizer.pad_token_id + + # step-2: conduct model forward to generate output + outputs = self.model.generate(**tokens, **generation_kwargs) + outputs = outputs[:, tokens['input_ids'].shape[1]:] + + # step-3: decode the output + decodeds = self.tokenizer.batch_decode(outputs) + for stop in stopping_criteria: + decodeds = [t.split(stop)[0] for t in decodeds] + + return decodeds + + def get_token_len(self, prompt: str) -> int: + m = _convert_chat_messages([prompt])[0] + t = self.tokenizer.apply_chat_template(m, add_generation_prompt=True, return_dict=True) + return len(t['input_ids']) + +def _convert_base_messages(inputs): + outputs = [] + for _input in inputs: + if isinstance(_input, str): + outputs.append(_input) + else: + messages = [] + for item in _input: + messages.append(item['prompt']) + outputs.append(''.join(messages)) + return outputs + + +class HuggingFaceBaseModel(HuggingFacewithChatTemplate): + + def __init__(self, + path: str, + model_kwargs: dict = dict(), + tokenizer_path: Optional[str] = None, + tokenizer_kwargs: dict = dict(), + peft_path: Optional[str] = None, + peft_kwargs: dict = dict(), + tokenizer_only: bool = False, + generation_kwargs: dict = dict(), + max_seq_len: Optional[int] = None, + pad_token_id: Optional[int] = None, + stop_words: Optional[str] = [], + **other_kwargs): + + self.logger = get_logger() + self.path = path + self.tokenizer_only = tokenizer_only + self.template_parser = LMTemplateParser() + self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path) + self._load_tokenizer(tokenizer_path or path, tokenizer_kwargs, pad_token_id) + if not tokenizer_only: + self._load_model(path=path, kwargs=model_kwargs, peft_path=peft_path, peft_kwargs=peft_kwargs) + self.generation_kwargs = generation_kwargs + self.stop_words = stop_words + + for k, v in other_kwargs.items(): + if v is not None: + self.logger.warning(f'Unused argument {k}={v}') + + def generate(self, + inputs: List[str], + max_out_len: int, + min_out_len: Optional[int] = None, + stopping_criteria: List[str] = [], + **kwargs) -> List[str]: + messages = _convert_base_messages(inputs) + batch_size = len(messages) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + + generation_kwargs = self.generation_kwargs.copy() + generation_kwargs.update(kwargs) + stopping_criteria = list(set(stopping_criteria + self.stop_words)) + if stopping_criteria: + generation_kwargs['stopping_criteria'] = _get_stopping_criteria(stopping_criteria, self.tokenizer, batch_size) + if max_out_len is not None: + generation_kwargs['max_new_tokens'] = max_out_len + if min_out_len is not None: + generation_kwargs['min_new_tokens'] = min_out_len + generation_kwargs['pad_token_id'] = self.tokenizer.pad_token_id + + # step-2: conduct model forward to generate output + outputs = self.model.generate(**tokens, **generation_kwargs) + outputs = outputs[:, tokens['input_ids'].shape[1]:] + + # step-3: decode the output + decodeds = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + 
for stop in stopping_criteria: + decodeds = [token.split(stop)[0] for token in decodeds] + + return decodeds + + def get_ppl(self, inputs: List[str], mask_length: Optional[List[int]] = None) -> List[float]: + """Get perplexity scores given a list of inputs. + + Args: + inputs (List[str]): A list of strings. + mask_length (Optional[List[int]]): A list of mask lengths. If + provided, the perplexity scores will be calculated with the + first mask_length[i] tokens masked out. It's okay to skip + its implementation if advanced features in PPLInfernecer is + not needed. + + Returns: + List[float]: A list of perplexity scores. + """ + assert self.tokenizer.pad_token + import torch + import torch.nn.functional as F + pad_token_id = self.tokenizer.pad_token_id + messages = _convert_base_messages(inputs) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + outputs = self.model(**tokens)[0] + + batch_size, seq_len, vocab_size = outputs.shape + shift_logits = outputs[:, :-1, :].contiguous().float() + shift_labels = tokens['input_ids'][:, 1:].contiguous() + loss = F.cross_entropy( + shift_logits.view(-1, vocab_size), + shift_labels.view(-1), + ignore_index=pad_token_id, + reduction='none').view(batch_size, seq_len - 1) + lens = (tokens['input_ids'] != pad_token_id).sum(-1).cpu().numpy() + + if mask_length is not None: + import numpy as np + mask = torch.zeros_like(shift_labels) # [batch,seqlen] + for i in range(len(mask)): + for j in range(mask_length[i] - 1, len(mask[i])): + mask[i][j] = 1 + loss = loss * mask + lens -= np.array(mask_length) + + ce_loss = loss.float().sum(-1).cpu().detach().numpy() / lens + return ce_loss + + def get_loglikelihood(self, inputs: List[str], conts: List[str]) -> List[float]: + mask_length = [self.get_token_len(c, add_special_tokens=False) for c in conts] + return - self.get_ppl(inputs, mask_length) + + def get_token_len(self, prompt: str, add_special_tokens: bool=True) -> int: + m = _convert_base_messages([prompt])[0] + t = self.tokenizer(m, add_special_tokens=add_special_tokens) + return len(t['input_ids']) diff --git a/opencompass/models/turbomind.py b/opencompass/models/turbomind.py index 9479f02f9..f64249bc2 100644 --- a/opencompass/models/turbomind.py +++ b/opencompass/models/turbomind.py @@ -37,9 +37,6 @@ class TurboMindModel(BaseModel): arguments like session_len, max_batch_size for TurboMind. gen_config (Dict, optional): Generation config to set arguments like top_k, top_p, temperature. - end_str (str, optional): Whether to trim generated strings with end_str - if the model has special ending strings that are not handled well. - Defaults to None. 
""" def __init__(self, @@ -47,9 +44,8 @@ def __init__(self, concurrency: int = 8, max_seq_len: int = 2048, meta_template: Optional[Dict] = None, - engine_config: Optional[Dict] = None, - gen_config: Optional[Dict] = None, - end_str: Optional[str] = None): + engine_config: Dict = {}, + gen_config: Dict = {}): super().__init__(path=path, max_seq_len=max_seq_len, meta_template=meta_template) @@ -70,12 +66,14 @@ def __init__(self, ] self.generator_ids = [i + 1 for i in range(concurrency)] self.gen_config = gen_config - self.end_str = end_str self.major_version, self.minor_version, _ = version_info def generate(self, inputs: List[str], max_out_len: int = 512, + stopping_criteria: List[str] = [], + do_sample: Optional[bool] = None, + temperature: int = 1, **kwargs) -> List[str]: """Generate results given a list of inputs. @@ -96,13 +94,21 @@ def generate(self, ] gen_config = copy.deepcopy(self.gen_config) - if 'do_sample' in kwargs: - if kwargs['do_sample']: - gen_config.top_k = 1000 - gen_config.temperature = kwargs.get('temperature', 1) + if do_sample is not None: + if do_sample: + gen_config['top_k'] = 1000 + gen_config['temperature'] = temperature else: - gen_config.top_k = 1 - gen_config.temperature = 0.01 + gen_config['top_k'] = 1 + if stopping_criteria: + stop_words = gen_config.get('stop_words', []) + for t in stopping_criteria: + t = self.tokenizer.encode(t, add_bos=False) + stop_words.append(t[0]) + gen_config['stop_words'] = list(set(stop_words)) + + from lmdeploy.messages import EngineGenerationConfig + gen_config = EngineGenerationConfig(**gen_config) results = [] for batch_input in batch_inputs: @@ -115,7 +121,6 @@ def generate(self, batch_input, [max_out_len] * len(batch_input), [gen_config] * len(batch_input), - [self.end_str] * len(batch_input), )) results += _results return results @@ -136,8 +141,7 @@ def _generate(self, session_id, prompt: PromptType, max_out_len: int, - gen_config=None, - end_str: Optional[str] = None) -> str: + gen_config=None) -> str: """Generate results given a list of inputs. Args: @@ -147,10 +151,6 @@ def _generate(self, max_out_len (int): The maximum length of the output. gen_config (EngineGenerationConfig, optional): Generation config to set arguments like top_k, top_p, temperature. - end_str (str, optional): Whether to trim generated strings - with end_str if the model has special ending strings - that are not handled well. - Defaults to None. Returns: str: The generated string. 
""" @@ -173,9 +173,6 @@ def _generate(self, _, output_ids, _ = outputs response = self.tokenizer.decode(output_ids) response = valid_str(response) - # used to trim - if end_str: - response = response.split(end_str)[0] return response def get_ppl(self, diff --git a/opencompass/models/vllm.py b/opencompass/models/vllm.py index 63da7b3ff..e204c0c41 100644 --- a/opencompass/models/vllm.py +++ b/opencompass/models/vllm.py @@ -25,7 +25,7 @@ def __init__( meta_template: Optional[Dict] = None, mode: str = 'none', use_fastchat_template: bool = False, - end_str: Optional[str] = None, + stop_words: List[str] = [], ): super().__init__(path=path, max_seq_len=max_seq_len, @@ -42,7 +42,7 @@ def __init__( assert mode in ['none', 'mid'] self.mode = mode self.use_fastchat_template = use_fastchat_template - self.end_str = end_str + self.stop_words = stop_words def _load_model(self, path: str, @@ -59,7 +59,10 @@ def _load_model(self, ray.shutdown() self.model = LLM(path, **model_kwargs) - def generate(self, inputs: List[str], max_out_len: int, + def generate(self, + inputs: List[str], + max_out_len: int, + stopping_criteria: List[str] = [], **kwargs) -> List[str]: """Generate results given a list of inputs. @@ -90,6 +93,8 @@ def generate(self, inputs: List[str], max_out_len: int, generation_kwargs = kwargs.copy() generation_kwargs.update(self.generation_kwargs) generation_kwargs.update({'max_tokens': max_out_len}) + _stop = list(set(self.stop_words + stopping_criteria)) + generation_kwargs.update({'stop': _stop}) sampling_kwargs = SamplingParams(**generation_kwargs) outputs = self.model.generate(inputs, sampling_kwargs) @@ -97,9 +102,6 @@ def generate(self, inputs: List[str], max_out_len: int, for output in outputs: prompt = output.prompt generated_text = output.outputs[0].text - - if self.end_str: - generated_text = generated_text.split(self.end_str)[0] prompt_list.append(prompt) output_strs.append(generated_text) diff --git a/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py b/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py index ca3251860..40367ade4 100644 --- a/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py @@ -1,4 +1,6 @@ -"""PPL Inferencer.""" +# flake8: noqa +# yapf: disable +"""LogLikelihood(LL) Inferencer.""" import os from typing import List, Optional @@ -76,16 +78,13 @@ def inference(self, # 3. Get labels of all the classes if self.labels is None: - labels = retriever.get_labels(ice_template=ice_template, - prompt_template=prompt_template) + labels = retriever.get_labels(ice_template=ice_template, prompt_template=prompt_template) else: labels = self.labels # 4. Generate in-context examples for testing inputs for idx in range(len(ice_idx_list)): - ice.append( - retriever.generate_ice(ice_idx_list[idx], - ice_template=ice_template)) + ice.append(retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template)) output_handler.save_ice(self.model.parse_template(ice, mode='ppl')) # 5. 
Calculating loglikelihood for prompts in each label's class @@ -99,58 +98,41 @@ def inference(self, # 5.1 Generate prompts of current label and truncate # TODO: Refactor for idx in range(len(ice_idx_list)): - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) + prompt_kwargs = { + 'idx': idx, + 'ice': ice[idx], + 'label': label, + 'ice_template': ice_template, + 'prompt_template': prompt_template, + } + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if self.max_seq_len is not None: - prompt_token_num = self.model.get_token_len_from_template( - prompt, mode='ppl') - while len(ice_idx_list[idx] - ) > 0 and prompt_token_num > self.max_seq_len: + while len(ice_idx_list[idx]) > 0 and prompt_token_num > self.max_seq_len: ice_idx_list[idx] = ice_idx_list[idx][:-1] - ice[idx] = retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template) - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) - prompt_token_num = self.model.get_token_len_from_template( # noqa - prompt, mode='ppl') # noqa + ice[idx] = retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template) + prompt_kwargs['ice'] = ice[idx] + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') prompt_list.append(prompt) token_num_list.append(prompt_token_num) cont_list.append(retriever.test_ds[idx]['cont']) # 5.2 Get loglikelihood - logger.info( - f"Calculating Loglikelihood for prompts labeled '{label}'" - ) # noqa - for idx in trange(0, - len(prompt_list), - self.batch_size, - disable=not self.is_main_process): + logger.info(f"Calculating Loglikelihood for prompts labeled '{label}'") + for idx in trange(0, len(prompt_list), self.batch_size, disable=not self.is_main_process): sub_prompt_list = prompt_list[idx:idx + self.batch_size] sub_cont_list = cont_list[idx:idx + self.batch_size] with torch.no_grad(): # mainly modify compared to PPLInferencer - sub_inputs = self.model.parse_template(sub_prompt_list, - mode='ppl') - sub_res = self.model.get_loglikelihood( - sub_inputs, sub_cont_list).tolist() - for res, prompt in zip( - sub_res, - self.model.parse_template(sub_prompt_list, - mode='ppl')): + sub_inputs = self.model.parse_template(sub_prompt_list, mode='ppl') + sub_res = self.model.get_loglikelihood(sub_inputs, sub_cont_list).tolist() + for res, prompt in zip(sub_res, self.model.parse_template(sub_prompt_list, mode='ppl')): sub_ppl_list.append(res) ice_str = self.model.parse_template(ice[idx], mode='ppl') - output_handler.save_prompt_and_loglikelihood( - label, prompt.replace(ice_str, ''), prompt, res, index) + output_handler.save_prompt_and_loglikelihood(label, prompt.replace(ice_str, ''), prompt, res, index) index = index + 1 ppl.append(sub_ppl_list) @@ -169,13 +151,9 @@ def inference(self, # 8. 
Output if self.is_main_process: os.makedirs(output_json_filepath, exist_ok=True) - output_handler.write_to_json(output_json_filepath, - output_json_filename) + output_handler.write_to_json(output_json_filepath, output_json_filename) - return [ - sample['prediction'] - for sample in output_handler.results_dict.values() - ] + return [sample['prediction'] for sample in output_handler.results_dict.values()] class LLInferencerOutputHandler: diff --git a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py index e48a8a2ff..40a854807 100644 --- a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py @@ -1,3 +1,5 @@ +# flake8: noqa +# yapf: disable """PPL Inferencer.""" import os @@ -84,9 +86,7 @@ def inference(self, # 4. Generate in-context examples for testing inputs for idx in range(len(ice_idx_list)): - ice.append( - retriever.generate_ice(ice_idx_list[idx], - ice_template=ice_template)) + ice.append(retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template)) output_handler.save_ice(self.model.parse_template(ice, mode='ppl')) # 5. Calculating PPL for prompts in each label's class @@ -101,33 +101,26 @@ def inference(self, # 5.1 Generate prompts of current label and truncate # TODO: Refactor for idx in range(len(ice_idx_list)): - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template, - remain_sep=normalizing_str is not None) - prompt_token_num = self.model.get_token_len_from_template( - prompt, mode='ppl') + prompt_kwargs = { + 'idx': idx, + 'ice': ice[idx], + 'label': label, + 'ice_template': ice_template, + 'prompt_template': prompt_template, + 'remain_sep': normalizing_str is not None + } + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if self.max_seq_len is not None: - while len(ice_idx_list[idx] - ) > 0 and prompt_token_num > self.max_seq_len: + while len(ice_idx_list[idx]) > 0 and prompt_token_num > self.max_seq_len: ice_idx_list[idx] = ice_idx_list[idx][:-1] - ice[idx] = retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template) - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) - prompt_token_num = self.model.get_token_len_from_template( # noqa - prompt, mode='ppl') # noqa + ice[idx] = retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template) + prompt_kwargs['ice'] = ice[idx] + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if normalizing_str is not None: - assert isinstance(prompt, str), \ - 'Prompt must be a string when normalizing_str is set.' + assert isinstance(prompt, str), 'Prompt must be a string when normalizing_str is set.' 
prompt_sep = prompt if prompt_template is not None: sep_token = prompt_template.sep_token @@ -140,10 +133,9 @@ def inference(self, prompt = context + answer normalizing_prompt = normalizing_str + answer - context_length_list.append( - self.model.get_token_len_from_template(context, - mode='ppl')) + context_length_list.append(self.model.get_token_len_from_template(context, mode='ppl')) normalizing_prompt_list.append(normalizing_prompt) + prompt_list.append(prompt) token_num_list.append(prompt_token_num) @@ -153,45 +145,25 @@ def inference(self, # 5.2 Get PPL logger.info(f"Calculating PPL for prompts labeled '{label}'") - for idx in trange(0, - len(prompt_list), - self.batch_size, - disable=not self.is_main_process): + for idx in trange(0, len(prompt_list), self.batch_size, disable=not self.is_main_process): sub_prompt_list = prompt_list[idx:idx + self.batch_size] - if normalizing_str is not None: - sub_context_length_list = context_length_list[idx:idx + - self. - batch_size] - sub_normalizing_prompt_list = normalizing_prompt_list[ - idx:idx + self.batch_size] - with torch.no_grad(): if normalizing_str is not None: - res1 = self.model.get_ppl_from_template( - sub_prompt_list, - mask_length=sub_context_length_list) - res2 = self.model.get_ppl_from_template( - sub_normalizing_prompt_list, - mask_length=[ - normalizing_str_len - for i in range(len(sub_prompt_list)) - ]) + sub_context_length_list = context_length_list[idx:idx + self.batch_size] + sub_normalizing_prompt_list = normalizing_prompt_list[idx:idx + self.batch_size] + res1 = self.model.get_ppl_from_template(sub_prompt_list, mask_length=sub_context_length_list) + sub_normalizing_context_length_list = [normalizing_str_len for _ in range(len(sub_prompt_list))] + res2 = self.model.get_ppl_from_template(sub_normalizing_prompt_list, mask_length=sub_normalizing_context_length_list) sub_res = res1 - res2 else: - sub_res = self.model.get_ppl_from_template( - sub_prompt_list).tolist() - for res, prompt in zip( - sub_res, - self.model.parse_template(sub_prompt_list, - mode='ppl')): + sub_res = self.model.get_ppl_from_template(sub_prompt_list).tolist() + + for res, prompt in zip(sub_res, self.model.parse_template(sub_prompt_list, mode='ppl')): sub_ppl_list.append(res) ice_str = self.model.parse_template(ice[idx], mode='ppl') - output_handler.save_prompt_and_ppl( - label, prompt.replace(ice_str, ''), prompt, res, index) - output_handler.results_dict[str( - index)][f'label: {str(label)}'][ - 'BPB'] = res * token_num_list[index] / len( - prompt.replace(ice_str, '').encode()) + prompt_wo_ice = prompt.replace(ice_str, '') + output_handler.save_prompt_and_ppl(label, prompt_wo_ice, prompt, res, index) + output_handler.results_dict[str(index)][f'label: {str(label)}']['BPB'] = res * token_num_list[index] / len(prompt_wo_ice.encode()) index = index + 1 ppl.append(sub_ppl_list) @@ -210,10 +182,6 @@ def inference(self, # 8. 
Output if self.is_main_process: os.makedirs(output_json_filepath, exist_ok=True) - output_handler.write_to_json(output_json_filepath, - output_json_filename) + output_handler.write_to_json(output_json_filepath, output_json_filename) - return [ - sample['prediction'] - for sample in output_handler.results_dict.values() - ] + return [sample['prediction'] for sample in output_handler.results_dict.values()] diff --git a/opencompass/partitioners/num_worker.py b/opencompass/partitioners/num_worker.py index 4e22a5ff7..58f358478 100644 --- a/opencompass/partitioners/num_worker.py +++ b/opencompass/partitioners/num_worker.py @@ -60,14 +60,16 @@ def partition(self, if osp.exists(filename): continue dataset_size = self.get_size(dataset) - if dataset_size > self.min_task_size: + if self.num_worker <= 1: + chunks.append(dataset) + elif dataset_size <= self.min_task_size: + chunks.append(dataset) + else: root, ext = osp.splitext(filename) dataset_splits = self.split_dataset(dataset) for i, dataset_split in enumerate(dataset_splits): if not osp.exists(f'{root}_{i}{ext}'): chunks.append(dataset_split) - else: - chunks.append(dataset) if self.strategy == 'heuristic': buckets = [[] for _ in range(self.num_worker)] diff --git a/opencompass/summarizers/__init__.py b/opencompass/summarizers/__init__.py index 1d2d25848..274f3b18d 100644 --- a/opencompass/summarizers/__init__.py +++ b/opencompass/summarizers/__init__.py @@ -1,5 +1,6 @@ # flake8: noqa: F401, E501 from .circular import CircularSummarizer # noqa: F401 from .default import DefaultSummarizer # noqa: F401 -from .llm_compression import LLMCompressionSummarizer +from .llm_compression import LLMCompressionSummarizer # noqa: F401 +from .multi_faceted import MultiFacetedSummarizer # noqa: F401 from .subjective import * # noqa: F401 diff --git a/opencompass/summarizers/default.py b/opencompass/summarizers/default.py index e4fe023cf..f16b208db 100644 --- a/opencompass/summarizers/default.py +++ b/opencompass/summarizers/default.py @@ -226,12 +226,12 @@ def _calculate_group_metrics(self, raw_results, parsed_results, dataset_metrics, return raw_results, parsed_results, dataset_metrics, dataset_eval_mode - def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode): + def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=None): dataset_abbrs = [dataset_abbr_from_cfg(dataset) for dataset in self.dataset_cfgs] prompt_version = {dataset_abbr_from_cfg(d): get_prompt_hash(d)[:6] for d in self.dataset_cfgs} summarizer_dataset_abbrs = [] - if self.dataset_abbrs is None: + if required_dataset_abbrs is None: # display all dataset metrics included in the config for dataset_abbr in dataset_abbrs: if dataset_abbr in dataset_metrics: @@ -246,7 +246,7 @@ def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode): summarizer_dataset_abbrs.append((dataset_abbr, metric)) else: # follow the required order - for item in self.dataset_abbrs: + for item in required_dataset_abbrs: if isinstance(item, str): summarizer_dataset_abbrs.append((item, None)) elif isinstance(item, (list, tuple)): @@ -306,7 +306,7 @@ def _output_to_file(self, output_path, time_str, table, raw_txts): text = f'{time_str}\n' + \ 'tabulate format\n' + \ '^' * 128 + '\n' + \ - tabulate.tabulate(table, headers='firstrow') + '\n' + \ + tabulate.tabulate(table, headers='firstrow', floatfmt='.2f') + '\n' + \ '$' * 128 + '\n\n' + \ '-' * 128 + ' THIS IS A DIVIDER ' + '-' * 128 + '\n\n' + \ 'csv format\n' + \ @@ -338,13 +338,13 @@ def 
summarize( self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode) # format table - table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode) + table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=self.dataset_abbrs) # format raw txt raw_txts = self._format_raw_txt(raw_results) # output to screen - print(tabulate.tabulate(table, headers='firstrow')) + print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f')) # output to .text / .csv files self._output_to_file(output_path, time_str, table, raw_txts) diff --git a/opencompass/summarizers/multi_faceted.py b/opencompass/summarizers/multi_faceted.py new file mode 100644 index 000000000..27848ba31 --- /dev/null +++ b/opencompass/summarizers/multi_faceted.py @@ -0,0 +1,46 @@ +# flake8: noqa +# yapf: disable +import functools +import getpass +import math +import os +from datetime import datetime +from typing import Any, Dict, List, Optional + +import tabulate +from mmengine import ConfigDict + +from .default import DefaultSummarizer + + +class MultiFacetedSummarizer(DefaultSummarizer): + + def __init__(self, config: ConfigDict, dataset_abbrs_list: Optional[Dict[str, List[str]]] = None, summary_groups: List = []) -> None: + super().__init__(config, dataset_abbrs=None, summary_groups=summary_groups) + self.dataset_abbrs_list = dataset_abbrs_list + + def summarize(self, output_path: str = None, time_str: str = datetime.now().strftime('%Y%m%d_%H%M%S')): + + # pick up results + raw_results, parsed_results, dataset_metrics, dataset_eval_mode = self._pick_up_results() + + # calculate group metrics + raw_results, parsed_results, dataset_metrics, dataset_eval_mode = \ + self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode) + + for dataset_abbrs_item in self.dataset_abbrs_list: + profile_name = dataset_abbrs_item['name'] + profile_dataset_abbrs = dataset_abbrs_item['dataset_abbrs'] + + # format table + table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=profile_dataset_abbrs) + + # output to screen + print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f')) + + # output to .text / .csv files + output_csv_path = os.path.join(self.work_dir, 'summary', f'summary_{time_str}', f'{profile_name}.csv') + os.makedirs(os.path.dirname(output_csv_path), exist_ok=True) + with open(output_csv_path, 'w', encoding='utf-8') as f: + f.write('\n'.join([','.join(row) for row in table]) + '\n') + self.logger.info(f'write csv to {os.path.abspath(output_csv_path)}') diff --git a/opencompass/utils/build.py b/opencompass/utils/build.py index 40e8ae2da..14a666832 100644 --- a/opencompass/utils/build.py +++ b/opencompass/utils/build.py @@ -22,5 +22,4 @@ def build_model_from_cfg(model_cfg: ConfigDict): model_cfg.pop('summarizer_abbr', None) model_cfg.pop('pred_postprocessor', None) model_cfg.pop('min_out_len', None) - model_cfg.pop('tokenizer_only', None) return MODELS.build(model_cfg) diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py index 2a26b6c77..c3b9de811 100644 --- a/opencompass/utils/run.py +++ b/opencompass/utils/run.py @@ -5,8 +5,10 @@ from mmengine.config import Config from opencompass.datasets.custom import make_custom_dataset_config -from opencompass.models import VLLM, HuggingFaceCausalLM, TurboMindModel -from opencompass.partitioners import NaivePartitioner, SizePartitioner +from opencompass.models import (VLLM, HuggingFaceBaseModel, + 
HuggingFaceCausalLM, + HuggingFacewithChatTemplate, TurboMindModel) +from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask from opencompass.utils import get_logger, match_files @@ -71,6 +73,7 @@ def get_config_from_arg(args) -> Config: 2. args.models and args.datasets 3. Huggingface parameter groups and args.datasets """ + logger = get_logger() if args.config: config = Config.fromfile(args.config, format_python_code=False) config = try_fill_in_custom_cfgs(config) @@ -140,19 +143,25 @@ def get_config_from_arg(args) -> Config: f'Config file {model[1]} does not contain "models" field') models += cfg['models'] else: - from opencompass.models import HuggingFace - model = dict(type=f'{HuggingFace.__module__}.{HuggingFace.__name__}', + if args.hf_type == 'chat': + mod = HuggingFacewithChatTemplate + else: + mod = HuggingFaceBaseModel + model = dict(type=f'{mod.__module__}.{mod.__name__}', + abbr=args.hf_path.split('/')[-1] + '_hf', path=args.hf_path, - peft_path=args.peft_path, - tokenizer_path=args.tokenizer_path, model_kwargs=args.model_kwargs, + tokenizer_path=args.tokenizer_path, tokenizer_kwargs=args.tokenizer_kwargs, + peft_path=args.peft_path, + peft_kwargs=args.peft_kwargs, max_seq_len=args.max_seq_len, max_out_len=args.max_out_len, - batch_padding=not args.no_batch_padding, batch_size=args.batch_size, pad_token_id=args.pad_token_id, + stop_words=args.stop_words, run_cfg=dict(num_gpus=args.num_gpus)) + logger.debug(f'Using model: {model}') models.append(model) # set infer accelerator if needed if args.accelerator in ['vllm', 'lmdeploy']: @@ -173,7 +182,7 @@ def get_config_from_arg(args) -> Config: summarizer_file = summarizer_arg s = match_cfg_file(summarizers_dir, [summarizer_file])[0] - get_logger().info(f'Loading {s[0]}: {s[1]}') + logger.info(f'Loading {s[0]}: {s[1]}') cfg = Config.fromfile(s[1]) # Use summarizer_key to retrieve the summarizer definition # from the configuration file @@ -186,28 +195,23 @@ def get_config_from_arg(args) -> Config: def change_accelerator(models, accelerator): models = models.copy() + logger = get_logger() model_accels = [] for model in models: - get_logger().info(f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') # change HuggingFace model to VLLM or TurboMindModel if model['type'] is HuggingFaceCausalLM: gen_args = dict() if model.get('generation_kwargs') is not None: generation_kwargs = model['generation_kwargs'].copy() - gen_args['temperature'] = 0.001 if generation_kwargs.get( - 'temperature' - ) is None else generation_kwargs['temperature'] - gen_args['top_k'] = 1 if generation_kwargs.get( - 'top_k') is None else generation_kwargs['top_k'] - gen_args['top_p'] = 0.9 if generation_kwargs.get( - 'top_p') is None else generation_kwargs['top_p'] - gen_args['stop_token_ids'] = None if generation_kwargs.get( - 'eos_token_id' - ) is None else generation_kwargs['eos_token_id'] - generation_kwargs[ - 'stop_token_ids'] = None if generation_kwargs.get( - 'eos_token_id' - ) is None else generation_kwargs['eos_token_id'] + gen_args['temperature'] = generation_kwargs.get( + 'temperature', 0.001) + gen_args['top_k'] = generation_kwargs.get('top_k', 1) + gen_args['top_p'] = generation_kwargs.get('top_p', 0.9) + gen_args['stop_token_ids'] = generation_kwargs.get( + 'eos_token_id', None) + generation_kwargs['stop_token_ids'] = generation_kwargs.get( + 
'eos_token_id', None) generation_kwargs.pop('eos_token_id') else: # if generation_kwargs is not provided, set default values @@ -218,11 +222,10 @@ def change_accelerator(models, accelerator): gen_args['stop_token_ids'] = None if accelerator == 'lmdeploy': - get_logger().info( - f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') + mod = TurboMindModel acc_model = dict( - type= # noqa E251 - f'{TurboMindModel.__module__}.{TurboMindModel.__name__}', + type=f'{mod.__module__}.{mod.__name__}', abbr=model['abbr'].replace('hf', 'lmdeploy') if '-hf' in model['abbr'] else model['abbr'] + '-lmdeploy', path=model['path'], @@ -244,8 +247,7 @@ def change_accelerator(models, accelerator): if model.get(item) is not None: acc_model[item] = model[item] elif accelerator == 'vllm': - get_logger().info( - f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') acc_model = dict( type=f'{VLLM.__module__}.{VLLM.__name__}', @@ -275,9 +277,8 @@ def get_config_type(obj) -> str: def fill_infer_cfg(cfg, args): new_cfg = dict(infer=dict( - partitioner=dict(type=get_config_type(SizePartitioner), - max_task_size=args.max_partition_size, - gen_task_coef=args.gen_task_coef), + partitioner=dict(type=get_config_type(NumWorkerPartitioner), + num_worker=args.max_num_workers), runner=dict( max_num_workers=args.max_num_workers, debug=args.debug, diff --git a/tools/prompt_viewer.py b/tools/prompt_viewer.py index ed821c5af..ed7c0c96f 100644 --- a/tools/prompt_viewer.py +++ b/tools/prompt_viewer.py @@ -54,7 +54,7 @@ def print_prompts(model_cfg, dataset_cfg, count=1): # extracted and generalized as a static method in these Inferencers # and reused here. if model_cfg: - max_seq_len = model_cfg.max_seq_len + max_seq_len = model_cfg.get('max_seq_len', 32768) if not model_cfg['type'].is_api: model_cfg['tokenizer_only'] = True model = build_model_from_cfg(model_cfg)
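For context on the chat-template path that `HuggingFacewithChatTemplate` relies on, below is a minimal, hedged illustration of how a prompt is rendered through the tokenizer's built-in chat template; it is not part of this patch, the model name is taken from the configs above, and the printed template is only approximate:

```python
# Minimal illustration of transformers' apply_chat_template, the mechanism
# HuggingFacewithChatTemplate builds on; not part of this patch.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')
messages = [{'role': 'user', 'content': 'What is 2 + 2?'}]
prompt = tokenizer.apply_chat_template(messages,
                                       add_generation_prompt=True,
                                       tokenize=False)
print(prompt)  # roughly '<|user|>\nWhat is 2 + 2?</s>\n<|assistant|>\n'
```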