From 648c645327ef7abbe3fa5cd5f1c783913aa4c2c9 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 01:49:42 +0800 Subject: [PATCH 1/9] update template --- examples/pytorch/llm/README.md | 12 +- examples/pytorch/llm/README_CN.md | 13 +- examples/pytorch/llm/src/llm_infer.py | 33 ++-- examples/pytorch/llm/src/llm_sft.py | 41 +++-- examples/pytorch/llm/src/utils/__init__.py | 3 +- examples/pytorch/llm/src/utils/models.py | 52 +++--- examples/pytorch/llm/src/utils/preprocess.py | 181 +++++++++++++++++++ examples/pytorch/llm/src/utils/utils.py | 11 -- swift/utils/llm_utils.py | 32 ---- 9 files changed, 275 insertions(+), 103 deletions(-) create mode 100644 examples/pytorch/llm/src/utils/preprocess.py diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 27ea58b45e..2605d841de 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -19,6 +19,7 @@ 2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. supported feature: quantization, ddp, model parallelism(device map), gradient checkpoint, gradient accumulation steps, push to modelscope hub, custom datasets, ... 4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... +5. supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... ## Prepare the Environment Experimental environment: A10, 3090, A100, ... (V100 does not support bf16, quantization) @@ -65,6 +66,10 @@ bash scripts/qwen_7b/qlora/infer.sh bash scripts/qwen_7b/qlora_ddp/sft.sh bash scripts/qwen_7b/qlora_ddp/infer.sh +# sft(lora+ddp) and infer qwen-7b, Requires 4*22GB VRAM. +bash scripts/qwen_7b/lora_ddp/sft.sh +bash scripts/qwen_7b/lora_ddp/infer.sh + # sft(full) and infer qwen-7b, Requires 95GB VRAM. bash scripts/qwen_7b/full/sft.sh bash scripts/qwen_7b/full/infer.sh @@ -72,6 +77,7 @@ bash scripts/qwen_7b/full/infer.sh # For more scripts, please see `scripts/` folder ``` -## Extend Datasets -1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/models.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work. -2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/datasets.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`. +## Extend Models and Datasets +1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/model.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work. +2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`. +3. If you need to extend the template, you can modify the `TEMPLATE_MAPPING` in `utils/preprocess.py`. diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index a7652c0a8d..5f40d7f06c 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -20,7 +20,7 @@ 2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 
3. 支持的特性: 模型量化, DDP, 模型并行(device_map), gradient checkpoint, 梯度累加, 支持推送modelscope hub, 支持自定义数据集, ... 4. 支持的数据集: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... - +5. 支持的template: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... ## 准备实验环境 实验环境: A10, 3090, A100均可. (V100不支持bf16, 量化) @@ -68,6 +68,10 @@ bash scripts/qwen_7b/qlora/infer.sh bash scripts/qwen_7b/qlora_ddp/sft.sh bash scripts/qwen_7b/qlora_ddp/infer.sh +# 微调(lora+ddp)+推理 qwen-7b, 需要4卡*22GB显存. +bash scripts/qwen_7b/lora_ddp/sft.sh +bash scripts/qwen_7b/lora_ddp/infer.sh + # 微调(full)+推理 qwen-7b, 需要95G显存. bash scripts/qwen_7b/full/sft.sh bash scripts/qwen_7b/full/infer.sh @@ -75,6 +79,7 @@ bash scripts/qwen_7b/full/infer.sh # 更多的scripts脚本, 可以看`scripts`文件夹 ``` -## 拓展数据集 -1. 如果你想要拓展模型, 你可以修改`utils/models.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用. -2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/datasets.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集. +## 拓展模型和数据集 +1. 如果你想要拓展模型, 你可以修改`utils/model.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用. +2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集. +3. 如果你想要拓展template, 你可以修改`utils/preprocess.py`文件中的`TEMPLATE_MAPPING`. diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py index 124b849f2f..6e8e651af5 100644 --- a/examples/pytorch/llm/src/llm_infer.py +++ b/examples/pytorch/llm/src/llm_infer.py @@ -1,18 +1,16 @@ import os # os.environ['CUDA_VISIBLE_DEVICES'] = '0' from dataclasses import dataclass, field -from functools import partial from typing import Optional import torch from transformers import BitsAndBytesConfig, GenerationConfig, TextStreamer -from utils import (DATASET_MAPPING, DEFAULT_PROMPT, MODEL_MAPPING, get_dataset, - get_model_tokenizer, inference, process_dataset, select_bnb, - select_dtype, show_layers) +from utils import (DATASET_MAPPING, MODEL_MAPPING, TEMPLATE_MAPPING, + get_dataset, get_model_tokenizer, get_preprocess, inference, + process_dataset, select_bnb, select_dtype, show_layers) from swift import Swift, get_logger from swift.utils import parse_args, print_model_info, seed_everything -from swift.utils.llm_utils import tokenize_function logger = get_logger() @@ -23,6 +21,8 @@ class InferArguments: default='qwen-7b', metadata={'choices': list(MODEL_MAPPING.keys())}) sft_type: str = field( default='lora', metadata={'choices': ['lora', 'full']}) + template_type: str = field( + default=None, metadata={'choices': list(TEMPLATE_MAPPING.keys())}) ckpt_dir: str = '/path/to/your/vx_xxx/checkpoint-xxx' eval_human: bool = False # False: eval test_dataset @@ -37,13 +37,13 @@ class InferArguments: dataset_seed: int = 42 dataset_sample: int = 20000 # -1: all dataset dataset_test_size: float = 0.01 - prompt: str = DEFAULT_PROMPT + system: str = 'you are a helpful assistant!' 
max_length: Optional[int] = 1024 quantization_bit: Optional[int] = field( default=None, metadata={'choices': {4, 8}}) bnb_4bit_comp_dtype: str = field( - default='fp32', metadata={'choices': {'fp16', 'bf16', 'fp32'}}) + default=None, metadata={'choices': {'fp16', 'bf16', 'fp32'}}) bnb_4bit_quant_type: str = field( default='nf4', metadata={'choices': {'fp4', 'nf4'}}) bnb_4bit_use_double_quant: bool = True @@ -57,7 +57,14 @@ class InferArguments: def __post_init__(self): if not os.path.isdir(self.ckpt_dir): raise ValueError(f'Please enter a valid ckpt_dir: {self.ckpt_dir}') + if self.template_type is None: + self.template_type = MODEL_MAPPING[self.model_type].get( + 'template', 'default') + logger.info(f'Setting template_type: {self.template_type}') + self.torch_dtype, _, _ = select_dtype(self.dtype) + if self.bnb_4bit_comp_dtype is None: + self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) @@ -91,11 +98,9 @@ def llm_infer(args: InferArguments) -> None: print_model_info(model) # ### Inference - tokenize_func = partial( - tokenize_function, - tokenizer=tokenizer, - prompt=args.prompt, - max_length=args.max_length) + template_type = MODEL_MAPPING[args.model_type]['template'] + preprocess_func = get_preprocess(template_type, tokenizer, args.system, + args.max_length) streamer = TextStreamer( tokenizer, skip_prompt=True, skip_special_tokens=True) generation_config = GenerationConfig( @@ -112,7 +117,7 @@ def llm_infer(args: InferArguments) -> None: while True: instruction = input('<<< ') data = {'instruction': instruction} - input_ids = tokenize_func(data)['input_ids'] + input_ids = preprocess_func(data)['input_ids'] inference(input_ids, model, tokenizer, streamer, generation_config) print('-' * 80) else: @@ -125,7 +130,7 @@ def llm_infer(args: InferArguments) -> None: for data in mini_test_dataset: output = data['output'] data['output'] = None - input_ids = tokenize_func(data)['input_ids'] + input_ids = preprocess_func(data)['input_ids'] inference(input_ids, model, tokenizer, streamer, generation_config) print() print(f'[LABELS]{output}') diff --git a/examples/pytorch/llm/src/llm_sft.py b/examples/pytorch/llm/src/llm_sft.py index ab6e73ce16..e3f4a73e22 100644 --- a/examples/pytorch/llm/src/llm_sft.py +++ b/examples/pytorch/llm/src/llm_sft.py @@ -7,18 +7,18 @@ import torch import torch.distributed as dist from transformers import BitsAndBytesConfig -from utils import (DATASET_MAPPING, DEFAULT_PROMPT, MODEL_MAPPING, +from utils import (DATASET_MAPPING, MODEL_MAPPING, TEMPLATE_MAPPING, broadcast_string, find_all_linear_for_lora, get_dataset, - get_dist_setting, get_model_tokenizer, is_dist, plot_images, - process_dataset, select_bnb, select_dtype, show_layers) + get_dist_setting, get_model_tokenizer, get_preprocess, + is_dist, plot_images, process_dataset, select_bnb, + select_dtype, show_layers) from swift import (HubStrategy, LoraConfig, Seq2SeqTrainer, Seq2SeqTrainingArguments, Swift, get_logger) from swift.hub import HubApi, ModelScopeConfig from swift.utils import (add_version_to_work_dir, is_master, parse_args, print_model_info, seed_everything) -from swift.utils.llm_utils import (data_collate_fn, print_example, - stat_dataset, tokenize_function) +from swift.utils.llm_utils import data_collate_fn, print_example, stat_dataset logger = get_logger() @@ -26,10 +26,13 @@ @dataclass class SftArguments: model_type: str = field( - default='qwen-7b', metadata={'choices': 
list(MODEL_MAPPING.keys())}) + default='qwen-7b-chat', + metadata={'choices': list(MODEL_MAPPING.keys())}) # qwen-7b: lora+4bitQ: 10G, lora+8bitQ: 14G, lora: 22G; full: 95G sft_type: str = field( default='lora', metadata={'choices': ['lora', 'full']}) + template_type: str = field( + default=None, metadata={'choices': list(TEMPLATE_MAPPING.keys())}) output_dir: str = 'runs' # DDP + MP(device_map) is not supported ddp_backend: Optional[str] = field( @@ -47,7 +50,7 @@ class SftArguments: dataset_seed: int = 42 dataset_sample: int = 20000 # -1: all dataset dataset_test_size: float = 0.01 - prompt: str = DEFAULT_PROMPT + system: str = 'you are a helpful assistant!' max_length: Optional[int] = 1024 # If you want to use qlora, set the quantization_bit to 8 or 4. @@ -56,7 +59,7 @@ class SftArguments: quantization_bit: Optional[int] = field( default=None, metadata={'choices': {4, 8}}) bnb_4bit_comp_dtype: str = field( - default='fp32', metadata={'choices': {'fp16', 'bf16', 'fp32'}}) + default=None, metadata={'choices': {'fp16', 'bf16', 'fp32'}}) bnb_4bit_quant_type: str = field( default='nf4', metadata={'choices': {'fp4', 'nf4'}}) bnb_4bit_use_double_quant: bool = True @@ -99,7 +102,8 @@ class SftArguments: use_flash_attn: Optional[bool] = field( default=None, metadata={ - 'help': "This parameter is used only when model_type == 'qwen-7b'" + 'help': + "This parameter is used only when model_type.startswith('qwen-7b')" }) def __post_init__(self): @@ -129,6 +133,10 @@ def __post_init__(self): self.save_steps = self.eval_steps * 4 else: raise ValueError(f'sft_type: {self.sft_type}') + if self.template_type is None: + self.template_type = MODEL_MAPPING[self.model_type].get( + 'template', 'default') + logger.info(f'Setting template_type: {self.template_type}') self.output_dir = os.path.join(self.output_dir, self.model_type) @@ -136,6 +144,8 @@ def __post_init__(self): self.lora_target_modules = MODEL_MAPPING[ self.model_type]['lora_TM'] self.torch_dtype, self.fp16, self.bf16 = select_dtype(self.dtype) + if self.bnb_4bit_comp_dtype is None: + self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) @@ -178,7 +188,7 @@ def llm_sft(args: SftArguments) -> None: bnb_4bit_use_double_quant=args.bnb_4bit_use_double_quant) logger.info(f'quantization_config: {quantization_config.__dict__}') kwargs['quantization_config'] = quantization_config - if args.model_type == 'qwen-7b': + if args.model_type.startswith('qwen-7b'): kwargs['use_flash_attn'] = args.use_flash_attn model, tokenizer = get_model_tokenizer( @@ -214,13 +224,10 @@ def llm_sft(args: SftArguments) -> None: args.dataset_test_size, args.dataset_sample, args.dataset_seed) - tokenize_func = partial( - tokenize_function, - tokenizer=tokenizer, - prompt=args.prompt, - max_length=args.max_length) - train_dataset = train_dataset.map(tokenize_func) - val_dataset = val_dataset.map(tokenize_func) + preprocess_func = get_preprocess(args.template_type, tokenizer, + args.system, args.max_length) + train_dataset = train_dataset.map(preprocess_func) + val_dataset = val_dataset.map(preprocess_func) del dataset # Data analysis stat_dataset(train_dataset) diff --git a/examples/pytorch/llm/src/utils/__init__.py b/examples/pytorch/llm/src/utils/__init__.py index ec4a153178..7b349e67ca 100644 --- a/examples/pytorch/llm/src/utils/__init__.py +++ b/examples/pytorch/llm/src/utils/__init__.py @@ -1,5 +1,6 @@ from .datasets import DATASET_MAPPING, get_dataset, 
process_dataset from .models import MODEL_MAPPING, get_model_tokenizer -from .utils import (DEFAULT_PROMPT, broadcast_string, find_all_linear_for_lora, +from .preprocess import TEMPLATE_MAPPING, get_preprocess +from .utils import (broadcast_string, find_all_linear_for_lora, get_dist_setting, inference, is_dist, plot_images, select_bnb, select_dtype, show_layers) diff --git a/examples/pytorch/llm/src/utils/models.py b/examples/pytorch/llm/src/utils/models.py index b76f46433c..9ffcf8aa8e 100644 --- a/examples/pytorch/llm/src/utils/models.py +++ b/examples/pytorch/llm/src/utils/models.py @@ -6,8 +6,6 @@ import torch from modelscope import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, Model, read_config, snapshot_download) -from modelscope.models.nlp.chatglm2 import ChatGLM2Config, ChatGLM2Tokenizer -from modelscope.models.nlp.llama2 import Llama2Config, Llama2Tokenizer from torch import dtype as Dtype from swift import get_logger @@ -97,9 +95,8 @@ def get_model_tokenizer_chatglm2(model_dir: str, model_kwargs['quantization_config'].llm_int8_skip_modules = [ 'output_layer' ] - return get_model_tokenizer_from_sdk(ChatGLM2Config, ChatGLM2Tokenizer, - model_dir, torch_dtype, load_model, - **model_kwargs) + return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, + **model_kwargs) def get_model_tokenizer_llama2(model_dir: str, @@ -109,9 +106,8 @@ def get_model_tokenizer_llama2(model_dir: str, model_config = AutoConfig.from_pretrained( model_dir, trust_remote_code=True) model_config.pretraining_tp = 1 - return get_model_tokenizer_from_sdk(Llama2Config, Llama2Tokenizer, - model_dir, torch_dtype, load_model, - model_config, **model_kwargs) + return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, + model_config, **model_kwargs) def get_model_tokenizer_polylm(model_dir: str, @@ -166,65 +162,79 @@ class LoRATM(NamedTuple): # 'ignore_file_pattern', 'special_token_mapper', 'lora_TM' MODEL_MAPPING = { 'qwen-7b': { - 'model_id': 'qwen/Qwen-7B', + 'model_id': 'qwen/Qwen-7B', # model id or model dir 'revision': 'v.1.0.4', 'get_function': get_model_tokenizer_qwen, + 'template': 'chatml', + 'lora_TM': LoRATM.qwen, + }, + 'qwen-7b-chat': { + 'model_id': 'qwen/Qwen-7B-Chat', + 'revision': 'v1.0.5', + 'get_function': get_model_tokenizer_qwen, + 'template': 'chatml', 'lora_TM': LoRATM.qwen, - 'special_token_mapper': { - 'eos_token': '<|endoftext|>' - } }, 'baichuan-7b': { - 'model_id': 'baichuan-inc/baichuan-7B', # model id or model dir + 'model_id': 'baichuan-inc/baichuan-7B', 'revision': 'v1.0.7', - 'lora_TM': LoRATM.baichuan + 'template': 'baichuan', + 'lora_TM': LoRATM.baichuan, }, 'baichuan-13b': { 'model_id': 'baichuan-inc/Baichuan-13B-Base', 'revision': 'v1.0.5', 'get_function': get_model_tokenizer_baichuan13b, - 'lora_TM': LoRATM.baichuan + 'template': 'baichuan', + 'lora_TM': LoRATM.baichuan, }, 'chatglm2-6b': { 'model_id': 'ZhipuAI/chatglm2-6b', - 'revision': 'v1.0.7', + 'revision': 'v1.0.8', 'get_function': get_model_tokenizer_chatglm2, - 'lora_TM': LoRATM.chatglm2 + 'template': 'chatglm2', + 'lora_TM': LoRATM.chatglm2, }, 'chatglm2-6b-32k': { 'model_id': 'ZhipuAI/chatglm2-6b-32k', 'revision': 'v1.0.0', - 'lora_TM': LoRATM.chatglm2 + 'template': 'chatglm2', + 'lora_TM': LoRATM.chatglm2, }, 'llama2-7b': { 'model_id': 'modelscope/Llama-2-7b-ms', 'revision': 'v1.0.2', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], # use safetensors - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'llama2-13b': { 
'model_id': 'modelscope/Llama-2-13b-ms', 'revision': 'v1.0.2', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'llama2-70b': { 'model_id': 'modelscope/Llama-2-70b-ms', 'revision': 'v1.0.0', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'openbuddy-llama2-13b': { 'model_id': 'OpenBuddy/openbuddy-llama2-13b-v8.1-fp16', 'revision': 'v1.0.0', + 'template': 'openbuddy_llama', 'lora_TM': LoRATM.llama2, }, 'openbuddy-llama-65b': { 'model_id': 'OpenBuddy/openbuddy-llama-65b-v8-bf16', 'revision': 'v1.0.0', + 'template': 'openbuddy_llama', 'lora_TM': LoRATM.llama2, }, 'polylm-13b': { diff --git a/examples/pytorch/llm/src/utils/preprocess.py b/examples/pytorch/llm/src/utils/preprocess.py new file mode 100644 index 0000000000..9ea4040ca4 --- /dev/null +++ b/examples/pytorch/llm/src/utils/preprocess.py @@ -0,0 +1,181 @@ +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from transformers import PreTrainedTokenizer +DEFAULT_SYSTEM = 'you are a helpful assistant!' + +TEMPLATE_MAPPING = { + 'default': { + 'prefix': ['{{system}}\n\n'], + 'prompt': ['### Human:\n', '{{query}}\n\n', '### Assistant:\n'], + 'chat_sep': ['\n\n'], + 'suffix': [['eos_token_id']], + }, + 'chatml': { + 'prefix': [['im_start_id'], 'system\n{{system}}', ['im_end_id'], '\n'], + 'prompt': [['im_start_id'], 'user\n{{query}}', ['im_end_id'], '\n', + ['im_start_id'], 'assistant\n'], + 'chat_sep': [ + ['im_end_id'], + '\n', + ], + 'suffix': [['im_end_id'], ['eod_id']], + }, + 'baichuan': { + 'prefix': [], + 'prompt': [[195], '{{query}}', [196]], + 'chat_sep': [], + 'suffix': [['eos_token_id']], + }, + 'chatglm2':{ + 'prefix': [[64790, 64792]], + 'prompt': [ + '[Round {{round}}]\n\n问:{{query}}\n\n答:' + ], + 'chat_sep': ['\n\n'], + 'suffix': [['eos_token_id']], + }, + 'llama': { + 'prefix': [ + ['bos_token_id'], + '[INST] <>\n{{system}}\n<>\n\n' + ], + 'prompt': [ + '{{query}} [/INST] ' + ], + 'chat_sep': [ + ' ', ['eos_token_id', 'bos_token_id'], '[INST] ' + ], + 'suffix': [['eos_token_id']], + }, + 'openbuddy_llama': { + 'prefix': ['{{system}}\n\n'], + 'prompt': [ + 'User: {{query}}\nAssistant: ' + ], + 'chat_sep': ['\n'], + 'suffix': [['eos_token_id']], + } +} +Context = Union[str, List[int]] + +def simplify_context_list( + context_list: List[Context] +) -> List[Context]: + res = [] + temp = [] + for c in context_list: + if isinstance(c, str): + temp.append(c) + else: + if len(temp) > 0: + res.append(''.join(temp)) + temp.clear() + res.append(c) + if len(temp) > 0: + res.append(''.join(temp)) + if len(res) > 0 and isinstance(res[-1], str): + # avoid two spaces + res[-1] = res[-1].rstrip(' ') + return res + + +def concat_context_list( + context_list: List[Context], + new_context_list: List[Context], + placeholder_list: List[str], + system: Optional[str] = None, + query: Optional[str] = None, + round: Optional[str] = None, +) -> None: + for context in context_list: + if isinstance(context, str): + for old_str, new_str in zip(['{{system}}', '{{query}}', '{{round}}'], + [system, query, round]): + if new_str is not None and old_str in context: + placeholder_list.append(new_str) + new_context_list.append(context) + +def _encode(tokenizer: PreTrainedTokenizer, + context_list: List[Context], + placeholder_list: List[str]) -> List[int]: + input_ids = [] + placeholder_it = iter(placeholder_list) + 
for context in context_list: + if isinstance(context, list): + for c in context: + if isinstance(c, str): + token = getattr(tokenizer, c) + assert token is not None + else: + token = c + input_ids.append(token) + elif isinstance(context, str): + for old_str in ['{{system}}', '{{query}}', '{{round}}']: + if old_str in context: + new_str = next(placeholder_it) + context = context.replace(old_str, new_str) + input_ids += tokenizer( + context, return_attention_mask=False, + add_special_tokens=False)['input_ids'] + return input_ids + + +def _preprocess( + template_type: str, + tokenizer: PreTrainedTokenizer, + query: str, + response: Optional[str] = None, + history: Optional[List[Tuple[str, str]]] = None, + system: Optional[str] = None, + max_length: Optional[int] = None, +) -> Dict[str, List[int]]: + if history is None: + history = [] + + template_config = TEMPLATE_MAPPING[template_type] + if system is None: + system = DEFAULT_SYSTEM + total_context_list = [] + placeholder_list = [] + concat_context_list(template_config['prefix'], total_context_list, + placeholder_list, system=system) + for i, (q, r) in enumerate(history): + concat_context_list( + [*template_config['prompt'], r, *template_config['chat_sep']], + total_context_list, placeholder_list, + query=q, round=str(i+1)) + concat_context_list(template_config['prompt'], total_context_list, placeholder_list, + query=query, round=str(len(history)+1)) + total_context_list = simplify_context_list(total_context_list) + input_ids = _encode(tokenizer, total_context_list, placeholder_list) + + labels = None + if response is not None: + labels = [-100] * len(input_ids) + tgt_input_ids = _encode(tokenizer, [response], []) + tgt_input_ids += _encode(tokenizer, template_config['suffix'], []) + input_ids += tgt_input_ids + labels += tgt_input_ids + + if max_length is not None: + input_ids = input_ids[-max_length:] + if labels is not None: + labels = labels[-max_length:] + + return {'input_ids': input_ids, 'labels': labels} + + +def get_preprocess( + template_type: str, tokenizer: PreTrainedTokenizer, + system: Optional[str]=None, max_length: Optional[int]=None +) -> Callable[[Dict[str, Any]], Dict[str, List[int]]]: + + def preprocess(examples: Dict[str, Any]) -> Dict[str, List[int]]: + history = examples['history'] + query = history[-1][0] + response = history[-1][1] + history = history[:-1] + return _preprocess( + template_type, tokenizer, query, response, history, system, max_length) + + return preprocess diff --git a/examples/pytorch/llm/src/utils/utils.py b/examples/pytorch/llm/src/utils/utils.py index 9bf6aaec14..4d34180394 100644 --- a/examples/pytorch/llm/src/utils/utils.py +++ b/examples/pytorch/llm/src/utils/utils.py @@ -15,17 +15,6 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'true' logger = get_logger() -# The `output` section will be concatenated at the end -# `prompt` part does not calculate the loss, `output` part calculates the loss -DEFAULT_PROMPT = """Here's a conversation between a human and an AI assistant. \ -The AI assistant provides detailed, friendly answers for the human. 
- -### Human: -{instruction} - -### AI: -""" - DTYPE_MAPPING = { 'fp16': torch.float16, 'bf16': torch.bfloat16, diff --git a/swift/utils/llm_utils.py b/swift/utils/llm_utils.py index cfcf787253..3ae6e3aca7 100644 --- a/swift/utils/llm_utils.py +++ b/swift/utils/llm_utils.py @@ -27,38 +27,6 @@ def stat_dataset(dataset: HfDataset) -> None: ) -def tokenize_function(example: Dict[str, - Optional[str]], tokenizer, prompt: str, - max_length: Optional[int]) -> Dict[str, Any]: - instruction: str = example['instruction'] - output = example.get('output') - src_text = prompt.format(instruction=instruction) - src_input_ids: List[int] = tokenizer( - src_text, return_attention_mask=False, - add_special_tokens=True)['input_ids'] - if src_input_ids[-1] == tokenizer.eos_token_id: - src_input_ids.pop() - - tgt_input_ids = [] - if output is not None: - assert tokenizer.eos_token_id is not None - tgt_input_ids += tokenizer( - output, return_attention_mask=False, - add_special_tokens=False)['input_ids'] - tgt_input_ids += [tokenizer.eos_token_id] - labels = [-100] * len(src_input_ids) + tgt_input_ids - else: - labels = None - input_ids = src_input_ids + tgt_input_ids - - if max_length is not None: - input_ids = input_ids[-max_length:] - if labels is not None: - labels = labels[-max_length:] - - return {'input_ids': input_ids, 'labels': labels} - - def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, Any]: assert tokenizer.pad_token_id is not None input_ids = [torch.tensor(b['input_ids']) for b in batch] From d5df8dbf0998ccc21a32cc216009f322304505f1 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 15:59:33 +0800 Subject: [PATCH 2/9] update sh --- .../{qwen_7b => qwen_7b_chat}/full/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/full/sft.sh | 4 +-- .../lora_ddp/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/lora_ddp/sft.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/qlora/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/qlora/sft.sh | 4 +-- .../scripts/qwen_7b_chat/qlora_ddp/infer.sh | 14 ++++++++ .../llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh | 36 +++++++++++++++++++ .../llm/src/utils/{datasets.py => dataset.py} | 0 .../llm/src/utils/{models.py => model.py} | 0 10 files changed, 62 insertions(+), 12 deletions(-) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/full/infer.sh (71%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/full/sft.sh (90%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/lora_ddp/infer.sh (71%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/lora_ddp/sft.sh (92%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/qlora/infer.sh (74%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/qlora/sft.sh (91%) create mode 100644 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh create mode 100644 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh rename examples/pytorch/llm/src/utils/{datasets.py => dataset.py} (100%) rename examples/pytorch/llm/src/utils/{models.py => model.py} (100%) diff --git a/examples/pytorch/llm/scripts/qwen_7b/full/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh similarity index 71% rename from examples/pytorch/llm/scripts/qwen_7b/full/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh index 6a049dccd1..685d917b57 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/full/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh @@ -1,10 +1,10 @@ # 19G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py 
\ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type full \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --max_new_tokens 1024 \ --temperature 0.9 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/full/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh similarity index 90% rename from examples/pytorch/llm/scripts/qwen_7b/full/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh index 62b0d6c75b..8b4e1f3fa9 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/full/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh @@ -2,7 +2,7 @@ # Experimental environment: 8 * 3090 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 \ python src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type full \ --dtype bf16 \ --output_dir runs \ @@ -22,6 +22,6 @@ python src/llm_sft.py \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-full \ + --hub_model_id qwen-7b-chat-full \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh similarity index 71% rename from examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh index 14bfb01309..6b933c92b1 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh @@ -1,10 +1,10 @@ # 19G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --max_new_tokens 1024 \ --temperature 0.9 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh similarity index 92% rename from examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh index f414fe2164..30df447cf9 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh @@ -5,7 +5,7 @@ torchrun \ --nproc_per_node=$nproc_per_node \ --master_port 29500 \ src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ --output_dir runs \ @@ -29,6 +29,6 @@ torchrun \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-lora \ + --hub_model_id qwen-7b-chat-lora \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh similarity index 74% rename from examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh index b8e35a36b9..51c71c37ae 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh @@ -1,10 +1,10 @@ # 10G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ 
--max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh similarity index 91% rename from examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh index 1da754a352..ac1240c78e 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh @@ -1,7 +1,7 @@ # 16GB VRAM CUDA_VISIBLE_DEVICES=0 \ python src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ --output_dir runs \ @@ -26,6 +26,6 @@ python src/llm_sft.py \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-qlora \ + --hub_model_id qwen-7b-chat-qlora \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh new file mode 100644 index 0000000000..51c71c37ae --- /dev/null +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh @@ -0,0 +1,14 @@ +# 10G +CUDA_VISIBLE_DEVICES=0 \ +python src/llm_infer.py \ + --model_type qwen-7b-chat \ + --sft_type lora \ + --dtype bf16 \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ + --eval_human true \ + --quantization_bit 4 \ + --max_new_tokens 1024 \ + --temperature 0.9 \ + --top_k 50 \ + --top_p 0.9 \ + --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh new file mode 100644 index 0000000000..7ca32a52ac --- /dev/null +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh @@ -0,0 +1,36 @@ +# 4 * 16GB VRAM +nproc_per_node=4 +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +torchrun \ + --nproc_per_node=$nproc_per_node \ + --master_port 29500 \ + src/llm_sft.py \ + --model_type qwen-7b-chat \ + --sft_type lora \ + --dtype bf16 \ + --output_dir runs \ + --ddp_backend nccl \ + --dataset alpaca-en,alpaca-zh \ + --dataset_sample -1 \ + --num_train_epochs 1 \ + --max_length 1024 \ + --quantization_bit 4 \ + --lora_rank 64 \ + --lora_alpha 32 \ + --lora_dropout_p 0.05 \ + --lora_target_modules ALL \ + --batch_size 1 \ + --weight_decay 0. 
\ + --learning_rate 1e-4 \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ + --max_grad_norm 0.5 \ + --warmup_ratio 0.03 \ + --eval_steps 50 \ + --save_steps 50 \ + --save_total_limit 2 \ + --logging_steps 10 \ + --use_flash_attn false \ + --push_to_hub false \ + --hub_model_id qwen-7b-chat-qlora \ + --hub_private_repo true \ + --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/src/utils/datasets.py b/examples/pytorch/llm/src/utils/dataset.py similarity index 100% rename from examples/pytorch/llm/src/utils/datasets.py rename to examples/pytorch/llm/src/utils/dataset.py diff --git a/examples/pytorch/llm/src/utils/models.py b/examples/pytorch/llm/src/utils/model.py similarity index 100% rename from examples/pytorch/llm/src/utils/models.py rename to examples/pytorch/llm/src/utils/model.py From 9c9e0cd6ae39709f0dc8fbe859703520e7a4a05e Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 16:02:13 +0800 Subject: [PATCH 3/9] fix bug --- examples/pytorch/llm/src/llm_infer.py | 14 ++- examples/pytorch/llm/src/utils/__init__.py | 4 +- examples/pytorch/llm/src/utils/dataset.py | 5 +- examples/pytorch/llm/src/utils/model.py | 27 +++--- examples/pytorch/llm/src/utils/preprocess.py | 96 ++++++++++---------- examples/pytorch/llm/src/utils/utils.py | 5 +- 6 files changed, 77 insertions(+), 74 deletions(-) diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py index 6e8e651af5..126c569abf 100644 --- a/examples/pytorch/llm/src/llm_infer.py +++ b/examples/pytorch/llm/src/llm_infer.py @@ -53,6 +53,7 @@ class InferArguments: temperature: float = 0.9 top_k: int = 50 top_p: float = 0.9 + skip_prompt: Optional[bool] = None def __post_init__(self): if not os.path.isdir(self.ckpt_dir): @@ -67,6 +68,8 @@ def __post_init__(self): self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) + if self.skip_prompt is None: + self.skip_prompt = self.eval_human def llm_infer(args: InferArguments) -> None: @@ -115,11 +118,11 @@ def llm_infer(args: InferArguments) -> None: if args.eval_human: while True: - instruction = input('<<< ') - data = {'instruction': instruction} + query = input('<<< ') + data = {'query': query} input_ids = preprocess_func(data)['input_ids'] - inference(input_ids, model, tokenizer, streamer, generation_config) - print('-' * 80) + inference(input_ids, model, tokenizer, streamer, generation_config, + args.skip_prompt) else: dataset = get_dataset(args.dataset.split(',')) _, test_dataset = process_dataset(dataset, args.dataset_test_size, @@ -131,7 +134,8 @@ def llm_infer(args: InferArguments) -> None: output = data['output'] data['output'] = None input_ids = preprocess_func(data)['input_ids'] - inference(input_ids, model, tokenizer, streamer, generation_config) + inference(input_ids, model, tokenizer, streamer, generation_config, + args.skip_prompt) print() print(f'[LABELS]{output}') print('-' * 80) diff --git a/examples/pytorch/llm/src/utils/__init__.py b/examples/pytorch/llm/src/utils/__init__.py index 7b349e67ca..11b7941d90 100644 --- a/examples/pytorch/llm/src/utils/__init__.py +++ b/examples/pytorch/llm/src/utils/__init__.py @@ -1,5 +1,5 @@ -from .datasets import DATASET_MAPPING, get_dataset, process_dataset -from .models import MODEL_MAPPING, get_model_tokenizer +from .dataset import DATASET_MAPPING, get_dataset, process_dataset +from .model import MODEL_MAPPING, get_model_tokenizer from .preprocess import 
TEMPLATE_MAPPING, get_preprocess from .utils import (broadcast_string, find_all_linear_for_lora, get_dist_setting, inference, is_dist, plot_images, diff --git a/examples/pytorch/llm/src/utils/dataset.py b/examples/pytorch/llm/src/utils/dataset.py index e4a099147f..980da50f63 100644 --- a/examples/pytorch/llm/src/utils/dataset.py +++ b/examples/pytorch/llm/src/utils/dataset.py @@ -23,8 +23,9 @@ def _processing_alpaca( inst = f'{inst}\n{inp}' new_instruction.append(inst) dataset = HfDataset.from_dict({ - 'instruction': new_instruction, - 'output': dataset['output'] + 'history': [None] * len(new_instruction), + 'query': new_instruction, + 'response': dataset['output'] }) return dataset diff --git a/examples/pytorch/llm/src/utils/model.py b/examples/pytorch/llm/src/utils/model.py index 9ffcf8aa8e..941e704940 100644 --- a/examples/pytorch/llm/src/utils/model.py +++ b/examples/pytorch/llm/src/utils/model.py @@ -1,7 +1,7 @@ import os # os.environ['CUDA_VISIBLE_DEVICES'] = '0' from types import MethodType -from typing import Any, Dict, NamedTuple, Optional +from typing import NamedTuple, Optional import torch from modelscope import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, Model, @@ -13,15 +13,6 @@ logger = get_logger() -def _add_special_token(tokenizer, special_token_mapper: Dict[str, - Any]) -> None: - for k, v in special_token_mapper.items(): - setattr(tokenizer, k, v) - assert tokenizer.eos_token is not None - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - def get_model_tokenizer_from_repo(model_dir: str, torch_dtype: Dtype, load_model: bool = True, @@ -144,8 +135,11 @@ def get_model_tokenizer_qwen(model_dir: str, use_flash_attn = kwargs.pop('use_flash_attn', 'auto') model_config.use_flash_attn = use_flash_attn - return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, - model_config, **kwargs) + model, tokenizer = get_model_tokenizer_from_repo(model_dir, torch_dtype, + load_model, model_config, + **kwargs) + tokenizer.eos_token_id = tokenizer.eod_id + return model, tokenizer class LoRATM(NamedTuple): @@ -158,8 +152,8 @@ class LoRATM(NamedTuple): # Model Home: 'https://modelscope.cn/models/{model_id}/summary' -# keys: 'model_id', 'revision', 'get_function', -# 'ignore_file_pattern', 'special_token_mapper', 'lora_TM' +# keys: 'model_id', 'revision', 'get_function', 'template', +# 'ignore_file_pattern', 'lora_TM' MODEL_MAPPING = { 'qwen-7b': { 'model_id': 'qwen/Qwen-7B', # model id or model dir @@ -257,7 +251,6 @@ def get_model_tokenizer(model_type: str, model_id = data['model_id'] get_function = data.get('get_function', get_model_tokenizer_from_repo) ignore_file_pattern = data.get('ignore_file_pattern', []) - special_token_mapper = data.get('special_token_mapper', {}) if torch_dtype is None: torch_dtype = data.get('torch_dtype', torch.float16) if 'device_map' not in kwargs: @@ -273,5 +266,7 @@ def get_model_tokenizer(model_type: str, model, tokenizer = get_function(model_dir, torch_dtype, load_model, **kwargs) - _add_special_token(tokenizer, special_token_mapper) + assert tokenizer.eos_token is not None + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token return model, tokenizer diff --git a/examples/pytorch/llm/src/utils/preprocess.py b/examples/pytorch/llm/src/utils/preprocess.py index 9ea4040ca4..19403122c5 100644 --- a/examples/pytorch/llm/src/utils/preprocess.py +++ b/examples/pytorch/llm/src/utils/preprocess.py @@ -1,6 +1,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union from 
transformers import PreTrainedTokenizer + DEFAULT_SYSTEM = 'you are a helpful assistant!' TEMPLATE_MAPPING = { @@ -26,41 +27,30 @@ 'chat_sep': [], 'suffix': [['eos_token_id']], }, - 'chatglm2':{ + 'chatglm2': { 'prefix': [[64790, 64792]], - 'prompt': [ - '[Round {{round}}]\n\n问:{{query}}\n\n答:' - ], + 'prompt': ['[Round {{round}}]\n\n问:{{query}}\n\n答:'], 'chat_sep': ['\n\n'], - 'suffix': [['eos_token_id']], + 'suffix': [['eos_token_id']], }, 'llama': { - 'prefix': [ - ['bos_token_id'], - '[INST] <>\n{{system}}\n<>\n\n' - ], - 'prompt': [ - '{{query}} [/INST] ' - ], - 'chat_sep': [ - ' ', ['eos_token_id', 'bos_token_id'], '[INST] ' - ], - 'suffix': [['eos_token_id']], + 'prefix': [['bos_token_id'], + '[INST] <>\n{{system}}\n<>\n\n'], + 'prompt': ['{{query}} [/INST] '], + 'chat_sep': [' ', ['eos_token_id', 'bos_token_id'], '[INST] '], + 'suffix': [['eos_token_id']], }, 'openbuddy_llama': { 'prefix': ['{{system}}\n\n'], - 'prompt': [ - 'User: {{query}}\nAssistant: ' - ], + 'prompt': ['User: {{query}}\nAssistant: '], 'chat_sep': ['\n'], - 'suffix': [['eos_token_id']], + 'suffix': [['eos_token_id']], } } Context = Union[str, List[int]] -def simplify_context_list( - context_list: List[Context] -) -> List[Context]: + +def simplify_context_list(context_list: List[Context]) -> List[Context]: res = [] temp = [] for c in context_list: @@ -80,23 +70,24 @@ def simplify_context_list( def concat_context_list( - context_list: List[Context], - new_context_list: List[Context], - placeholder_list: List[str], - system: Optional[str] = None, - query: Optional[str] = None, - round: Optional[str] = None, + context_list: List[Context], + new_context_list: List[Context], + placeholder_list: List[str], + system: Optional[str] = None, + query: Optional[str] = None, + round: Optional[str] = None, ) -> None: for context in context_list: if isinstance(context, str): - for old_str, new_str in zip(['{{system}}', '{{query}}', '{{round}}'], - [system, query, round]): + for old_str, new_str in zip( + ['{{system}}', '{{query}}', '{{round}}'], + [system, query, round]): if new_str is not None and old_str in context: placeholder_list.append(new_str) new_context_list.append(context) -def _encode(tokenizer: PreTrainedTokenizer, - context_list: List[Context], + +def _encode(tokenizer: PreTrainedTokenizer, context_list: List[Context], placeholder_list: List[str]) -> List[int]: input_ids = [] placeholder_it = iter(placeholder_list) @@ -137,15 +128,24 @@ def _preprocess( system = DEFAULT_SYSTEM total_context_list = [] placeholder_list = [] - concat_context_list(template_config['prefix'], total_context_list, - placeholder_list, system=system) + concat_context_list( + template_config['prefix'], + total_context_list, + placeholder_list, + system=system) for i, (q, r) in enumerate(history): concat_context_list( [*template_config['prompt'], r, *template_config['chat_sep']], - total_context_list, placeholder_list, - query=q, round=str(i+1)) - concat_context_list(template_config['prompt'], total_context_list, placeholder_list, - query=query, round=str(len(history)+1)) + total_context_list, + placeholder_list, + query=q, + round=str(i + 1)) + concat_context_list( + template_config['prompt'], + total_context_list, + placeholder_list, + query=query, + round=str(len(history) + 1)) total_context_list = simplify_context_list(total_context_list) input_ids = _encode(tokenizer, total_context_list, placeholder_list) @@ -166,16 +166,18 @@ def _preprocess( def get_preprocess( - template_type: str, tokenizer: PreTrainedTokenizer, - system: 
Optional[str]=None, max_length: Optional[int]=None + template_type: str, + tokenizer: PreTrainedTokenizer, + system: Optional[str] = None, + max_length: Optional[int] = None ) -> Callable[[Dict[str, Any]], Dict[str, List[int]]]: def preprocess(examples: Dict[str, Any]) -> Dict[str, List[int]]: - history = examples['history'] - query = history[-1][0] - response = history[-1][1] - history = history[:-1] - return _preprocess( - template_type, tokenizer, query, response, history, system, max_length) + history: Optional[List[Tuple[str, + str]]] = examples.get('history', None) + query: str = examples['query'] + response: str = examples.get('response', None) + return _preprocess(template_type, tokenizer, query, response, history, + system, max_length) return preprocess diff --git a/examples/pytorch/llm/src/utils/utils.py b/examples/pytorch/llm/src/utils/utils.py index 4d34180394..b279ac81f2 100644 --- a/examples/pytorch/llm/src/utils/utils.py +++ b/examples/pytorch/llm/src/utils/utils.py @@ -88,8 +88,9 @@ def inference(input_ids: List[int], tokenizer, streamer: Optional[TextStreamer] = None, generation_config: Optional[GenerationConfig] = None, - tag: str = '[INFERENCE]') -> str: - print(f'{tag}{tokenizer.decode(input_ids)}', end='') + skip_prompt: bool = True) -> str: + if not skip_prompt: + print(f'[INFERENCE]{tokenizer.decode(input_ids)}', end='') input_ids = torch.tensor(input_ids)[None].cuda() attention_mask = torch.ones_like(input_ids) model.eval() From 29e6eb407452412eb14de9ad0083d2f47ac1e4ac Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 17:15:26 +0800 Subject: [PATCH 4/9] update baichuan-13b-chat, llama2-{7,13,70}b-chat --- examples/pytorch/llm/README.md | 2 +- examples/pytorch/llm/README_CN.md | 2 +- .../scripts/baichuan_13b/qlora_ddp/infer.sh | 4 +- .../llm/scripts/baichuan_13b/qlora_ddp/sft.sh | 2 +- .../llm/scripts/llama2_70b/qlora/infer.sh | 4 +- .../llm/scripts/llama2_70b/qlora/sft.sh | 4 +- examples/pytorch/llm/src/utils/model.py | 37 +++++++++++++++++-- 7 files changed, 42 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 2605d841de..8dea971980 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -16,7 +16,7 @@ ## Features 1. supported sft method: [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), full(full parameter fine tuning), ... -2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... +2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. supported feature: quantization, ddp, model parallelism(device map), gradient checkpoint, gradient accumulation steps, push to modelscope hub, custom datasets, ... 4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... 5. supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... 
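For reference, a custom template follows the same four-key layout that `utils/preprocess.py` introduces above (`prefix`, `prompt`, `chat_sep`, `suffix`). The sketch below is a hypothetical example — the `my_template` name and its strings are illustrative and not part of this patch series:

```python
# Hypothetical TEMPLATE_MAPPING entry (would live in utils/preprocess.py).
# Per _encode(): plain strings are tokenized with add_special_tokens=False after
# {{system}}/{{query}}/{{round}} substitution; list items are token ids, given either
# as literal ints or as tokenizer attribute names such as 'eos_token_id'.
my_template = {
    'prefix': ['{{system}}\n'],        # emitted once at the start of the sequence
    'prompt': ['Q: {{query}}\nA: '],   # emitted for every round of the conversation
    'chat_sep': ['\n'],                # separator inserted between historical rounds
    'suffix': [['eos_token_id']],      # appended after the response when labels are built
}
# TEMPLATE_MAPPING['my_template'] = my_template  # then select it via --template_type my_template
```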
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index 5f40d7f06c..a5d5235c6e 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -17,7 +17,7 @@ ## 特性 1. [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), 全参数微调, ... -2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... +2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. 支持的特性: 模型量化, DDP, 模型并行(device_map), gradient checkpoint, 梯度累加, 支持推送modelscope hub, 支持自定义数据集, ... 4. 支持的数据集: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... 5. 支持的template: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh index 13c65c2d2d..b960909975 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh @@ -1,9 +1,9 @@ # 12G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type baichuan-13b \ + --model_type baichuan-13b-chat \ --sft_type lora \ - --ckpt_dir "runs/baichuan-13b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/baichuan-13b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ --max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh index 9493d7645a..b90aa26c02 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh @@ -5,7 +5,7 @@ torchrun \ --nproc_per_node=$nproc_per_node \ --master_port 29500 \ src/llm_sft.py \ - --model_type baichuan-13b \ + --model_type baichuan-13b-chat \ --sft_type lora \ --output_dir runs \ --ddp_backend nccl \ diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh b/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh index ccd09161da..5b8032a06d 100644 --- a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh @@ -1,9 +1,9 @@ # 40G CUDA_VISIBLE_DEVICES=0,1 \ python src/llm_infer.py \ - --model_type llama2-7b \ + --model_type llama2-7b-chat \ --sft_type lora \ - --ckpt_dir "runs/llama2-70b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/llama2-70b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ --max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh b/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh index d38db3c8e7..31626d8afd 100644 --- a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh +++ b/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh @@ -2,10 +2,10 @@ # llama2 is not good at Chinese CUDA_VISIBLE_DEVICES=0,1 \ python src/llm_sft.py \ - --model_type llama2-70b \ + --model_type llama2-70b-chat \ --sft_type lora \ --output_dir runs \ - --dataset alpaca-en,alpaca-zh \ + --dataset alpaca-en \ --dataset_sample 20000 \ --num_train_epochs 1 \ --max_length 1024 \ diff --git 
a/examples/pytorch/llm/src/utils/model.py b/examples/pytorch/llm/src/utils/model.py
index 941e704940..ec165bf247 100644
--- a/examples/pytorch/llm/src/utils/model.py
+++ b/examples/pytorch/llm/src/utils/model.py
@@ -73,8 +73,10 @@ def get_model_tokenizer_baichuan13b(model_dir: str,
     model, tokenizer = get_model_tokenizer_from_repo(model_dir, torch_dtype,
                                                      load_model,
                                                      **model_kwargs)
-    model.get_input_embeddings = MethodType(
-        lambda self: self.model.embed_tokens, model)
+
+    if not hasattr(model, 'get_input_embeddings'):
+        model.get_input_embeddings = MethodType(
+            lambda self: self.model.embed_tokens, model)
     return model, tokenizer
@@ -143,7 +145,7 @@ def get_model_tokenizer_qwen(model_dir: str,


 class LoRATM(NamedTuple):
-    # default lora target modules
+    # default lora target modules. qkv
     baichuan = ['W_pack']
     chatglm2 = ['query_key_value']
     llama2 = ['q_proj', 'k_proj', 'v_proj']
@@ -182,6 +184,12 @@ class LoRATM(NamedTuple):
         'template': 'baichuan',
         'lora_TM': LoRATM.baichuan,
     },
+    'baichuan-13b-chat': {
+        'model_id': 'baichuan-inc/Baichuan-13B-Chat',
+        'revision': 'v1.0.8',
+        'template': 'baichuan',
+        'lora_TM': LoRATM.baichuan,
+    },
     'chatglm2-6b': {
         'model_id': 'ZhipuAI/chatglm2-6b',
         'revision': 'v1.0.8',
@@ -198,7 +206,6 @@ class LoRATM(NamedTuple):
     'llama2-7b': {
         'model_id': 'modelscope/Llama-2-7b-ms',
         'revision': 'v1.0.2',
-        'get_function': get_model_tokenizer_llama2,
         'template': 'llama',
         'ignore_file_pattern': [r'.+\.bin$'],  # use safetensors
         'lora_TM': LoRATM.llama2,
@@ -214,6 +221,28 @@ class LoRATM(NamedTuple):
     'llama2-70b': {
         'model_id': 'modelscope/Llama-2-70b-ms',
         'revision': 'v1.0.0',
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-7b-chat': {
+        'model_id': 'modelscope/Llama-2-7b-chat-ms',
+        'revision': 'v1.0.2',
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],  # use safetensors
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-13b-chat': {
+        'model_id': 'modelscope/Llama-2-13b-chat-ms',
+        'revision': 'v1.0.2',
+        'get_function': get_model_tokenizer_llama2,
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-70b-chat': {
+        'model_id': 'modelscope/Llama-2-70b-chat-ms',
+        'revision': 'v1.0.1',
+        'get_function': get_model_tokenizer_llama2,
     'template': 'llama',
     'ignore_file_pattern': [r'.+\.bin$'],

From 24526d46399a27e92c91ced5e92c72dab8ff74a9 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 09:27:21 +0800
Subject: [PATCH 5/9] update sh

---
 .../{baichuan_13b => baichuan_13b_chat}/qlora_ddp/infer.sh     | 0
 .../{baichuan_13b => baichuan_13b_chat}/qlora_ddp/sft.sh       | 0
 .../llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/infer.sh | 0
 .../llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/sft.sh   | 0
 examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh        | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh          | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh    | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh      | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh       | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh         | 2 ++
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh   | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh     | 2 ++
 12 files changed, 10 insertions(+)
 rename examples/pytorch/llm/scripts/{baichuan_13b => baichuan_13b_chat}/qlora_ddp/infer.sh (100%)
 rename examples/pytorch/llm/scripts/{baichuan_13b => baichuan_13b_chat}/qlora_ddp/sft.sh (100%)
 rename examples/pytorch/llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/infer.sh (100%)
 rename examples/pytorch/llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/sft.sh (100%)

diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/infer.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh
rename to examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/infer.sh
diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/sft.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh
rename to examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/sft.sh
diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora/infer.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh
rename to examples/pytorch/llm/scripts/llama2_70b_chat/qlora/infer.sh
diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora/sft.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh
rename to examples/pytorch/llm/scripts/llama2_70b_chat/qlora/sft.sh
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
index 685d917b57..2583dd915d 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type full \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
index 8b4e1f3fa9..6ce044db16 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
@@ -4,6 +4,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 \
 python src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type full \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --dataset alpaca-en,alpaca-zh \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
index 6b933c92b1..57abdfd3e6 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
index 30df447cf9..6610c0a79f 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
index 51c71c37ae..5f25801938 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
index ac1240c78e..d886bc305c 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --dataset alpaca-en,alpaca-zh \
@@ -10,6 +11,7 @@ python src/llm_sft.py \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
index 51c71c37ae..5f25801938 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
index 7ca32a52ac..70e7eeb9e3 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
@@ -15,6 +16,7 @@ torchrun \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \

From 2f440b058bd4a607c0c344cb30973a8abcff234e Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 14:07:28 +0800
Subject: [PATCH 6/9] update sh

---
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh      | 1 +
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh        | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh     | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh | 1 +
 4 files changed, 4 insertions(+)

diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index b8e35a36b9..85c5e211c8 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -7,6 +7,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
index 78acd9da50..8fdc71a2a4 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -15,6 +15,7 @@ torchrun \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
index 5f25801938..3fc5883642 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
@@ -8,6 +8,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
index 5f25801938..3fc5883642 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
@@ -8,6 +8,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \

From 19758128639fb5045cec3f3d57f74f94bcad159b Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 15:43:35 +0800
Subject: [PATCH 7/9] fix bug

---
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh | 1 +
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh   | 1 +
 examples/pytorch/llm/src/llm_infer.py                   | 2 +-
 examples/pytorch/llm/src/utils/dataset.py               | 1 -
 4 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index 85c5e211c8..ba6a61c880 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
index 8fdc71a2a4..e5bd09af12 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py
index 126c569abf..d9d0ead270 100644
--- a/examples/pytorch/llm/src/llm_infer.py
+++ b/examples/pytorch/llm/src/llm_infer.py
@@ -18,7 +18,7 @@
 @dataclass
 class InferArguments:
     model_type: str = field(
-        default='qwen-7b', metadata={'choices': list(MODEL_MAPPING.keys())})
+        default='qwen-7b-chat', metadata={'choices': list(MODEL_MAPPING.keys())})
     sft_type: str = field(
         default='lora', metadata={'choices': ['lora', 'full']})
     template_type: str = field(
diff --git a/examples/pytorch/llm/src/utils/dataset.py b/examples/pytorch/llm/src/utils/dataset.py
index 980da50f63..164878a270 100644
--- a/examples/pytorch/llm/src/utils/dataset.py
+++ b/examples/pytorch/llm/src/utils/dataset.py
@@ -23,7 +23,6 @@ def _processing_alpaca(
             inst = f'{inst}\n{inp}'
         new_instruction.append(inst)
     dataset = HfDataset.from_dict({
-        'history': [None] * len(new_instruction),
         'query': new_instruction,
         'response': dataset['output']
     })
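For context on the `--bnb_4bit_comp_dtype bf16` flag that these scripts add alongside `--quantization_bit 4`: below is a minimal, hedged sketch of the bitsandbytes configuration it plausibly corresponds to. The actual wiring lives in the repo's `select_bnb` helper, whose signature is not reproduced here; `build_bnb_config` is an illustrative name, and the nf4/double-quantization defaults are assumptions.

```python
# Hedged sketch only: how `--quantization_bit 4 --bnb_4bit_comp_dtype bf16`
# could map onto a transformers BitsAndBytesConfig. `build_bnb_config` is a
# hypothetical helper, not repo code; nf4/double-quant defaults are assumptions.
import torch
from transformers import BitsAndBytesConfig


def build_bnb_config(quantization_bit: int = 4,
                     bnb_4bit_comp_dtype: str = 'bf16') -> BitsAndBytesConfig:
    comp_dtype = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
        'fp32': torch.float32,
    }[bnb_4bit_comp_dtype]
    return BitsAndBytesConfig(
        load_in_4bit=(quantization_bit == 4),
        load_in_8bit=(quantization_bit == 8),
        bnb_4bit_compute_dtype=comp_dtype,  # matmuls run in this dtype
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True)
```

Such a config is typically passed as `quantization_config=...` to `AutoModelForCausalLM.from_pretrained`; note that bf16 compute generally requires an Ampere-or-newer GPU.
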
From 6512bee311307435109995e732c69819d380ae5e Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 15:47:24 +0800
Subject: [PATCH 8/9] update readme

---
 examples/pytorch/llm/README.md    | 20 ++++++++++----------
 examples/pytorch/llm/README_CN.md | 20 ++++++++++----------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md
index 8dea971980..cd002cce62 100644
--- a/examples/pytorch/llm/README.md
+++ b/examples/pytorch/llm/README.md
@@ -59,25 +59,25 @@ cd swift/examples/pytorch/llm
 # sft(qlora) and infer qwen-7b, Requires 16GB VRAM.
 # If you want to use quantification, you need to `pip install bitsandbytes`
 # If you want to push weights into modelscope hub during training, you need to set '--push_to_hub true'
-bash scripts/qwen_7b/qlora/sft.sh
-bash scripts/qwen_7b/qlora/infer.sh
+bash scripts/qwen_7b_chat/qlora/sft.sh
+bash scripts/qwen_7b_chat/qlora/infer.sh

 # sft(qlora+ddp) and infer qwen-7b, Requires 4*16GB VRAM.
-bash scripts/qwen_7b/qlora_ddp/sft.sh
-bash scripts/qwen_7b/qlora_ddp/infer.sh
+bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
+bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

 # sft(lora+ddp) and infer qwen-7b, Requires 4*22GB VRAM.
-bash scripts/qwen_7b/lora_ddp/sft.sh
-bash scripts/qwen_7b/lora_ddp/infer.sh
+bash scripts/qwen_7b_chat/lora_ddp/sft.sh
+bash scripts/qwen_7b_chat/lora_ddp/infer.sh

 # sft(full) and infer qwen-7b, Requires 95GB VRAM.
-bash scripts/qwen_7b/full/sft.sh
-bash scripts/qwen_7b/full/infer.sh
+bash scripts/qwen_7b_chat/full/sft.sh
+bash scripts/qwen_7b_chat/full/infer.sh

 # For more scripts, please see `scripts/` folder
 ```

-## Extend Models and Datasets
+## Extend Datasets
 1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/model.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work.
-2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`.
+2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `query`, `response`.
 3. If you need to extend the template, you can modify the `TEMPLATE_MAPPING` in `utils/preprocess.py`.
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md
index a5d5235c6e..a47849d222 100644
--- a/examples/pytorch/llm/README_CN.md
+++ b/examples/pytorch/llm/README_CN.md
@@ -61,25 +61,25 @@ cd swift/examples/pytorch/llm
 # 微调(qlora)+推理 qwen-7b, 需要16GB显存.
 # 如果你想要使用量化, 你需要`pip install bitsandbytes`
 # 如果你想在训练时, 将权重push到modelscope hub中, 你需要设置`--push_to_hub true`
-bash scripts/qwen_7b/qlora/sft.sh
-bash scripts/qwen_7b/qlora/infer.sh
+bash scripts/qwen_7b_chat/qlora/sft.sh
+bash scripts/qwen_7b_chat/qlora/infer.sh

 # 微调(qlora+ddp)+推理 qwen-7b, 需要4卡*16GB显存.
-bash scripts/qwen_7b/qlora_ddp/sft.sh
-bash scripts/qwen_7b/qlora_ddp/infer.sh
+bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
+bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

 # 微调(lora+ddp)+推理 qwen-7b, 需要4卡*22GB显存.
-bash scripts/qwen_7b/lora_ddp/sft.sh
-bash scripts/qwen_7b/lora_ddp/infer.sh
+bash scripts/qwen_7b_chat/lora_ddp/sft.sh
+bash scripts/qwen_7b_chat/lora_ddp/infer.sh

 # 微调(full)+推理 qwen-7b, 需要95G显存.
-bash scripts/qwen_7b/full/sft.sh
-bash scripts/qwen_7b/full/infer.sh
+bash scripts/qwen_7b_chat/full/sft.sh
+bash scripts/qwen_7b_chat/full/infer.sh

 # 更多的scripts脚本, 可以看`scripts`文件夹
 ```

-## 拓展模型和数据集
+## 拓展数据集
 1. 如果你想要拓展模型, 你可以修改`utils/model.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用.
-2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集.
+2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`query`, `response`两列的数据集.
 3. 如果你想要拓展template, 你可以修改`utils/preprocess.py`文件中的`TEMPLATE_MAPPING`.

From a5d99420b94446d0c609e73b868b4fb287050e61 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 22 Aug 2023 11:00:20 +0800
Subject: [PATCH 9/9] update swift

---
 swift/trainers/mixin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/trainers/mixin.py b/swift/trainers/mixin.py
index 71c9a2ea8c..af133fe8db 100644
--- a/swift/trainers/mixin.py
+++ b/swift/trainers/mixin.py
@@ -96,7 +96,7 @@ def init_git_repo(self, at_init: bool = False) -> None:
         hub_model_id = self.args.hub_model_id
         assert hub_model_id is not None, 'Please enter a valid hub_model_id'
-        if '/' not in self.args.hub_model_id:
+        if '/' not in hub_model_id:
             user_name = ModelScopeConfig.get_user_info()[0]
             assert isinstance(user_name, str)
             hub_model_id = f'{user_name}/{hub_model_id}'
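For the dataset contract stated in the README hunks above (a `get_*_dataset` function returning `query` and `response` columns), here is a minimal, hedged sketch of a custom loader. It assumes a local JSONL file with alpaca-style fields; the path `my_data.jsonl`, the field names, and the `'my-dataset'` key are illustrative assumptions, not repository code.

```python
# Hedged sketch of a custom dataset loader for `utils/dataset.py`.
# Only the query/response column contract comes from the README; the file
# name, field names, and registration key below are hypothetical.
import json

from datasets import Dataset as HfDataset


def get_my_dataset(path: str = 'my_data.jsonl') -> HfDataset:
    queries, responses = [], []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            record = json.loads(line)
            query = record['instruction']
            # Fold an optional `input` field into the query, mirroring the
            # alpaca preprocessing touched in PATCH 7/9.
            if record.get('input'):
                query = f"{query}\n{record['input']}"
            queries.append(query)
            responses.append(record['output'])
    return HfDataset.from_dict({'query': queries, 'response': responses})


# Illustrative registration; real DATASET_MAPPING entries may carry extra metadata.
# DATASET_MAPPING['my-dataset'] = get_my_dataset
```

Registering the function under a new key in `DATASET_MAPPING` and selecting that key via `--dataset` is then the expected usage, per the README's extension notes.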