From 648c645327ef7abbe3fa5cd5f1c783913aa4c2c9 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 01:49:42 +0800 Subject: [PATCH 1/9] update template --- examples/pytorch/llm/README.md | 12 +- examples/pytorch/llm/README_CN.md | 13 +- examples/pytorch/llm/src/llm_infer.py | 33 ++-- examples/pytorch/llm/src/llm_sft.py | 41 +++-- examples/pytorch/llm/src/utils/__init__.py | 3 +- examples/pytorch/llm/src/utils/models.py | 52 +++--- examples/pytorch/llm/src/utils/preprocess.py | 181 +++++++++++++++++++ examples/pytorch/llm/src/utils/utils.py | 11 -- swift/utils/llm_utils.py | 32 ---- 9 files changed, 275 insertions(+), 103 deletions(-) create mode 100644 examples/pytorch/llm/src/utils/preprocess.py diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 27ea58b45e..2605d841de 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -19,6 +19,7 @@ 2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. supported feature: quantization, ddp, model parallelism(device map), gradient checkpoint, gradient accumulation steps, push to modelscope hub, custom datasets, ... 4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... +5. supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... ## Prepare the Environment Experimental environment: A10, 3090, A100, ... (V100 does not support bf16, quantization) @@ -65,6 +66,10 @@ bash scripts/qwen_7b/qlora/infer.sh bash scripts/qwen_7b/qlora_ddp/sft.sh bash scripts/qwen_7b/qlora_ddp/infer.sh +# sft(lora+ddp) and infer qwen-7b, Requires 4*22GB VRAM. +bash scripts/qwen_7b/lora_ddp/sft.sh +bash scripts/qwen_7b/lora_ddp/infer.sh + # sft(full) and infer qwen-7b, Requires 95GB VRAM. bash scripts/qwen_7b/full/sft.sh bash scripts/qwen_7b/full/infer.sh @@ -72,6 +77,7 @@ bash scripts/qwen_7b/full/infer.sh # For more scripts, please see `scripts/` folder ``` -## Extend Datasets -1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/models.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work. -2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/datasets.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`. +## Extend Models and Datasets +1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/model.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work. +2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`. +3. If you need to extend the template, you can modify the `TEMPLATE_MAPPING` in `utils/preprocess.py`. diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index a7652c0a8d..5f40d7f06c 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -20,7 +20,7 @@ 2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 
3. 支持的特性: 模型量化, DDP, 模型并行(device_map), gradient checkpoint, 梯度累加, 支持推送modelscope hub, 支持自定义数据集, ... 4. 支持的数据集: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... - +5. 支持的template: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... ## 准备实验环境 实验环境: A10, 3090, A100均可. (V100不支持bf16, 量化) @@ -68,6 +68,10 @@ bash scripts/qwen_7b/qlora/infer.sh bash scripts/qwen_7b/qlora_ddp/sft.sh bash scripts/qwen_7b/qlora_ddp/infer.sh +# 微调(lora+ddp)+推理 qwen-7b, 需要4卡*22GB显存. +bash scripts/qwen_7b/lora_ddp/sft.sh +bash scripts/qwen_7b/lora_ddp/infer.sh + # 微调(full)+推理 qwen-7b, 需要95G显存. bash scripts/qwen_7b/full/sft.sh bash scripts/qwen_7b/full/infer.sh @@ -75,6 +79,7 @@ bash scripts/qwen_7b/full/infer.sh # 更多的scripts脚本, 可以看`scripts`文件夹 ``` -## 拓展数据集 -1. 如果你想要拓展模型, 你可以修改`utils/models.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用. -2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/datasets.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集. +## 拓展模型和数据集 +1. 如果你想要拓展模型, 你可以修改`utils/model.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用. +2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集. +3. 如果你想要拓展template, 你可以修改`utils/preprocess.py`文件中的`TEMPLATE_MAPPING`. diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py index 124b849f2f..6e8e651af5 100644 --- a/examples/pytorch/llm/src/llm_infer.py +++ b/examples/pytorch/llm/src/llm_infer.py @@ -1,18 +1,16 @@ import os # os.environ['CUDA_VISIBLE_DEVICES'] = '0' from dataclasses import dataclass, field -from functools import partial from typing import Optional import torch from transformers import BitsAndBytesConfig, GenerationConfig, TextStreamer -from utils import (DATASET_MAPPING, DEFAULT_PROMPT, MODEL_MAPPING, get_dataset, - get_model_tokenizer, inference, process_dataset, select_bnb, - select_dtype, show_layers) +from utils import (DATASET_MAPPING, MODEL_MAPPING, TEMPLATE_MAPPING, + get_dataset, get_model_tokenizer, get_preprocess, inference, + process_dataset, select_bnb, select_dtype, show_layers) from swift import Swift, get_logger from swift.utils import parse_args, print_model_info, seed_everything -from swift.utils.llm_utils import tokenize_function logger = get_logger() @@ -23,6 +21,8 @@ class InferArguments: default='qwen-7b', metadata={'choices': list(MODEL_MAPPING.keys())}) sft_type: str = field( default='lora', metadata={'choices': ['lora', 'full']}) + template_type: str = field( + default=None, metadata={'choices': list(TEMPLATE_MAPPING.keys())}) ckpt_dir: str = '/path/to/your/vx_xxx/checkpoint-xxx' eval_human: bool = False # False: eval test_dataset @@ -37,13 +37,13 @@ class InferArguments: dataset_seed: int = 42 dataset_sample: int = 20000 # -1: all dataset dataset_test_size: float = 0.01 - prompt: str = DEFAULT_PROMPT + system: str = 'you are a helpful assistant!' 
max_length: Optional[int] = 1024 quantization_bit: Optional[int] = field( default=None, metadata={'choices': {4, 8}}) bnb_4bit_comp_dtype: str = field( - default='fp32', metadata={'choices': {'fp16', 'bf16', 'fp32'}}) + default=None, metadata={'choices': {'fp16', 'bf16', 'fp32'}}) bnb_4bit_quant_type: str = field( default='nf4', metadata={'choices': {'fp4', 'nf4'}}) bnb_4bit_use_double_quant: bool = True @@ -57,7 +57,14 @@ class InferArguments: def __post_init__(self): if not os.path.isdir(self.ckpt_dir): raise ValueError(f'Please enter a valid ckpt_dir: {self.ckpt_dir}') + if self.template_type is None: + self.template_type = MODEL_MAPPING[self.model_type].get( + 'template', 'default') + logger.info(f'Setting template_type: {self.template_type}') + self.torch_dtype, _, _ = select_dtype(self.dtype) + if self.bnb_4bit_comp_dtype is None: + self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) @@ -91,11 +98,9 @@ def llm_infer(args: InferArguments) -> None: print_model_info(model) # ### Inference - tokenize_func = partial( - tokenize_function, - tokenizer=tokenizer, - prompt=args.prompt, - max_length=args.max_length) + template_type = MODEL_MAPPING[args.model_type]['template'] + preprocess_func = get_preprocess(template_type, tokenizer, args.system, + args.max_length) streamer = TextStreamer( tokenizer, skip_prompt=True, skip_special_tokens=True) generation_config = GenerationConfig( @@ -112,7 +117,7 @@ def llm_infer(args: InferArguments) -> None: while True: instruction = input('<<< ') data = {'instruction': instruction} - input_ids = tokenize_func(data)['input_ids'] + input_ids = preprocess_func(data)['input_ids'] inference(input_ids, model, tokenizer, streamer, generation_config) print('-' * 80) else: @@ -125,7 +130,7 @@ def llm_infer(args: InferArguments) -> None: for data in mini_test_dataset: output = data['output'] data['output'] = None - input_ids = tokenize_func(data)['input_ids'] + input_ids = preprocess_func(data)['input_ids'] inference(input_ids, model, tokenizer, streamer, generation_config) print() print(f'[LABELS]{output}') diff --git a/examples/pytorch/llm/src/llm_sft.py b/examples/pytorch/llm/src/llm_sft.py index ab6e73ce16..e3f4a73e22 100644 --- a/examples/pytorch/llm/src/llm_sft.py +++ b/examples/pytorch/llm/src/llm_sft.py @@ -7,18 +7,18 @@ import torch import torch.distributed as dist from transformers import BitsAndBytesConfig -from utils import (DATASET_MAPPING, DEFAULT_PROMPT, MODEL_MAPPING, +from utils import (DATASET_MAPPING, MODEL_MAPPING, TEMPLATE_MAPPING, broadcast_string, find_all_linear_for_lora, get_dataset, - get_dist_setting, get_model_tokenizer, is_dist, plot_images, - process_dataset, select_bnb, select_dtype, show_layers) + get_dist_setting, get_model_tokenizer, get_preprocess, + is_dist, plot_images, process_dataset, select_bnb, + select_dtype, show_layers) from swift import (HubStrategy, LoraConfig, Seq2SeqTrainer, Seq2SeqTrainingArguments, Swift, get_logger) from swift.hub import HubApi, ModelScopeConfig from swift.utils import (add_version_to_work_dir, is_master, parse_args, print_model_info, seed_everything) -from swift.utils.llm_utils import (data_collate_fn, print_example, - stat_dataset, tokenize_function) +from swift.utils.llm_utils import data_collate_fn, print_example, stat_dataset logger = get_logger() @@ -26,10 +26,13 @@ @dataclass class SftArguments: model_type: str = field( - default='qwen-7b', metadata={'choices': 
list(MODEL_MAPPING.keys())}) + default='qwen-7b-chat', + metadata={'choices': list(MODEL_MAPPING.keys())}) # qwen-7b: lora+4bitQ: 10G, lora+8bitQ: 14G, lora: 22G; full: 95G sft_type: str = field( default='lora', metadata={'choices': ['lora', 'full']}) + template_type: str = field( + default=None, metadata={'choices': list(TEMPLATE_MAPPING.keys())}) output_dir: str = 'runs' # DDP + MP(device_map) is not supported ddp_backend: Optional[str] = field( @@ -47,7 +50,7 @@ class SftArguments: dataset_seed: int = 42 dataset_sample: int = 20000 # -1: all dataset dataset_test_size: float = 0.01 - prompt: str = DEFAULT_PROMPT + system: str = 'you are a helpful assistant!' max_length: Optional[int] = 1024 # If you want to use qlora, set the quantization_bit to 8 or 4. @@ -56,7 +59,7 @@ class SftArguments: quantization_bit: Optional[int] = field( default=None, metadata={'choices': {4, 8}}) bnb_4bit_comp_dtype: str = field( - default='fp32', metadata={'choices': {'fp16', 'bf16', 'fp32'}}) + default=None, metadata={'choices': {'fp16', 'bf16', 'fp32'}}) bnb_4bit_quant_type: str = field( default='nf4', metadata={'choices': {'fp4', 'nf4'}}) bnb_4bit_use_double_quant: bool = True @@ -99,7 +102,8 @@ class SftArguments: use_flash_attn: Optional[bool] = field( default=None, metadata={ - 'help': "This parameter is used only when model_type == 'qwen-7b'" + 'help': + "This parameter is used only when model_type.startswith('qwen-7b')" }) def __post_init__(self): @@ -129,6 +133,10 @@ def __post_init__(self): self.save_steps = self.eval_steps * 4 else: raise ValueError(f'sft_type: {self.sft_type}') + if self.template_type is None: + self.template_type = MODEL_MAPPING[self.model_type].get( + 'template', 'default') + logger.info(f'Setting template_type: {self.template_type}') self.output_dir = os.path.join(self.output_dir, self.model_type) @@ -136,6 +144,8 @@ def __post_init__(self): self.lora_target_modules = MODEL_MAPPING[ self.model_type]['lora_TM'] self.torch_dtype, self.fp16, self.bf16 = select_dtype(self.dtype) + if self.bnb_4bit_comp_dtype is None: + self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) @@ -178,7 +188,7 @@ def llm_sft(args: SftArguments) -> None: bnb_4bit_use_double_quant=args.bnb_4bit_use_double_quant) logger.info(f'quantization_config: {quantization_config.__dict__}') kwargs['quantization_config'] = quantization_config - if args.model_type == 'qwen-7b': + if args.model_type.startswith('qwen-7b'): kwargs['use_flash_attn'] = args.use_flash_attn model, tokenizer = get_model_tokenizer( @@ -214,13 +224,10 @@ def llm_sft(args: SftArguments) -> None: args.dataset_test_size, args.dataset_sample, args.dataset_seed) - tokenize_func = partial( - tokenize_function, - tokenizer=tokenizer, - prompt=args.prompt, - max_length=args.max_length) - train_dataset = train_dataset.map(tokenize_func) - val_dataset = val_dataset.map(tokenize_func) + preprocess_func = get_preprocess(args.template_type, tokenizer, + args.system, args.max_length) + train_dataset = train_dataset.map(preprocess_func) + val_dataset = val_dataset.map(preprocess_func) del dataset # Data analysis stat_dataset(train_dataset) diff --git a/examples/pytorch/llm/src/utils/__init__.py b/examples/pytorch/llm/src/utils/__init__.py index ec4a153178..7b349e67ca 100644 --- a/examples/pytorch/llm/src/utils/__init__.py +++ b/examples/pytorch/llm/src/utils/__init__.py @@ -1,5 +1,6 @@ from .datasets import DATASET_MAPPING, get_dataset, 
process_dataset from .models import MODEL_MAPPING, get_model_tokenizer -from .utils import (DEFAULT_PROMPT, broadcast_string, find_all_linear_for_lora, +from .preprocess import TEMPLATE_MAPPING, get_preprocess +from .utils import (broadcast_string, find_all_linear_for_lora, get_dist_setting, inference, is_dist, plot_images, select_bnb, select_dtype, show_layers) diff --git a/examples/pytorch/llm/src/utils/models.py b/examples/pytorch/llm/src/utils/models.py index b76f46433c..9ffcf8aa8e 100644 --- a/examples/pytorch/llm/src/utils/models.py +++ b/examples/pytorch/llm/src/utils/models.py @@ -6,8 +6,6 @@ import torch from modelscope import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, Model, read_config, snapshot_download) -from modelscope.models.nlp.chatglm2 import ChatGLM2Config, ChatGLM2Tokenizer -from modelscope.models.nlp.llama2 import Llama2Config, Llama2Tokenizer from torch import dtype as Dtype from swift import get_logger @@ -97,9 +95,8 @@ def get_model_tokenizer_chatglm2(model_dir: str, model_kwargs['quantization_config'].llm_int8_skip_modules = [ 'output_layer' ] - return get_model_tokenizer_from_sdk(ChatGLM2Config, ChatGLM2Tokenizer, - model_dir, torch_dtype, load_model, - **model_kwargs) + return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, + **model_kwargs) def get_model_tokenizer_llama2(model_dir: str, @@ -109,9 +106,8 @@ def get_model_tokenizer_llama2(model_dir: str, model_config = AutoConfig.from_pretrained( model_dir, trust_remote_code=True) model_config.pretraining_tp = 1 - return get_model_tokenizer_from_sdk(Llama2Config, Llama2Tokenizer, - model_dir, torch_dtype, load_model, - model_config, **model_kwargs) + return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, + model_config, **model_kwargs) def get_model_tokenizer_polylm(model_dir: str, @@ -166,65 +162,79 @@ class LoRATM(NamedTuple): # 'ignore_file_pattern', 'special_token_mapper', 'lora_TM' MODEL_MAPPING = { 'qwen-7b': { - 'model_id': 'qwen/Qwen-7B', + 'model_id': 'qwen/Qwen-7B', # model id or model dir 'revision': 'v.1.0.4', 'get_function': get_model_tokenizer_qwen, + 'template': 'chatml', + 'lora_TM': LoRATM.qwen, + }, + 'qwen-7b-chat': { + 'model_id': 'qwen/Qwen-7B-Chat', + 'revision': 'v1.0.5', + 'get_function': get_model_tokenizer_qwen, + 'template': 'chatml', 'lora_TM': LoRATM.qwen, - 'special_token_mapper': { - 'eos_token': '<|endoftext|>' - } }, 'baichuan-7b': { - 'model_id': 'baichuan-inc/baichuan-7B', # model id or model dir + 'model_id': 'baichuan-inc/baichuan-7B', 'revision': 'v1.0.7', - 'lora_TM': LoRATM.baichuan + 'template': 'baichuan', + 'lora_TM': LoRATM.baichuan, }, 'baichuan-13b': { 'model_id': 'baichuan-inc/Baichuan-13B-Base', 'revision': 'v1.0.5', 'get_function': get_model_tokenizer_baichuan13b, - 'lora_TM': LoRATM.baichuan + 'template': 'baichuan', + 'lora_TM': LoRATM.baichuan, }, 'chatglm2-6b': { 'model_id': 'ZhipuAI/chatglm2-6b', - 'revision': 'v1.0.7', + 'revision': 'v1.0.8', 'get_function': get_model_tokenizer_chatglm2, - 'lora_TM': LoRATM.chatglm2 + 'template': 'chatglm2', + 'lora_TM': LoRATM.chatglm2, }, 'chatglm2-6b-32k': { 'model_id': 'ZhipuAI/chatglm2-6b-32k', 'revision': 'v1.0.0', - 'lora_TM': LoRATM.chatglm2 + 'template': 'chatglm2', + 'lora_TM': LoRATM.chatglm2, }, 'llama2-7b': { 'model_id': 'modelscope/Llama-2-7b-ms', 'revision': 'v1.0.2', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], # use safetensors - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'llama2-13b': { 
'model_id': 'modelscope/Llama-2-13b-ms', 'revision': 'v1.0.2', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'llama2-70b': { 'model_id': 'modelscope/Llama-2-70b-ms', 'revision': 'v1.0.0', 'get_function': get_model_tokenizer_llama2, + 'template': 'llama', 'ignore_file_pattern': [r'.+\.bin$'], - 'lora_TM': LoRATM.llama2 + 'lora_TM': LoRATM.llama2, }, 'openbuddy-llama2-13b': { 'model_id': 'OpenBuddy/openbuddy-llama2-13b-v8.1-fp16', 'revision': 'v1.0.0', + 'template': 'openbuddy_llama', 'lora_TM': LoRATM.llama2, }, 'openbuddy-llama-65b': { 'model_id': 'OpenBuddy/openbuddy-llama-65b-v8-bf16', 'revision': 'v1.0.0', + 'template': 'openbuddy_llama', 'lora_TM': LoRATM.llama2, }, 'polylm-13b': { diff --git a/examples/pytorch/llm/src/utils/preprocess.py b/examples/pytorch/llm/src/utils/preprocess.py new file mode 100644 index 0000000000..9ea4040ca4 --- /dev/null +++ b/examples/pytorch/llm/src/utils/preprocess.py @@ -0,0 +1,181 @@ +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from transformers import PreTrainedTokenizer +DEFAULT_SYSTEM = 'you are a helpful assistant!' + +TEMPLATE_MAPPING = { + 'default': { + 'prefix': ['{{system}}\n\n'], + 'prompt': ['### Human:\n', '{{query}}\n\n', '### Assistant:\n'], + 'chat_sep': ['\n\n'], + 'suffix': [['eos_token_id']], + }, + 'chatml': { + 'prefix': [['im_start_id'], 'system\n{{system}}', ['im_end_id'], '\n'], + 'prompt': [['im_start_id'], 'user\n{{query}}', ['im_end_id'], '\n', + ['im_start_id'], 'assistant\n'], + 'chat_sep': [ + ['im_end_id'], + '\n', + ], + 'suffix': [['im_end_id'], ['eod_id']], + }, + 'baichuan': { + 'prefix': [], + 'prompt': [[195], '{{query}}', [196]], + 'chat_sep': [], + 'suffix': [['eos_token_id']], + }, + 'chatglm2':{ + 'prefix': [[64790, 64792]], + 'prompt': [ + '[Round {{round}}]\n\n问:{{query}}\n\n答:' + ], + 'chat_sep': ['\n\n'], + 'suffix': [['eos_token_id']], + }, + 'llama': { + 'prefix': [ + ['bos_token_id'], + '[INST] <>\n{{system}}\n<>\n\n' + ], + 'prompt': [ + '{{query}} [/INST] ' + ], + 'chat_sep': [ + ' ', ['eos_token_id', 'bos_token_id'], '[INST] ' + ], + 'suffix': [['eos_token_id']], + }, + 'openbuddy_llama': { + 'prefix': ['{{system}}\n\n'], + 'prompt': [ + 'User: {{query}}\nAssistant: ' + ], + 'chat_sep': ['\n'], + 'suffix': [['eos_token_id']], + } +} +Context = Union[str, List[int]] + +def simplify_context_list( + context_list: List[Context] +) -> List[Context]: + res = [] + temp = [] + for c in context_list: + if isinstance(c, str): + temp.append(c) + else: + if len(temp) > 0: + res.append(''.join(temp)) + temp.clear() + res.append(c) + if len(temp) > 0: + res.append(''.join(temp)) + if len(res) > 0 and isinstance(res[-1], str): + # avoid two spaces + res[-1] = res[-1].rstrip(' ') + return res + + +def concat_context_list( + context_list: List[Context], + new_context_list: List[Context], + placeholder_list: List[str], + system: Optional[str] = None, + query: Optional[str] = None, + round: Optional[str] = None, +) -> None: + for context in context_list: + if isinstance(context, str): + for old_str, new_str in zip(['{{system}}', '{{query}}', '{{round}}'], + [system, query, round]): + if new_str is not None and old_str in context: + placeholder_list.append(new_str) + new_context_list.append(context) + +def _encode(tokenizer: PreTrainedTokenizer, + context_list: List[Context], + placeholder_list: List[str]) -> List[int]: + input_ids = [] + placeholder_it = iter(placeholder_list) + 
for context in context_list: + if isinstance(context, list): + for c in context: + if isinstance(c, str): + token = getattr(tokenizer, c) + assert token is not None + else: + token = c + input_ids.append(token) + elif isinstance(context, str): + for old_str in ['{{system}}', '{{query}}', '{{round}}']: + if old_str in context: + new_str = next(placeholder_it) + context = context.replace(old_str, new_str) + input_ids += tokenizer( + context, return_attention_mask=False, + add_special_tokens=False)['input_ids'] + return input_ids + + +def _preprocess( + template_type: str, + tokenizer: PreTrainedTokenizer, + query: str, + response: Optional[str] = None, + history: Optional[List[Tuple[str, str]]] = None, + system: Optional[str] = None, + max_length: Optional[int] = None, +) -> Dict[str, List[int]]: + if history is None: + history = [] + + template_config = TEMPLATE_MAPPING[template_type] + if system is None: + system = DEFAULT_SYSTEM + total_context_list = [] + placeholder_list = [] + concat_context_list(template_config['prefix'], total_context_list, + placeholder_list, system=system) + for i, (q, r) in enumerate(history): + concat_context_list( + [*template_config['prompt'], r, *template_config['chat_sep']], + total_context_list, placeholder_list, + query=q, round=str(i+1)) + concat_context_list(template_config['prompt'], total_context_list, placeholder_list, + query=query, round=str(len(history)+1)) + total_context_list = simplify_context_list(total_context_list) + input_ids = _encode(tokenizer, total_context_list, placeholder_list) + + labels = None + if response is not None: + labels = [-100] * len(input_ids) + tgt_input_ids = _encode(tokenizer, [response], []) + tgt_input_ids += _encode(tokenizer, template_config['suffix'], []) + input_ids += tgt_input_ids + labels += tgt_input_ids + + if max_length is not None: + input_ids = input_ids[-max_length:] + if labels is not None: + labels = labels[-max_length:] + + return {'input_ids': input_ids, 'labels': labels} + + +def get_preprocess( + template_type: str, tokenizer: PreTrainedTokenizer, + system: Optional[str]=None, max_length: Optional[int]=None +) -> Callable[[Dict[str, Any]], Dict[str, List[int]]]: + + def preprocess(examples: Dict[str, Any]) -> Dict[str, List[int]]: + history = examples['history'] + query = history[-1][0] + response = history[-1][1] + history = history[:-1] + return _preprocess( + template_type, tokenizer, query, response, history, system, max_length) + + return preprocess diff --git a/examples/pytorch/llm/src/utils/utils.py b/examples/pytorch/llm/src/utils/utils.py index 9bf6aaec14..4d34180394 100644 --- a/examples/pytorch/llm/src/utils/utils.py +++ b/examples/pytorch/llm/src/utils/utils.py @@ -15,17 +15,6 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'true' logger = get_logger() -# The `output` section will be concatenated at the end -# `prompt` part does not calculate the loss, `output` part calculates the loss -DEFAULT_PROMPT = """Here's a conversation between a human and an AI assistant. \ -The AI assistant provides detailed, friendly answers for the human. 
- -### Human: -{instruction} - -### AI: -""" - DTYPE_MAPPING = { 'fp16': torch.float16, 'bf16': torch.bfloat16, diff --git a/swift/utils/llm_utils.py b/swift/utils/llm_utils.py index cfcf787253..3ae6e3aca7 100644 --- a/swift/utils/llm_utils.py +++ b/swift/utils/llm_utils.py @@ -27,38 +27,6 @@ def stat_dataset(dataset: HfDataset) -> None: ) -def tokenize_function(example: Dict[str, - Optional[str]], tokenizer, prompt: str, - max_length: Optional[int]) -> Dict[str, Any]: - instruction: str = example['instruction'] - output = example.get('output') - src_text = prompt.format(instruction=instruction) - src_input_ids: List[int] = tokenizer( - src_text, return_attention_mask=False, - add_special_tokens=True)['input_ids'] - if src_input_ids[-1] == tokenizer.eos_token_id: - src_input_ids.pop() - - tgt_input_ids = [] - if output is not None: - assert tokenizer.eos_token_id is not None - tgt_input_ids += tokenizer( - output, return_attention_mask=False, - add_special_tokens=False)['input_ids'] - tgt_input_ids += [tokenizer.eos_token_id] - labels = [-100] * len(src_input_ids) + tgt_input_ids - else: - labels = None - input_ids = src_input_ids + tgt_input_ids - - if max_length is not None: - input_ids = input_ids[-max_length:] - if labels is not None: - labels = labels[-max_length:] - - return {'input_ids': input_ids, 'labels': labels} - - def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, Any]: assert tokenizer.pad_token_id is not None input_ids = [torch.tensor(b['input_ids']) for b in batch] From d5df8dbf0998ccc21a32cc216009f322304505f1 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 15:59:33 +0800 Subject: [PATCH 2/9] update sh --- .../{qwen_7b => qwen_7b_chat}/full/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/full/sft.sh | 4 +-- .../lora_ddp/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/lora_ddp/sft.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/qlora/infer.sh | 4 +-- .../{qwen_7b => qwen_7b_chat}/qlora/sft.sh | 4 +-- .../scripts/qwen_7b_chat/qlora_ddp/infer.sh | 14 ++++++++ .../llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh | 36 +++++++++++++++++++ .../llm/src/utils/{datasets.py => dataset.py} | 0 .../llm/src/utils/{models.py => model.py} | 0 10 files changed, 62 insertions(+), 12 deletions(-) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/full/infer.sh (71%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/full/sft.sh (90%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/lora_ddp/infer.sh (71%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/lora_ddp/sft.sh (92%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/qlora/infer.sh (74%) rename examples/pytorch/llm/scripts/{qwen_7b => qwen_7b_chat}/qlora/sft.sh (91%) create mode 100644 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh create mode 100644 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh rename examples/pytorch/llm/src/utils/{datasets.py => dataset.py} (100%) rename examples/pytorch/llm/src/utils/{models.py => model.py} (100%) diff --git a/examples/pytorch/llm/scripts/qwen_7b/full/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh similarity index 71% rename from examples/pytorch/llm/scripts/qwen_7b/full/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh index 6a049dccd1..685d917b57 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/full/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh @@ -1,10 +1,10 @@ # 19G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py 
\ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type full \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --max_new_tokens 1024 \ --temperature 0.9 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/full/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh similarity index 90% rename from examples/pytorch/llm/scripts/qwen_7b/full/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh index 62b0d6c75b..8b4e1f3fa9 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/full/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh @@ -2,7 +2,7 @@ # Experimental environment: 8 * 3090 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 \ python src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type full \ --dtype bf16 \ --output_dir runs \ @@ -22,6 +22,6 @@ python src/llm_sft.py \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-full \ + --hub_model_id qwen-7b-chat-full \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh similarity index 71% rename from examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh index 14bfb01309..6b933c92b1 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh @@ -1,10 +1,10 @@ # 19G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --max_new_tokens 1024 \ --temperature 0.9 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh similarity index 92% rename from examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh index f414fe2164..30df447cf9 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh @@ -5,7 +5,7 @@ torchrun \ --nproc_per_node=$nproc_per_node \ --master_port 29500 \ src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ --output_dir runs \ @@ -29,6 +29,6 @@ torchrun \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-lora \ + --hub_model_id qwen-7b-chat-lora \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh similarity index 74% rename from examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh index b8e35a36b9..51c71c37ae 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh @@ -1,10 +1,10 @@ # 10G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ - --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ 
--max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh similarity index 91% rename from examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh rename to examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh index 1da754a352..ac1240c78e 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora/sft.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh @@ -1,7 +1,7 @@ # 16GB VRAM CUDA_VISIBLE_DEVICES=0 \ python src/llm_sft.py \ - --model_type qwen-7b \ + --model_type qwen-7b-chat \ --sft_type lora \ --dtype bf16 \ --output_dir runs \ @@ -26,6 +26,6 @@ python src/llm_sft.py \ --logging_steps 10 \ --use_flash_attn false \ --push_to_hub false \ - --hub_model_id qwen-7b-qlora \ + --hub_model_id qwen-7b-chat-qlora \ --hub_private_repo true \ --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh new file mode 100644 index 0000000000..51c71c37ae --- /dev/null +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh @@ -0,0 +1,14 @@ +# 10G +CUDA_VISIBLE_DEVICES=0 \ +python src/llm_infer.py \ + --model_type qwen-7b-chat \ + --sft_type lora \ + --dtype bf16 \ + --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \ + --eval_human true \ + --quantization_bit 4 \ + --max_new_tokens 1024 \ + --temperature 0.9 \ + --top_k 50 \ + --top_p 0.9 \ + --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh new file mode 100644 index 0000000000..7ca32a52ac --- /dev/null +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh @@ -0,0 +1,36 @@ +# 4 * 16GB VRAM +nproc_per_node=4 +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +torchrun \ + --nproc_per_node=$nproc_per_node \ + --master_port 29500 \ + src/llm_sft.py \ + --model_type qwen-7b-chat \ + --sft_type lora \ + --dtype bf16 \ + --output_dir runs \ + --ddp_backend nccl \ + --dataset alpaca-en,alpaca-zh \ + --dataset_sample -1 \ + --num_train_epochs 1 \ + --max_length 1024 \ + --quantization_bit 4 \ + --lora_rank 64 \ + --lora_alpha 32 \ + --lora_dropout_p 0.05 \ + --lora_target_modules ALL \ + --batch_size 1 \ + --weight_decay 0. 
\ + --learning_rate 1e-4 \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ + --max_grad_norm 0.5 \ + --warmup_ratio 0.03 \ + --eval_steps 50 \ + --save_steps 50 \ + --save_total_limit 2 \ + --logging_steps 10 \ + --use_flash_attn false \ + --push_to_hub false \ + --hub_model_id qwen-7b-chat-qlora \ + --hub_private_repo true \ + --hub_token 'your-sdk-token' \ diff --git a/examples/pytorch/llm/src/utils/datasets.py b/examples/pytorch/llm/src/utils/dataset.py similarity index 100% rename from examples/pytorch/llm/src/utils/datasets.py rename to examples/pytorch/llm/src/utils/dataset.py diff --git a/examples/pytorch/llm/src/utils/models.py b/examples/pytorch/llm/src/utils/model.py similarity index 100% rename from examples/pytorch/llm/src/utils/models.py rename to examples/pytorch/llm/src/utils/model.py From 9c9e0cd6ae39709f0dc8fbe859703520e7a4a05e Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 16:02:13 +0800 Subject: [PATCH 3/9] fix bug --- examples/pytorch/llm/src/llm_infer.py | 14 ++- examples/pytorch/llm/src/utils/__init__.py | 4 +- examples/pytorch/llm/src/utils/dataset.py | 5 +- examples/pytorch/llm/src/utils/model.py | 27 +++--- examples/pytorch/llm/src/utils/preprocess.py | 96 ++++++++++---------- examples/pytorch/llm/src/utils/utils.py | 5 +- 6 files changed, 77 insertions(+), 74 deletions(-) diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py index 6e8e651af5..126c569abf 100644 --- a/examples/pytorch/llm/src/llm_infer.py +++ b/examples/pytorch/llm/src/llm_infer.py @@ -53,6 +53,7 @@ class InferArguments: temperature: float = 0.9 top_k: int = 50 top_p: float = 0.9 + skip_prompt: Optional[bool] = None def __post_init__(self): if not os.path.isdir(self.ckpt_dir): @@ -67,6 +68,8 @@ def __post_init__(self): self.bnb_4bit_comp_dtype = self.dtype self.bnb_4bit_compute_dtype, self.load_in_4bit, self.load_in_8bit = select_bnb( self.quantization_bit, self.bnb_4bit_comp_dtype) + if self.skip_prompt is None: + self.skip_prompt = self.eval_human def llm_infer(args: InferArguments) -> None: @@ -115,11 +118,11 @@ def llm_infer(args: InferArguments) -> None: if args.eval_human: while True: - instruction = input('<<< ') - data = {'instruction': instruction} + query = input('<<< ') + data = {'query': query} input_ids = preprocess_func(data)['input_ids'] - inference(input_ids, model, tokenizer, streamer, generation_config) - print('-' * 80) + inference(input_ids, model, tokenizer, streamer, generation_config, + args.skip_prompt) else: dataset = get_dataset(args.dataset.split(',')) _, test_dataset = process_dataset(dataset, args.dataset_test_size, @@ -131,7 +134,8 @@ def llm_infer(args: InferArguments) -> None: output = data['output'] data['output'] = None input_ids = preprocess_func(data)['input_ids'] - inference(input_ids, model, tokenizer, streamer, generation_config) + inference(input_ids, model, tokenizer, streamer, generation_config, + args.skip_prompt) print() print(f'[LABELS]{output}') print('-' * 80) diff --git a/examples/pytorch/llm/src/utils/__init__.py b/examples/pytorch/llm/src/utils/__init__.py index 7b349e67ca..11b7941d90 100644 --- a/examples/pytorch/llm/src/utils/__init__.py +++ b/examples/pytorch/llm/src/utils/__init__.py @@ -1,5 +1,5 @@ -from .datasets import DATASET_MAPPING, get_dataset, process_dataset -from .models import MODEL_MAPPING, get_model_tokenizer +from .dataset import DATASET_MAPPING, get_dataset, process_dataset +from .model import MODEL_MAPPING, get_model_tokenizer from .preprocess import 
TEMPLATE_MAPPING, get_preprocess from .utils import (broadcast_string, find_all_linear_for_lora, get_dist_setting, inference, is_dist, plot_images, diff --git a/examples/pytorch/llm/src/utils/dataset.py b/examples/pytorch/llm/src/utils/dataset.py index e4a099147f..980da50f63 100644 --- a/examples/pytorch/llm/src/utils/dataset.py +++ b/examples/pytorch/llm/src/utils/dataset.py @@ -23,8 +23,9 @@ def _processing_alpaca( inst = f'{inst}\n{inp}' new_instruction.append(inst) dataset = HfDataset.from_dict({ - 'instruction': new_instruction, - 'output': dataset['output'] + 'history': [None] * len(new_instruction), + 'query': new_instruction, + 'response': dataset['output'] }) return dataset diff --git a/examples/pytorch/llm/src/utils/model.py b/examples/pytorch/llm/src/utils/model.py index 9ffcf8aa8e..941e704940 100644 --- a/examples/pytorch/llm/src/utils/model.py +++ b/examples/pytorch/llm/src/utils/model.py @@ -1,7 +1,7 @@ import os # os.environ['CUDA_VISIBLE_DEVICES'] = '0' from types import MethodType -from typing import Any, Dict, NamedTuple, Optional +from typing import NamedTuple, Optional import torch from modelscope import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, Model, @@ -13,15 +13,6 @@ logger = get_logger() -def _add_special_token(tokenizer, special_token_mapper: Dict[str, - Any]) -> None: - for k, v in special_token_mapper.items(): - setattr(tokenizer, k, v) - assert tokenizer.eos_token is not None - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - def get_model_tokenizer_from_repo(model_dir: str, torch_dtype: Dtype, load_model: bool = True, @@ -144,8 +135,11 @@ def get_model_tokenizer_qwen(model_dir: str, use_flash_attn = kwargs.pop('use_flash_attn', 'auto') model_config.use_flash_attn = use_flash_attn - return get_model_tokenizer_from_repo(model_dir, torch_dtype, load_model, - model_config, **kwargs) + model, tokenizer = get_model_tokenizer_from_repo(model_dir, torch_dtype, + load_model, model_config, + **kwargs) + tokenizer.eos_token_id = tokenizer.eod_id + return model, tokenizer class LoRATM(NamedTuple): @@ -158,8 +152,8 @@ class LoRATM(NamedTuple): # Model Home: 'https://modelscope.cn/models/{model_id}/summary' -# keys: 'model_id', 'revision', 'get_function', -# 'ignore_file_pattern', 'special_token_mapper', 'lora_TM' +# keys: 'model_id', 'revision', 'get_function', 'template', +# 'ignore_file_pattern', 'lora_TM' MODEL_MAPPING = { 'qwen-7b': { 'model_id': 'qwen/Qwen-7B', # model id or model dir @@ -257,7 +251,6 @@ def get_model_tokenizer(model_type: str, model_id = data['model_id'] get_function = data.get('get_function', get_model_tokenizer_from_repo) ignore_file_pattern = data.get('ignore_file_pattern', []) - special_token_mapper = data.get('special_token_mapper', {}) if torch_dtype is None: torch_dtype = data.get('torch_dtype', torch.float16) if 'device_map' not in kwargs: @@ -273,5 +266,7 @@ def get_model_tokenizer(model_type: str, model, tokenizer = get_function(model_dir, torch_dtype, load_model, **kwargs) - _add_special_token(tokenizer, special_token_mapper) + assert tokenizer.eos_token is not None + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token return model, tokenizer diff --git a/examples/pytorch/llm/src/utils/preprocess.py b/examples/pytorch/llm/src/utils/preprocess.py index 9ea4040ca4..19403122c5 100644 --- a/examples/pytorch/llm/src/utils/preprocess.py +++ b/examples/pytorch/llm/src/utils/preprocess.py @@ -1,6 +1,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union from 
transformers import PreTrainedTokenizer + DEFAULT_SYSTEM = 'you are a helpful assistant!' TEMPLATE_MAPPING = { @@ -26,41 +27,30 @@ 'chat_sep': [], 'suffix': [['eos_token_id']], }, - 'chatglm2':{ + 'chatglm2': { 'prefix': [[64790, 64792]], - 'prompt': [ - '[Round {{round}}]\n\n问:{{query}}\n\n答:' - ], + 'prompt': ['[Round {{round}}]\n\n问:{{query}}\n\n答:'], 'chat_sep': ['\n\n'], - 'suffix': [['eos_token_id']], + 'suffix': [['eos_token_id']], }, 'llama': { - 'prefix': [ - ['bos_token_id'], - '[INST] <>\n{{system}}\n<>\n\n' - ], - 'prompt': [ - '{{query}} [/INST] ' - ], - 'chat_sep': [ - ' ', ['eos_token_id', 'bos_token_id'], '[INST] ' - ], - 'suffix': [['eos_token_id']], + 'prefix': [['bos_token_id'], + '[INST] <>\n{{system}}\n<>\n\n'], + 'prompt': ['{{query}} [/INST] '], + 'chat_sep': [' ', ['eos_token_id', 'bos_token_id'], '[INST] '], + 'suffix': [['eos_token_id']], }, 'openbuddy_llama': { 'prefix': ['{{system}}\n\n'], - 'prompt': [ - 'User: {{query}}\nAssistant: ' - ], + 'prompt': ['User: {{query}}\nAssistant: '], 'chat_sep': ['\n'], - 'suffix': [['eos_token_id']], + 'suffix': [['eos_token_id']], } } Context = Union[str, List[int]] -def simplify_context_list( - context_list: List[Context] -) -> List[Context]: + +def simplify_context_list(context_list: List[Context]) -> List[Context]: res = [] temp = [] for c in context_list: @@ -80,23 +70,24 @@ def simplify_context_list( def concat_context_list( - context_list: List[Context], - new_context_list: List[Context], - placeholder_list: List[str], - system: Optional[str] = None, - query: Optional[str] = None, - round: Optional[str] = None, + context_list: List[Context], + new_context_list: List[Context], + placeholder_list: List[str], + system: Optional[str] = None, + query: Optional[str] = None, + round: Optional[str] = None, ) -> None: for context in context_list: if isinstance(context, str): - for old_str, new_str in zip(['{{system}}', '{{query}}', '{{round}}'], - [system, query, round]): + for old_str, new_str in zip( + ['{{system}}', '{{query}}', '{{round}}'], + [system, query, round]): if new_str is not None and old_str in context: placeholder_list.append(new_str) new_context_list.append(context) -def _encode(tokenizer: PreTrainedTokenizer, - context_list: List[Context], + +def _encode(tokenizer: PreTrainedTokenizer, context_list: List[Context], placeholder_list: List[str]) -> List[int]: input_ids = [] placeholder_it = iter(placeholder_list) @@ -137,15 +128,24 @@ def _preprocess( system = DEFAULT_SYSTEM total_context_list = [] placeholder_list = [] - concat_context_list(template_config['prefix'], total_context_list, - placeholder_list, system=system) + concat_context_list( + template_config['prefix'], + total_context_list, + placeholder_list, + system=system) for i, (q, r) in enumerate(history): concat_context_list( [*template_config['prompt'], r, *template_config['chat_sep']], - total_context_list, placeholder_list, - query=q, round=str(i+1)) - concat_context_list(template_config['prompt'], total_context_list, placeholder_list, - query=query, round=str(len(history)+1)) + total_context_list, + placeholder_list, + query=q, + round=str(i + 1)) + concat_context_list( + template_config['prompt'], + total_context_list, + placeholder_list, + query=query, + round=str(len(history) + 1)) total_context_list = simplify_context_list(total_context_list) input_ids = _encode(tokenizer, total_context_list, placeholder_list) @@ -166,16 +166,18 @@ def _preprocess( def get_preprocess( - template_type: str, tokenizer: PreTrainedTokenizer, - system: 
Optional[str]=None, max_length: Optional[int]=None + template_type: str, + tokenizer: PreTrainedTokenizer, + system: Optional[str] = None, + max_length: Optional[int] = None ) -> Callable[[Dict[str, Any]], Dict[str, List[int]]]: def preprocess(examples: Dict[str, Any]) -> Dict[str, List[int]]: - history = examples['history'] - query = history[-1][0] - response = history[-1][1] - history = history[:-1] - return _preprocess( - template_type, tokenizer, query, response, history, system, max_length) + history: Optional[List[Tuple[str, + str]]] = examples.get('history', None) + query: str = examples['query'] + response: str = examples.get('response', None) + return _preprocess(template_type, tokenizer, query, response, history, + system, max_length) return preprocess diff --git a/examples/pytorch/llm/src/utils/utils.py b/examples/pytorch/llm/src/utils/utils.py index 4d34180394..b279ac81f2 100644 --- a/examples/pytorch/llm/src/utils/utils.py +++ b/examples/pytorch/llm/src/utils/utils.py @@ -88,8 +88,9 @@ def inference(input_ids: List[int], tokenizer, streamer: Optional[TextStreamer] = None, generation_config: Optional[GenerationConfig] = None, - tag: str = '[INFERENCE]') -> str: - print(f'{tag}{tokenizer.decode(input_ids)}', end='') + skip_prompt: bool = True) -> str: + if not skip_prompt: + print(f'[INFERENCE]{tokenizer.decode(input_ids)}', end='') input_ids = torch.tensor(input_ids)[None].cuda() attention_mask = torch.ones_like(input_ids) model.eval() From 29e6eb407452412eb14de9ad0083d2f47ac1e4ac Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 20 Aug 2023 17:15:26 +0800 Subject: [PATCH 4/9] update baichuan-13b-chat, llama2-{7,13,70}b-chat --- examples/pytorch/llm/README.md | 2 +- examples/pytorch/llm/README_CN.md | 2 +- .../scripts/baichuan_13b/qlora_ddp/infer.sh | 4 +- .../llm/scripts/baichuan_13b/qlora_ddp/sft.sh | 2 +- .../llm/scripts/llama2_70b/qlora/infer.sh | 4 +- .../llm/scripts/llama2_70b/qlora/sft.sh | 4 +- examples/pytorch/llm/src/utils/model.py | 37 +++++++++++++++++-- 7 files changed, 42 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 2605d841de..8dea971980 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -16,7 +16,7 @@ ## Features 1. supported sft method: [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), full(full parameter fine tuning), ... -2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... +2. supported models: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. supported feature: quantization, ddp, model parallelism(device map), gradient checkpoint, gradient accumulation steps, push to modelscope hub, custom datasets, ... 4. supported datasets: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... 5. supported templates: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... 
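For reference, a custom template follows the same four-key layout that `utils/preprocess.py` introduces above (`prefix`, `prompt`, `chat_sep`, `suffix`). The sketch below is a hypothetical example — the `my_template` name and its strings are illustrative and not part of this patch series:

```python
# Hypothetical TEMPLATE_MAPPING entry (would live in utils/preprocess.py).
# Per _encode(): plain strings are tokenized with add_special_tokens=False after
# {{system}}/{{query}}/{{round}} substitution; list items are token ids, given either
# as literal ints or as tokenizer attribute names such as 'eos_token_id'.
my_template = {
    'prefix': ['{{system}}\n'],        # emitted once at the start of the sequence
    'prompt': ['Q: {{query}}\nA: '],   # emitted for every round of the conversation
    'chat_sep': ['\n'],                # separator inserted between historical rounds
    'suffix': [['eos_token_id']],      # appended after the response when labels are built
}
# TEMPLATE_MAPPING['my_template'] = my_template  # then select it via --template_type my_template
```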
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index 5f40d7f06c..a5d5235c6e 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -17,7 +17,7 @@ ## 特性 1. [lora](https://arxiv.org/abs/2106.09685), [qlora](https://arxiv.org/abs/2305.14314), 全参数微调, ... -2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), baichuan-7b, baichuan-13b, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-13b, llama2-70b, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... +2. 支持的模型: [**qwen-7b**](https://github.com/QwenLM/Qwen-7B), qwen-7b-chat, baichuan-7b, baichuan-13b, baichuan-13b-chat, chatglm2-6b, chatglm2-6b-32k, llama2-7b, llama2-7b-chat, llama2-13b, llama2-13b-chat, llama2-70b, llama2-70b-chat, openbuddy-llama2-13b, openbuddy-llama-65b, polylm-13b, ... 3. 支持的特性: 模型量化, DDP, 模型并行(device_map), gradient checkpoint, 梯度累加, 支持推送modelscope hub, 支持自定义数据集, ... 4. 支持的数据集: alpaca-en(gpt4), alpaca-zh(gpt4), finance-en, multi-alpaca-all, code-en, instinwild-en, instinwild-zh, ... 5. 支持的template: chatml(qwen), baichuan, chatglm2, llama, openbuddy_llama, default, ... diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh index 13c65c2d2d..b960909975 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh @@ -1,9 +1,9 @@ # 12G CUDA_VISIBLE_DEVICES=0 \ python src/llm_infer.py \ - --model_type baichuan-13b \ + --model_type baichuan-13b-chat \ --sft_type lora \ - --ckpt_dir "runs/baichuan-13b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/baichuan-13b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ --max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh index 9493d7645a..b90aa26c02 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh @@ -5,7 +5,7 @@ torchrun \ --nproc_per_node=$nproc_per_node \ --master_port 29500 \ src/llm_sft.py \ - --model_type baichuan-13b \ + --model_type baichuan-13b-chat \ --sft_type lora \ --output_dir runs \ --ddp_backend nccl \ diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh b/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh index ccd09161da..5b8032a06d 100644 --- a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh @@ -1,9 +1,9 @@ # 40G CUDA_VISIBLE_DEVICES=0,1 \ python src/llm_infer.py \ - --model_type llama2-7b \ + --model_type llama2-7b-chat \ --sft_type lora \ - --ckpt_dir "runs/llama2-70b/vx_xxx/checkpoint-xxx" \ + --ckpt_dir "runs/llama2-70b-chat/vx_xxx/checkpoint-xxx" \ --eval_human true \ --quantization_bit 4 \ --max_new_tokens 1024 \ diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh b/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh index d38db3c8e7..31626d8afd 100644 --- a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh +++ b/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh @@ -2,10 +2,10 @@ # llama2 is not good at Chinese CUDA_VISIBLE_DEVICES=0,1 \ python src/llm_sft.py \ - --model_type llama2-70b \ + --model_type llama2-70b-chat \ --sft_type lora \ --output_dir runs \ - --dataset alpaca-en,alpaca-zh \ + --dataset alpaca-en \ --dataset_sample 20000 \ --num_train_epochs 1 \ --max_length 1024 \ diff --git 
a/examples/pytorch/llm/src/utils/model.py b/examples/pytorch/llm/src/utils/model.py
index 941e704940..ec165bf247 100644
--- a/examples/pytorch/llm/src/utils/model.py
+++ b/examples/pytorch/llm/src/utils/model.py
@@ -73,8 +73,10 @@ def get_model_tokenizer_baichuan13b(model_dir: str,
     model, tokenizer = get_model_tokenizer_from_repo(model_dir, torch_dtype,
                                                      load_model,
                                                      **model_kwargs)
-    model.get_input_embeddings = MethodType(
-        lambda self: self.model.embed_tokens, model)
+
+    if not hasattr(model, 'get_input_embeddings'):
+        model.get_input_embeddings = MethodType(
+            lambda self: self.model.embed_tokens, model)
     return model, tokenizer
@@ -143,7 +145,7 @@ def get_model_tokenizer_qwen(model_dir: str,


 class LoRATM(NamedTuple):
-    # default lora target modules
+    # default lora target modules. qkv
     baichuan = ['W_pack']
     chatglm2 = ['query_key_value']
     llama2 = ['q_proj', 'k_proj', 'v_proj']
@@ -182,6 +184,12 @@ class LoRATM(NamedTuple):
         'template': 'baichuan',
         'lora_TM': LoRATM.baichuan,
     },
+    'baichuan-13b-chat': {
+        'model_id': 'baichuan-inc/Baichuan-13B-Chat',
+        'revision': 'v1.0.8',
+        'template': 'baichuan',
+        'lora_TM': LoRATM.baichuan,
+    },
     'chatglm2-6b': {
         'model_id': 'ZhipuAI/chatglm2-6b',
         'revision': 'v1.0.8',
@@ -198,7 +206,6 @@ class LoRATM(NamedTuple):
     'llama2-7b': {
         'model_id': 'modelscope/Llama-2-7b-ms',
         'revision': 'v1.0.2',
-        'get_function': get_model_tokenizer_llama2,
         'template': 'llama',
         'ignore_file_pattern': [r'.+\.bin$'],  # use safetensors
         'lora_TM': LoRATM.llama2,
@@ -214,6 +221,28 @@ class LoRATM(NamedTuple):
     'llama2-70b': {
         'model_id': 'modelscope/Llama-2-70b-ms',
         'revision': 'v1.0.0',
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-7b-chat': {
+        'model_id': 'modelscope/Llama-2-7b-chat-ms',
+        'revision': 'v1.0.2',
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],  # use safetensors
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-13b-chat': {
+        'model_id': 'modelscope/Llama-2-13b-chat-ms',
+        'revision': 'v1.0.2',
+        'get_function': get_model_tokenizer_llama2,
+        'template': 'llama',
+        'ignore_file_pattern': [r'.+\.bin$'],
+        'lora_TM': LoRATM.llama2,
+    },
+    'llama2-70b-chat': {
+        'model_id': 'modelscope/Llama-2-70b-chat-ms',
+        'revision': 'v1.0.1',
+        'get_function': get_model_tokenizer_llama2,
     'template': 'llama',
     'ignore_file_pattern': [r'.+\.bin$'],

From 24526d46399a27e92c91ced5e92c72dab8ff74a9 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 09:27:21 +0800
Subject: [PATCH 5/9] update sh

---
 .../{baichuan_13b => baichuan_13b_chat}/qlora_ddp/infer.sh     | 0
 .../{baichuan_13b => baichuan_13b_chat}/qlora_ddp/sft.sh       | 0
 .../llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/infer.sh | 0
 .../llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/sft.sh   | 0
 examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh        | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh          | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh    | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh      | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh       | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh         | 2 ++
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh   | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh     | 2 ++
 12 files changed, 10 insertions(+)
 rename examples/pytorch/llm/scripts/{baichuan_13b => baichuan_13b_chat}/qlora_ddp/infer.sh (100%)
 rename examples/pytorch/llm/scripts/{baichuan_13b => baichuan_13b_chat}/qlora_ddp/sft.sh (100%)
 rename examples/pytorch/llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/infer.sh (100%)
 rename examples/pytorch/llm/scripts/{llama2_70b => llama2_70b_chat}/qlora/sft.sh (100%)

diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/infer.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/infer.sh
rename to examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/infer.sh
diff --git a/examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/sft.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/baichuan_13b/qlora_ddp/sft.sh
rename to examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp/sft.sh
diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora/infer.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/llama2_70b/qlora/infer.sh
rename to examples/pytorch/llm/scripts/llama2_70b_chat/qlora/infer.sh
diff --git a/examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora/sft.sh
similarity index 100%
rename from examples/pytorch/llm/scripts/llama2_70b/qlora/sft.sh
rename to examples/pytorch/llm/scripts/llama2_70b_chat/qlora/sft.sh
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
index 685d917b57..2583dd915d 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type full \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
index 8b4e1f3fa9..6ce044db16 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full/sft.sh
@@ -4,6 +4,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 \
 python src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type full \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --dataset alpaca-en,alpaca-zh \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
index 6b933c92b1..57abdfd3e6 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
index 30df447cf9..6610c0a79f 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
index 51c71c37ae..5f25801938 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
index ac1240c78e..d886bc305c 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --dataset alpaca-en,alpaca-zh \
@@ -10,6 +11,7 @@ python src/llm_sft.py \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
index 51c71c37ae..5f25801938 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
index 7ca32a52ac..70e7eeb9e3 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b-chat \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
@@ -15,6 +16,7 @@ torchrun \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \

From 2f440b058bd4a607c0c344cb30973a8abcff234e Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 14:07:28 +0800
Subject: [PATCH 6/9] update sh

---
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh      | 1 +
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh        | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh     | 1 +
 examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh | 1 +
 4 files changed, 4 insertions(+)

diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index b8e35a36b9..85c5e211c8 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -7,6 +7,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
index 78acd9da50..8fdc71a2a4 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -15,6 +15,7 @@ torchrun \
     --num_train_epochs 1 \
     --max_length 1024 \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
     --lora_alpha 32 \
     --lora_dropout_p 0.05 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
index 5f25801938..3fc5883642 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
@@ -8,6 +8,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
index 5f25801938..3fc5883642 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
@@ -8,6 +8,7 @@ python src/llm_infer.py \
     --ckpt_dir "runs/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
     --eval_human true \
     --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
     --max_new_tokens 1024 \
     --temperature 0.9 \
     --top_k 50 \

From 19758128639fb5045cec3f3d57f74f94bcad159b Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 15:43:35 +0800
Subject: [PATCH 7/9] fix bug

---
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh | 1 +
 examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh   | 1 +
 examples/pytorch/llm/src/llm_infer.py                   | 2 +-
 examples/pytorch/llm/src/utils/dataset.py               | 1 -
 4 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index 85c5e211c8..ba6a61c880 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -3,6 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
index 8fdc71a2a4..e5bd09af12 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -7,6 +7,7 @@ torchrun \
     src/llm_sft.py \
     --model_type qwen-7b \
     --sft_type lora \
+    --template_type chatml \
     --dtype bf16 \
     --output_dir runs \
     --ddp_backend nccl \
diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py
index 126c569abf..d9d0ead270 100644
--- a/examples/pytorch/llm/src/llm_infer.py
+++ b/examples/pytorch/llm/src/llm_infer.py
@@ -18,7 +18,7 @@
 @dataclass
 class InferArguments:
     model_type: str = field(
-        default='qwen-7b', metadata={'choices': list(MODEL_MAPPING.keys())})
+        default='qwen-7b-chat', metadata={'choices': list(MODEL_MAPPING.keys())})
     sft_type: str = field(
         default='lora', metadata={'choices': ['lora', 'full']})
     template_type: str = field(
diff --git a/examples/pytorch/llm/src/utils/dataset.py b/examples/pytorch/llm/src/utils/dataset.py
index 980da50f63..164878a270 100644
--- a/examples/pytorch/llm/src/utils/dataset.py
+++ b/examples/pytorch/llm/src/utils/dataset.py
@@ -23,7 +23,6 @@ def _processing_alpaca(
             inst = f'{inst}\n{inp}'
         new_instruction.append(inst)
     dataset = HfDataset.from_dict({
-        'history': [None] * len(new_instruction),
         'query': new_instruction,
         'response': dataset['output']
     })
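For context on the `--bnb_4bit_comp_dtype bf16` flag that these scripts add alongside `--quantization_bit 4`: below is a minimal, hedged sketch of the bitsandbytes configuration it plausibly corresponds to. The actual wiring lives in the repo's `select_bnb` helper, whose signature is not reproduced here; `build_bnb_config` is an illustrative name, and the nf4/double-quantization defaults are assumptions.

```python
# Hedged sketch only: how `--quantization_bit 4 --bnb_4bit_comp_dtype bf16`
# could map onto a transformers BitsAndBytesConfig. `build_bnb_config` is a
# hypothetical helper, not repo code; nf4/double-quant defaults are assumptions.
import torch
from transformers import BitsAndBytesConfig


def build_bnb_config(quantization_bit: int = 4,
                     bnb_4bit_comp_dtype: str = 'bf16') -> BitsAndBytesConfig:
    comp_dtype = {
        'fp16': torch.float16,
        'bf16': torch.bfloat16,
        'fp32': torch.float32,
    }[bnb_4bit_comp_dtype]
    return BitsAndBytesConfig(
        load_in_4bit=(quantization_bit == 4),
        load_in_8bit=(quantization_bit == 8),
        bnb_4bit_compute_dtype=comp_dtype,  # matmuls run in this dtype
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True)
```

Such a config is typically passed as `quantization_config=...` to `AutoModelForCausalLM.from_pretrained`; note that bf16 compute generally requires an Ampere-or-newer GPU.
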
From 6512bee311307435109995e732c69819d380ae5e Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Mon, 21 Aug 2023 15:47:24 +0800
Subject: [PATCH 8/9] update readme

---
 examples/pytorch/llm/README.md    | 20 ++++++++++----------
 examples/pytorch/llm/README_CN.md | 20 ++++++++++----------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md
index 8dea971980..cd002cce62 100644
--- a/examples/pytorch/llm/README.md
+++ b/examples/pytorch/llm/README.md
@@ -59,25 +59,25 @@ cd swift/examples/pytorch/llm
 # sft(qlora) and infer qwen-7b, Requires 16GB VRAM.
 # If you want to use quantification, you need to `pip install bitsandbytes`
 # If you want to push weights into modelscope hub during training, you need to set '--push_to_hub true'
-bash scripts/qwen_7b/qlora/sft.sh
-bash scripts/qwen_7b/qlora/infer.sh
+bash scripts/qwen_7b_chat/qlora/sft.sh
+bash scripts/qwen_7b_chat/qlora/infer.sh

 # sft(qlora+ddp) and infer qwen-7b, Requires 4*16GB VRAM.
-bash scripts/qwen_7b/qlora_ddp/sft.sh
-bash scripts/qwen_7b/qlora_ddp/infer.sh
+bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
+bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

 # sft(lora+ddp) and infer qwen-7b, Requires 4*22GB VRAM.
-bash scripts/qwen_7b/lora_ddp/sft.sh
-bash scripts/qwen_7b/lora_ddp/infer.sh
+bash scripts/qwen_7b_chat/lora_ddp/sft.sh
+bash scripts/qwen_7b_chat/lora_ddp/infer.sh

 # sft(full) and infer qwen-7b, Requires 95GB VRAM.
-bash scripts/qwen_7b/full/sft.sh
-bash scripts/qwen_7b/full/infer.sh
+bash scripts/qwen_7b_chat/full/sft.sh
+bash scripts/qwen_7b_chat/full/infer.sh

 # For more scripts, please see `scripts/` folder
 ```

-## Extend Models and Datasets
+## Extend Datasets
 1. If you need to extend the model, you can modify the `MODEL_MAPPING` in `utils/model.py`. `model_id` can be specified as a local path. In this case, `revision` doesn't work.
-2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `instruction`, `output`.
+2. If you need to extend or customize the dataset, you can modify the `DATASET_MAPPING` in `utils/dataset.py`. You need to customize the `get_*_dataset` function, which returns a dataset with two columns: `query`, `response`.
 3. If you need to extend the template, you can modify the `TEMPLATE_MAPPING` in `utils/preprocess.py`.
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md
index a5d5235c6e..a47849d222 100644
--- a/examples/pytorch/llm/README_CN.md
+++ b/examples/pytorch/llm/README_CN.md
@@ -61,25 +61,25 @@ cd swift/examples/pytorch/llm
 # 微调(qlora)+推理 qwen-7b, 需要16GB显存.
 # 如果你想要使用量化, 你需要`pip install bitsandbytes`
 # 如果你想在训练时, 将权重push到modelscope hub中, 你需要设置`--push_to_hub true`
-bash scripts/qwen_7b/qlora/sft.sh
-bash scripts/qwen_7b/qlora/infer.sh
+bash scripts/qwen_7b_chat/qlora/sft.sh
+bash scripts/qwen_7b_chat/qlora/infer.sh

 # 微调(qlora+ddp)+推理 qwen-7b, 需要4卡*16GB显存.
-bash scripts/qwen_7b/qlora_ddp/sft.sh
-bash scripts/qwen_7b/qlora_ddp/infer.sh
+bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
+bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

 # 微调(lora+ddp)+推理 qwen-7b, 需要4卡*22GB显存.
-bash scripts/qwen_7b/lora_ddp/sft.sh
-bash scripts/qwen_7b/lora_ddp/infer.sh
+bash scripts/qwen_7b_chat/lora_ddp/sft.sh
+bash scripts/qwen_7b_chat/lora_ddp/infer.sh

 # 微调(full)+推理 qwen-7b, 需要95G显存.
-bash scripts/qwen_7b/full/sft.sh
-bash scripts/qwen_7b/full/infer.sh
+bash scripts/qwen_7b_chat/full/sft.sh
+bash scripts/qwen_7b_chat/full/infer.sh

 # 更多的scripts脚本, 可以看`scripts`文件夹
 ```

-## 拓展模型和数据集
+## 拓展数据集
 1. 如果你想要拓展模型, 你可以修改`utils/model.py`文件中的`MODEL_MAPPING`. `model_id`可以指定为本地路径, 这种情况下, `revision`参数不起作用.
-2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`instruction`, `output`两列的数据集.
+2. 如果你想要拓展或使用自定义数据集, 你可以修改`utils/dataset.py`文件中的`DATASET_MAPPING`. 你需要自定义`get_*_dataset`函数, 并返回包含`query`, `response`两列的数据集.
 3. 如果你想要拓展template, 你可以修改`utils/preprocess.py`文件中的`TEMPLATE_MAPPING`.

From a5d99420b94446d0c609e73b868b4fb287050e61 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 22 Aug 2023 11:00:20 +0800
Subject: [PATCH 9/9] update swift

---
 swift/trainers/mixin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/trainers/mixin.py b/swift/trainers/mixin.py
index 71c9a2ea8c..af133fe8db 100644
--- a/swift/trainers/mixin.py
+++ b/swift/trainers/mixin.py
@@ -96,7 +96,7 @@ def init_git_repo(self, at_init: bool = False) -> None:
         hub_model_id = self.args.hub_model_id
         assert hub_model_id is not None, 'Please enter a valid hub_model_id'
-        if '/' not in self.args.hub_model_id:
+        if '/' not in hub_model_id:
             user_name = ModelScopeConfig.get_user_info()[0]
             assert isinstance(user_name, str)
             hub_model_id = f'{user_name}/{hub_model_id}'
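For the dataset contract stated in the README hunks above (a `get_*_dataset` function returning `query` and `response` columns), here is a minimal, hedged sketch of a custom loader. It assumes a local JSONL file with alpaca-style fields; the path `my_data.jsonl`, the field names, and the `'my-dataset'` key are illustrative assumptions, not repository code.

```python
# Hedged sketch of a custom dataset loader for `utils/dataset.py`.
# Only the query/response column contract comes from the README; the file
# name, field names, and registration key below are hypothetical.
import json

from datasets import Dataset as HfDataset


def get_my_dataset(path: str = 'my_data.jsonl') -> HfDataset:
    queries, responses = [], []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            record = json.loads(line)
            query = record['instruction']
            # Fold an optional `input` field into the query, mirroring the
            # alpaca preprocessing touched in PATCH 7/9.
            if record.get('input'):
                query = f"{query}\n{record['input']}"
            queries.append(query)
            responses.append(record['output'])
    return HfDataset.from_dict({'query': queries, 'response': responses})


# Illustrative registration; real DATASET_MAPPING entries may carry extra metadata.
# DATASET_MAPPING['my-dataset'] = get_my_dataset
```

Registering the function under a new key in `DATASET_MAPPING` and selecting that key via `--dataset` is then the expected usage, per the README's extension notes.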