From e15c9f9128edf9855d2a97755753e03ba860fef7 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Thu, 16 Nov 2023 02:05:53 +0800
Subject: [PATCH 1/3] support flash_attn

---
 README.md                         |   2 +-
 README_CN.md                      |   2 +-
 examples/pytorch/llm/README.md    |   5 +-
 examples/pytorch/llm/README_CN.md |   5 +-
 swift/llm/utils/argument.py       |   3 +
 swift/llm/utils/model.py          | 141 +++++++++++++++++-------
 6 files changed, 90 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index 990beb9664..d916825fad 100644
--- a/README.md
+++ b/README.md
@@ -155,9 +155,9 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
 - xverse series: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary)
 - bluelm series: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), [bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary)
 - mistral series: [mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-chat](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary)
+ - yi series: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - ziya series: [ziya2-13b](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Base/summary), [ziya2-13b-chat](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Chat/summary)
 - skywork series: [skywork-13b](https://modelscope.cn/models/skywork/Skywork-13B-base/summary), [skywork-13b-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat/summary)
- - yi series: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - other: [polylm-13b](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation/summary), [seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)
 - Supported Datasets:
   - NLP:
diff --git a/README_CN.md b/README_CN.md
index 2bac06ccab..a06281a637 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -153,9 +153,9 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
 - xverse 系列: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary)
 - bluelm 系列: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), [bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary)
 - mistral 系列: [mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-chat](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary)
+ - yi 系列: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - ziya 系列: [ziya2-13b](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Base/summary), [ziya2-13b-chat](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Chat/summary)
 - skywork 系列: [skywork-13b](https://modelscope.cn/models/skywork/Skywork-13B-base/summary), [skywork-13b-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat/summary)
- - yi 系列: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - other: [polylm-13b](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation/summary), [seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)
 - 支持的数据集:
   - NLP:
diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md
index 7c4ebb21db..5960311fc9 100644
--- a/examples/pytorch/llm/README.md
+++ b/examples/pytorch/llm/README.md
@@ -26,9 +26,10 @@
 - llama series: [llama2-7b](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary), [llama2-7b-chat](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary), [llama2-13b](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary), [llama2-13b-chat](https://modelscope.cn/models/modelscope/Llama-2-13b-chat-ms/summary), [llama2-70b](https://modelscope.cn/models/modelscope/Llama-2-70b-ms/summary), [llama2-70b-chat](https://modelscope.cn/models/modelscope/Llama-2-70b-chat-ms/summary)
 - openbuddy series: [openbuddy-llama2-13b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16/summary), [openbuddy-llama-65b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16/summary), [openbuddy-llama2-70b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/summary), [openbuddy-mistral-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-mistral-7b-v13.1/summary)
 - internlm series: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)
- - xverse series: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary)
+ - xverse series: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary)
 - bluelm series: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), [bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary)
 - mistral series: [mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-chat](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary)
+ - yi series: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - ziya series: [ziya2-13b](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Base/summary), [ziya2-13b-chat](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Chat/summary)
 - skywork series: [skywork-13b](https://modelscope.cn/models/skywork/Skywork-13B-base/summary), [skywork-13b-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat/summary)
 - other: [polylm-13b](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation/summary), [seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)
@@ -146,7 +147,7 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'

 ## 🌟 Run SFT and Inference
 Performance: full(nice) > lora > qlora(auto_gptq) > qlora(bnb)
-Training GPU memory: qlora(low,3090) > lora > full(2*A100)
+Training GPU memory: full(high,2*A100) > lora > qlora(low,3090)

 **Tips**:
 - You can set `--gradient_checkpointing true` during training to **save GPU memory**, but this will slightly decrease the training speed. This is useful if you need to train LLM on **consumer-grade GPU**, e.g. 3090.
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md
index 2ad776404f..c06b47af4c 100644
--- a/examples/pytorch/llm/README_CN.md
+++ b/examples/pytorch/llm/README_CN.md
@@ -26,9 +26,10 @@
 - llama 系列: [llama2-7b](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary), [llama2-7b-chat](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary), [llama2-13b](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary), [llama2-13b-chat](https://modelscope.cn/models/modelscope/Llama-2-13b-chat-ms/summary), [llama2-70b](https://modelscope.cn/models/modelscope/Llama-2-70b-ms/summary), [llama2-70b-chat](https://modelscope.cn/models/modelscope/Llama-2-70b-chat-ms/summary)
 - openbuddy 系列: [openbuddy-llama2-13b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16/summary), [openbuddy-llama-65b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16/summary), [openbuddy-llama2-70b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/summary), [openbuddy-mistral-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-mistral-7b-v13.1/summary)
 - internlm 系列: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)
- - xverse 系列: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary)
+ - xverse 系列: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary)
 - bluelm 系列: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), [bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary)
 - mistral 系列: [mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-chat](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary)
+ - yi 系列: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary)
 - ziya 系列: [ziya2-13b](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Base/summary), [ziya2-13b-chat](https://modelscope.cn/models/Fengshenbang/Ziya2-13B-Chat/summary)
 - skywork 系列: [skywork-13b](https://modelscope.cn/models/skywork/Skywork-13B-base/summary), [skywork-13b-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat/summary)
 - other: [polylm-13b](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation/summary), [seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)
@@ -146,7 +147,7 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'

 ## 🌟 微调和推理
 性能: full(优) > lora > qlora(auto_gptq) > qlora(bnb)
-训练显存: qlora(低,3090) > lora > full(2*A100)
+训练显存: full(高,2*A100) > lora > qlora(低,3090)

 **提示**:
 - 你可以在训练时设置`--gradient_checkpointing true`来**节约显存**, 但这会略微降低训练速度. 如果你需要在**消费级显卡**中训练大模型, 这很有用, 例如: 3090.
diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py
index 8b1ce0194f..b4ac729d0f 100644
--- a/swift/llm/utils/argument.py
+++ b/swift/llm/utils/argument.py
@@ -222,6 +222,7 @@ def __post_init__(self) -> None:
         self.deepspeed = None
         if self.deepspeed_config_path is not None:
+            require_version('deepspeed')
             with open(self.deepspeed_config_path, 'r') as f:
                 self.deepspeed = json.load(f)
             logger.info(f'Using deepspeed: {self.deepspeed}')

@@ -397,8 +398,10 @@ def select_bnb(
         torch.float16, torch.bfloat16, torch.float32
     }
     if quantization_bit == 4:
+        require_version('bitsandbytes')
         load_in_4bit, load_in_8bit = True, False
     elif quantization_bit == 8:
+        require_version('bitsandbytes')
         load_in_4bit, load_in_8bit = False, True
     else:
         load_in_4bit, load_in_8bit = False, False
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
index 47dac9fdb5..e1a85f04c6 100644
--- a/swift/llm/utils/model.py
+++ b/swift/llm/utils/model.py
@@ -93,6 +93,9 @@ class ModelType:
     # mistral
     mistral_7b = 'mistral-7b'
     mistral_7b_chat = 'mistral-7b-chat'
+    # yi
+    yi_6b = 'yi-6b'
+    yi_34b = 'yi-34b'
     # ziya
     ziya2_13b = 'ziya2-13b'
     ziya2_13b_chat = 'ziya2-13b-chat'
@@ -102,8 +105,6 @@ class ModelType:
     # other
     polylm_13b = 'polylm-13b'
     seqgpt_560m = 'seqgpt-560m'
-    yi_6b = 'yi-6b'
-    yi_34b = 'yi-34b'


 class LoRATM(NamedTuple):
@@ -191,28 +192,8 @@ def _register_model(
                 LoRATM.bluelm, TemplateType.default_generation)
 @register_model(ModelType.bluelm_7b, 'vivo-ai/BlueLM-7B-Base',
                 LoRATM.bluelm, TemplateType.default_generation)
-@register_model(ModelType.yi_34b, '01ai/Yi-34B', LoRATM.yi,
-                TemplateType.default_generation)
-@register_model(ModelType.yi_6b, '01ai/Yi-6B', LoRATM.yi,
-                TemplateType.default_generation)
 @register_model(ModelType.seqgpt_560m, 'damo/nlp_seqgpt-560m', LoRATM.bloom,
                 TemplateType.default_generation)
-@register_model(ModelType.ziya2_13b_chat, 'Fengshenbang/Ziya2-13B-Chat',
-                LoRATM.ziya, TemplateType.ziya)
-@register_model(ModelType.ziya2_13b, 'Fengshenbang/Ziya2-13B-Base',
-                LoRATM.ziya, TemplateType.default_generation)
-@register_model(
-    ModelType.mistral_7b_chat,
-    'AI-ModelScope/Mistral-7B-Instruct-v0.1',
-    LoRATM.mistral,
-    TemplateType.llama,
-    requires=['transformers>=4.34'])
-@register_model(
-    ModelType.mistral_7b,
-    'AI-ModelScope/Mistral-7B-v0.1',
-    LoRATM.mistral,
-    TemplateType.default_generation,
-    requires=['transformers>=4.34'])
 @register_model(ModelType.xverse_13b_chat, 'xverse/XVERSE-13B-Chat',
                 LoRATM.xverse, TemplateType.xverse)
 @register_model(ModelType.xverse_13b, 'xverse/XVERSE-13B', LoRATM.xverse,
                 TemplateType.default_generation)
@@ -236,39 +217,6 @@ def _register_model(
                 LoRATM.internlm, TemplateType.internlm)
 @register_model(ModelType.internlm_7b, 'Shanghai_AI_Laboratory/internlm-7b',
                 LoRATM.internlm, TemplateType.default_generation)
-@register_model(
-    ModelType.openbuddy_mistral_7b_chat,
-    'OpenBuddy/openbuddy-mistral-7b-v13.1',
-    LoRATM.mistral,
-    TemplateType.openbuddy,
-    requires=['transformers>=4.34'])
-@register_model(ModelType.openbuddy_llama2_70b_chat,
-                'OpenBuddy/openbuddy-llama2-70b-v10.1-bf16', LoRATM.llama2,
-                TemplateType.openbuddy)
-@register_model(ModelType.openbuddy_llama2_65b_chat,
-                'OpenBuddy/openbuddy-llama-65b-v8-bf16', LoRATM.llama2,
-                TemplateType.openbuddy)
-@register_model(ModelType.openbuddy_llama2_13b_chat,
-                'OpenBuddy/openbuddy-llama2-13b-v8.1-fp16', LoRATM.llama2,
-                TemplateType.openbuddy)
-@register_model(
-    ModelType.llama2_7b_chat,
-    'modelscope/Llama-2-7b-chat-ms',
-    LoRATM.llama2,
-    TemplateType.llama,
-    ignore_file_pattern=[r'.+\.bin$'])
-@register_model(
-    ModelType.llama2_70b,
-    'modelscope/Llama-2-70b-ms',
-    LoRATM.llama2,
-    TemplateType.default_generation,
-    ignore_file_pattern=[r'.+\.bin$'])
-@register_model(
-    ModelType.llama2_7b,
-    'modelscope/Llama-2-7b-ms',
-    LoRATM.llama2,
-    TemplateType.default_generation,
-    ignore_file_pattern=[r'.+\.bin$'])
 @register_model(
     ModelType.baichuan_13b_chat,
     'baichuan-inc/Baichuan-13B-Chat',
@@ -492,9 +440,77 @@ def cross_entropy_forward(self, inputs: Tensor,
     return model, tokenizer


+@register_model(ModelType.yi_34b, '01ai/Yi-34B', LoRATM.yi,
+                TemplateType.default_generation)
+@register_model(ModelType.yi_6b, '01ai/Yi-6B', LoRATM.yi,
+                TemplateType.default_generation)
+@register_model(ModelType.ziya2_13b_chat, 'Fengshenbang/Ziya2-13B-Chat',
+                LoRATM.ziya, TemplateType.ziya)
+@register_model(ModelType.ziya2_13b, 'Fengshenbang/Ziya2-13B-Base',
+                LoRATM.ziya, TemplateType.default_generation)
+@register_model(
+    ModelType.openbuddy_mistral_7b_chat,
+    'OpenBuddy/openbuddy-mistral-7b-v13.1',
+    LoRATM.mistral,
+    TemplateType.openbuddy,
+    requires=['transformers>=4.34'])
+@register_model(ModelType.openbuddy_llama2_70b_chat,
+                'OpenBuddy/openbuddy-llama2-70b-v10.1-bf16', LoRATM.llama2,
+                TemplateType.openbuddy)
+@register_model(ModelType.openbuddy_llama2_65b_chat,
+                'OpenBuddy/openbuddy-llama-65b-v8-bf16', LoRATM.llama2,
+                TemplateType.openbuddy)
+@register_model(ModelType.openbuddy_llama2_13b_chat,
+                'OpenBuddy/openbuddy-llama2-13b-v8.1-fp16', LoRATM.llama2,
+                TemplateType.openbuddy)
+@register_model(
+    ModelType.mistral_7b_chat,
+    'AI-ModelScope/Mistral-7B-Instruct-v0.1',
+    LoRATM.mistral,
+    TemplateType.llama,
+    requires=['transformers>=4.34'])
+@register_model(
+    ModelType.mistral_7b,
+    'AI-ModelScope/Mistral-7B-v0.1',
+    LoRATM.mistral,
+    TemplateType.default_generation,
+    requires=['transformers>=4.34'])
+def get_model_tokenizer_with_flash_attn(model_dir: str,
+                                        torch_dtype: Dtype,
+                                        model_kwargs: Dict[str, Any],
+                                        load_model: bool = True,
+                                        model_config=None,
+                                        **kwargs):
+    if model_config is None:
+        model_config = AutoConfig.from_pretrained(
+            model_dir, trust_remote_code=True)
+    _flash_attn_2_enabled = kwargs.pop('use_flash_attn', False)
+    model_config._flash_attn_2_enabled = _flash_attn_2_enabled
+    return get_model_tokenizer_from_repo(model_dir, torch_dtype, model_kwargs,
+                                         load_model, model_config, **kwargs)
+
+
+@register_model(
+    ModelType.llama2_7b,
+    'modelscope/Llama-2-7b-ms',
+    LoRATM.llama2,
+    TemplateType.default_generation,
+    ignore_file_pattern=[r'.+\.bin$'])
+@register_model(
+    ModelType.llama2_13b,
+    'modelscope/Llama-2-13b-ms',
+    LoRATM.llama2,
+    TemplateType.default_generation,
+    ignore_file_pattern=[r'.+\.bin$'])
+@register_model(
+    ModelType.llama2_70b,
+    'modelscope/Llama-2-70b-ms',
+    LoRATM.llama2,
+    TemplateType.default_generation,
+    ignore_file_pattern=[r'.+\.bin$'])
+@register_model(
+    ModelType.llama2_7b_chat,
+    'modelscope/Llama-2-7b-chat-ms',
     LoRATM.llama2,
     TemplateType.llama,
     ignore_file_pattern=[r'.+\.bin$'])
 @register_model(
@@ -505,10 +521,10 @@ def cross_entropy_forward(self, inputs: Tensor,
     TemplateType.llama,
     ignore_file_pattern=[r'.+\.bin$'])
 @register_model(
-    ModelType.llama2_13b,
-    'modelscope/Llama-2-13b-ms',
+    ModelType.llama2_70b_chat,
+    'modelscope/Llama-2-70b-chat-ms',
     LoRATM.llama2,
-    TemplateType.default_generation,
+    TemplateType.llama,
     ignore_file_pattern=[r'.+\.bin$'])
 def get_model_tokenizer_llama2(model_dir: str,
                                torch_dtype: Dtype,
@@ -518,8 +534,9 @@ def get_model_tokenizer_llama2(model_dir: str,
     model_config = AutoConfig.from_pretrained(
         model_dir, trust_remote_code=True)
     model_config.pretraining_tp = 1
-    return get_model_tokenizer_from_repo(model_dir, torch_dtype, model_kwargs,
-                                         load_model, model_config, **kwargs)
+    return get_model_tokenizer_with_flash_attn(model_dir, torch_dtype,
+                                               model_kwargs, load_model,
+                                               model_config, **kwargs)


 @register_model(ModelType.polylm_13b, 'damo/nlp_polylm_13b_text_generation',

From e85398c6271064b5fe0bf1e9ec57ebac9e5cf751 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Thu, 16 Nov 2023 02:08:50 +0800
Subject: [PATCH 2/3] update readme

---
 examples/pytorch/llm/README.md    | 2 +-
 examples/pytorch/llm/README_CN.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md
index 5960311fc9..2fa64f2665 100644
--- a/examples/pytorch/llm/README.md
+++ b/examples/pytorch/llm/README.md
@@ -147,7 +147,7 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'

 ## 🌟 Run SFT and Inference
 Performance: full(nice) > lora > qlora(auto_gptq) > qlora(bnb)
-Training GPU memory: full(high,2*A100) > lora > qlora(low,3090)
+Training GPU memory: qlora(low,3090) < lora < full(high,2*A100)

 **Tips**:
 - You can set `--gradient_checkpointing true` during training to **save GPU memory**, but this will slightly decrease the training speed. This is useful if you need to train LLM on **consumer-grade GPU**, e.g. 3090.
diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md
index c06b47af4c..a3d54cde46 100644
--- a/examples/pytorch/llm/README_CN.md
+++ b/examples/pytorch/llm/README_CN.md
@@ -147,7 +147,7 @@ CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'

 ## 🌟 微调和推理
 性能: full(优) > lora > qlora(auto_gptq) > qlora(bnb)
-训练显存: full(高,2*A100) > lora > qlora(低,3090)
+训练显存: qlora(低,3090) < lora < full(高,2*A100)

 **提示**:
 - 你可以在训练时设置`--gradient_checkpointing true`来**节约显存**, 但这会略微降低训练速度. 如果你需要在**消费级显卡**中训练大模型, 这很有用, 例如: 3090.

From f76c7c28cf976b832197ecefbd0c2bc9de7997da Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Thu, 16 Nov 2023 11:29:39 +0800
Subject: [PATCH 3/3] update model.py

---
 .dev_scripts/ci_container_test.sh |  2 +-
 swift/llm/utils/argument.py       | 20 +++++++++++---------
 swift/llm/utils/model.py          | 12 ++++++------
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 173a8a483f..fcc7869ed3 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -24,7 +24,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     # test with install
     pip install .

-    pip install auto_gptq -U -i https://mirrors.aliyun.com/pypi/simple/
+    pip install auto_gptq bitsandbytes deepspeed -U -i https://mirrors.aliyun.com/pypi/simple/
 else
     echo "Running case in release image, run case directly!"
 fi
diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py
index 6743d06bf4..45cd1afc7f 100644
--- a/swift/llm/utils/argument.py
+++ b/swift/llm/utils/argument.py
@@ -36,10 +36,11 @@ class SftArguments:
         metadata={'choices': ['lora', 'longlora', 'qalora', 'full']})
     tuner_backend: str = field(
         default='swift', metadata={'choices': ['swift', 'peft']})
-    template_type: Optional[str] = field(
-        default=None,
+    template_type: str = field(
+        default='AUTO',
         metadata={
-            'help': f'template_type choices: {list(TEMPLATE_MAPPING.keys())}'
+            'help':
+            f"template_type choices: {list(TEMPLATE_MAPPING.keys()) + ['AUTO']}"
         })
     output_dir: str = 'output'
     add_output_dir_suffix: bool = True
@@ -190,7 +191,7 @@ def __post_init__(self) -> None:
         else:
             raise ValueError(f'sft_type: {self.sft_type}')

-        if self.template_type is None:
+        if self.template_type == 'AUTO':
             self.template_type = MODEL_MAPPING[self.model_type]['template']
             logger.info(f'Setting template_type: {self.template_type}')
         if self.dataset is None:
@@ -244,10 +245,11 @@ class InferArguments:
     sft_type: str = field(
         default='lora',
         metadata={'choices': ['lora', 'longlora', 'qalora', 'full']})
-    template_type: Optional[str] = field(
-        default=None,
+    template_type: str = field(
+        default='AUTO',
         metadata={
-            'help': f'template_type choices: {list(TEMPLATE_MAPPING.keys())}'
+            'help':
+            f"template_type choices: {list(TEMPLATE_MAPPING.keys()) + ['AUTO']}"
         })
     ckpt_dir: Optional[str] = field(
         default=None, metadata={'help': '/path/to/your/vx_xxx/checkpoint-xxx'})
@@ -313,7 +315,7 @@ def __post_init__(self) -> None:
         handle_path(self)
         self.torch_dtype, _, _ = select_dtype(self)

-        if self.template_type is None:
+        if self.template_type == 'AUTO':
             self.template_type = MODEL_MAPPING[self.model_type]['template']
             logger.info(f'Setting template_type: {self.template_type}')
         if self.dataset is None:
@@ -346,7 +348,7 @@ def __post_init__(self) -> None:
         handle_path(self)
         self.torch_dtype, _, _ = select_dtype(self)

-        if self.template_type is None:
+        if self.template_type == 'AUTO':
             self.template_type = MODEL_MAPPING[self.model_type]['template']
             logger.info(f'Setting template_type: {self.template_type}')
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
index e1a85f04c6..fc29f6cc4c 100644
--- a/swift/llm/utils/model.py
+++ b/swift/llm/utils/model.py
@@ -115,12 +115,12 @@ class LoRATM(NamedTuple):
     qwen = ['c_attn']
     polylm = ['c_attn']
     bloom = ['query_key_value']
-    internlm = ['q_proj', 'k_proj', 'v_proj']
-    xverse = ['q_proj', 'k_proj', 'v_proj']
-    mistral = ['q_proj', 'k_proj', 'v_proj']
-    ziya = ['q_proj', 'k_proj', 'v_proj']
-    yi = ['q_proj', 'k_proj', 'v_proj']
-    bluelm = ['q_proj', 'k_proj', 'v_proj']
+    internlm = llama2
+    xverse = llama2
+    mistral = llama2
+    ziya = llama2
+    yi = llama2
+    bluelm = llama2


 GetModelTokenizerFunction = Callable[..., Tuple[Optional[PreTrainedModel],
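
Below is a minimal usage sketch of the `use_flash_attn` switch that PATCH 1/3 introduces. It is not part of the patch series: the checkpoint directory, dtype, and `model_kwargs` values are illustrative assumptions, and it assumes `get_model_tokenizer_with_flash_attn` is importable from `swift.llm.utils.model` as defined in the diff above.

```python
# Hypothetical example (not from the patch): load a model through the new
# flash-attn code path. `use_flash_attn` is popped from **kwargs and written
# to model_config._flash_attn_2_enabled before the weights are loaded.
import torch

from swift.llm.utils.model import get_model_tokenizer_with_flash_attn

model, tokenizer = get_model_tokenizer_with_flash_attn(
    '/path/to/Mistral-7B-v0.1',  # model_dir: local checkpoint dir (assumed)
    torch.bfloat16,              # torch_dtype
    {'device_map': 'auto'},      # model_kwargs forwarded to from_pretrained
    use_flash_attn=True,         # enables FlashAttention-2 via the config
)
```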