From 0495c4c708035371f95ed36a00c4e8a0835dc7b8 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Wed, 10 Jan 2024 18:45:35 +0800 Subject: [PATCH 1/6] fix --- .../llm/scripts/dpo/{lora => lora_ddp_mp}/dpo.sh | 14 ++++++++++---- .../llm/scripts/dpo/{lora => lora_ddp_mp}/infer.sh | 0 swift/llm/dpo.py | 4 +++- swift/llm/tuner.py | 2 +- swift/tuners/base.py | 4 ++-- swift/tuners/neftune.py | 2 +- 6 files changed, 17 insertions(+), 9 deletions(-) rename examples/pytorch/llm/scripts/dpo/{lora => lora_ddp_mp}/dpo.sh (76%) rename examples/pytorch/llm/scripts/dpo/{lora => lora_ddp_mp}/infer.sh (100%) diff --git a/examples/pytorch/llm/scripts/dpo/lora/dpo.sh b/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh similarity index 76% rename from examples/pytorch/llm/scripts/dpo/lora/dpo.sh rename to examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh index a1949b6ae3..c667dff744 100644 --- a/examples/pytorch/llm/scripts/dpo/lora/dpo.sh +++ b/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh @@ -1,7 +1,13 @@ -# Experimental environment: 8*A100 -# Memory usage: 8 * 50G +# Experimental environment: 4*A100 +# Memory usage: 4 * 20G +nproc_per_node=2 + PYTHONPATH=../../.. \ -accelerate launch llm_dpo.py \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +torchrun \ + --nproc_per_node=$nproc_per_node \ + --master_port 29500 \ + llm_dpo.py \ --model_type mistral-7b \ --ref_model_type mistral-7b \ --model_revision master \ @@ -25,7 +31,7 @@ accelerate launch llm_dpo.py \ --batch_size 1 \ --weight_decay 0.01 \ --learning_rate 5e-5 \ - --gradient_accumulation_steps 16 \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ --max_grad_norm 1.0 \ --warmup_ratio 0.03 \ --eval_steps 2000 \ diff --git a/examples/pytorch/llm/scripts/dpo/lora/infer.sh b/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh similarity index 100% rename from examples/pytorch/llm/scripts/dpo/lora/infer.sh rename to examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh diff --git a/swift/llm/dpo.py b/swift/llm/dpo.py index 580e15ed85..7c88ce3649 100644 --- a/swift/llm/dpo.py +++ b/swift/llm/dpo.py @@ -31,7 +31,7 @@ def llm_dpo(args: DPOArguments) -> str: # Loading Model and Tokenizer model_kwargs = {'low_cpu_mem_usage': True} - if (is_dist() and not is_ddp_plus_mp()) or 'HF_ACCELERATOR' in os.environ: + if is_dist() and not is_ddp_plus_mp(): model_kwargs['device_map'] = {'': local_rank} else: model_kwargs['device_map'] = 'auto' @@ -61,6 +61,8 @@ def llm_dpo(args: DPOArguments) -> str: ref_model = deepcopy(model) logger.info(f'model_config: {model.config}') + if hasattr(model, 'hf_device_map'): + logger.info(f'model device_map {model.hf_device_map}') generation_config = GenerationConfig( max_new_tokens=args.max_new_tokens, temperature=args.temperature, diff --git a/swift/llm/tuner.py b/swift/llm/tuner.py index 7bd93beb0e..95989c8812 100644 --- a/swift/llm/tuner.py +++ b/swift/llm/tuner.py @@ -85,7 +85,7 @@ def prepare_model(model, args: SftArguments): if args.neftune_alpha > 0.001: neftune_config = NEFTuneConfig(noise_alpha=args.neftune_alpha) - model = Swift.prepare_model(model, neftune_config) + model = Swift.prepare_model(model, {'neftune': neftune_config}) logger.info(f'neftune_config: {neftune_config}') class TrainerAdapterCallback(TrainerCallback): diff --git a/swift/tuners/base.py b/swift/tuners/base.py index d6810445a5..6e1f0edab7 100644 --- a/swift/tuners/base.py +++ b/swift/tuners/base.py @@ -56,7 +56,7 @@ def __init__(self, new_adapters.append(DEFAULT_ADAPTER) else: logger.warn( - f'Adater {DEFAULT_ADAPTER} has been patched, skip.') + f'Adapter {DEFAULT_ADAPTER} has been patched, skip.') elif isinstance(config, dict): assert (all(isinstance(c, SwiftConfig) for c in config.values())) for adapter_name, _config in config.items(): @@ -66,7 +66,7 @@ def __init__(self, new_adapters.append(adapter_name) else: logger.warn( - f'Adater {adapter_name} has been patched, skip.') + f'Adapter {adapter_name} has been patched, skip.') self.model = model self.extra_state_keys = extra_state_keys or [] diff --git a/swift/tuners/neftune.py b/swift/tuners/neftune.py index 300f6646e2..e49924e53a 100644 --- a/swift/tuners/neftune.py +++ b/swift/tuners/neftune.py @@ -55,7 +55,7 @@ def neftune_hook(module, args, output): sub_module.nef_activated = True def state_dict_callback(state_dict, adapter_name): - return state_dict + return {} def mark_trainable_callback(model): return From 1652466d59a89aacf98a39456de9b87da57dde35 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 10 Jan 2024 19:36:13 +0800 Subject: [PATCH 2/6] add docs --- README.md | 4 +- README_CN.md | 4 +- ...55\347\273\203\346\216\250\347\220\206.md" | 5 ++ ...56\350\260\203\346\226\207\346\241\243.md" | 85 ++++++++++++++++++- ...50\347\220\206\346\226\207\346\241\243.md" | 23 ++++- ...44\350\241\214\345\217\202\346\225\260.md" | 7 ++ ...11\344\270\216\346\213\223\345\261\225.md" | 9 ++ 7 files changed, 129 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7a9ef05053..4561df6bbe 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用 - 2023.1.4: Support for **VLLM deployment**, compatible with the **OpenAI API** style. For more details, please refer to [VLLM Inference Acceleration and Deployment](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md#部署) - 2023.1.4: Update [Benchmark](https://github.com/modelscope/swift/blob/main/docs/source/LLM/Benchmark.md) to facilitate viewing the training speed and GPU memory required for different models. - 🔥 2023.12.29: Support web-ui for training and inference, use `swift web-ui` after the installation of ms-swift. -- 🔥 2023.12.29: Support DPO RLHF(Reinforcement Learning from Human Feedback) and two datasets: AI-ModelScope/stack-exchange-paired and AI-ModelScope/hh-rlhf for this task. Use [this script](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora/dpo.sh) to start training! +- 🔥 2023.12.29: Support DPO RLHF(Reinforcement Learning from Human Feedback) and two datasets: AI-ModelScope/stack-exchange-paired and AI-ModelScope/hh-rlhf for this task. Use [this script](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh) to start training! - 🔥 2023.12.28: Support SCEdit! This framework can easily reduce memory usage in training and inference, and replace ControlNet for controllable image generating scenarios, view the following chapter for details. - 2023.12.23: Support [codegeex2-6b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/codegeex2_6b). - 2023.12.19: Support [phi2-3b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/phi2_3b). @@ -113,7 +113,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用 - Quickly perform **inference** on LLM and build a **Web-UI**, see the [LLM Inference Documentation](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM推理文档.md). - Rapidly **fine-tune** and perform inference on LLM, and build a Web-UI, see the [LLM Fine-tuning Documentation](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM微调文档.md). - Using **interface** to fine-tuning and perform inference, see the [WEB-UI Documentation](https://github.com/modelscope/swift/blob/main/docs/source/GetStarted/%E7%95%8C%E9%9D%A2%E8%AE%AD%E7%BB%83%E6%8E%A8%E7%90%86.md). -- **DPO training** supported, start by using [this script](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora/dpo.sh). +- **DPO training** supported, start by using [this script](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh). - Utilize VLLM for **inference acceleration** and **deployment(OpenAI API)**. Please refer to [VLLM Inference Acceleration and Deployment](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md) for more information. - View the models and datasets supported by Swift. You can check [supported models and datasets](https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md). - Expand and customize models, datasets, and dialogue templates in Swift, see [Customization and Expansion](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自定义与拓展.md). diff --git a/README_CN.md b/README_CN.md index fe3bd5691f..d2bf279788 100644 --- a/README_CN.md +++ b/README_CN.md @@ -63,7 +63,7 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展 - 2023.1.4: 支持**VLLM部署**, 兼容**OpenAI API**样式, 具体可以查看[VLLM推理加速与部署](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md#部署). - 2023.1.4: 更新[Benchmark](https://github.com/modelscope/swift/blob/main/docs/source/LLM/Benchmark.md), 方便查看不同模型训练的速度和所需显存. - 🔥 2023.12.29: 支持web-ui进行sft训练和推理,安装ms-swift后使用`swift web-ui`开启 -- 🔥 2023.12.29: 支持 DPO RLHF(Reinforcement Learning from Human Feedback) 和两个用于此任务的数据集: AI-ModelScope/stack-exchange-paired 以及 AI-ModelScope/hh-rlhf. 使用[这个脚本](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora/dpo.sh)开启训练! +- 🔥 2023.12.29: 支持 DPO RLHF(Reinforcement Learning from Human Feedback) 和两个用于此任务的数据集: AI-ModelScope/stack-exchange-paired 以及 AI-ModelScope/hh-rlhf. 使用[这个脚本](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh)开启训练! - 🔥 2023.12.28: 支持SCEdit! 该tuner可显著降低U-Net中的显存占用,并支持低显存可控图像生成(取代ControlNet),阅读下面的章节来了解详细信息 - 2023.12.23: 支持[codegeex2-6b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/codegeex2_6b). - 2023.12.19: 支持[phi2-3b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/phi2_3b). @@ -111,7 +111,7 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展 - 快速对LLM进行**推理**, 搭建**Web-UI**, 可以查看[LLM推理文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM推理文档.md). - 快速对LLM进行**微调**, 推理并搭建Web-UI, 可以查看[LLM微调文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM微调文档.md). - 使用**界面**方式进行微调和推理, 可以查看[WEB-UI文档](https://github.com/modelscope/swift/blob/main/docs/source/GetStarted/%E7%95%8C%E9%9D%A2%E8%AE%AD%E7%BB%83%E6%8E%A8%E7%90%86.md). -- 支持**DPO训练**, 使用[这个脚本](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora/dpo.sh)开启训练 +- 支持**DPO训练**, 使用[这个脚本](https://github.com/modelscope/swift/blob/v1.5.0/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/dpo.sh)开启训练 - 使用VLLM进行**推理加速**和**部署(OpenAI API)**. 可以查看[VLLM推理加速与部署](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md). - 查看swift支持的模型和数据集. 可以查看[支持的模型和数据集](https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md). - 对swift中的模型, 数据集, 对话模板进行**拓展**, 可以查看[自定义与拓展](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自定义与拓展.md). diff --git "a/docs/source/GetStarted/\347\225\214\351\235\242\350\256\255\347\273\203\346\216\250\347\220\206.md" "b/docs/source/GetStarted/\347\225\214\351\235\242\350\256\255\347\273\203\346\216\250\347\220\206.md" index d7b14889dc..8ce33f7713 100644 --- "a/docs/source/GetStarted/\347\225\214\351\235\242\350\256\255\347\273\203\346\216\250\347\220\206.md" +++ "b/docs/source/GetStarted/\347\225\214\351\235\242\350\256\255\347\273\203\346\216\250\347\220\206.md" @@ -5,3 +5,8 @@ swift web-ui ``` 开启界面训练和推理。 + +web-ui没有传入参数,所有可控部分都在界面中。但是有几个环境变量可以使用: + +> WEBUI_SHARE=1 控制gradio是否是share状态 +> SWIFT_UI_LANG=en/zh 控制web-ui界面语言 diff --git "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" index 87819b912e..4c9566bb4b 100644 --- "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" @@ -68,7 +68,63 @@ torch.cuda.empty_cache() app_ui_main(infer_args) ``` +## DPO(人类对齐训练) + +下面的shell脚本运行了一个人类对齐训练。首先需要切换到运行目录: + +```shell +cd examples/pytorch/llm +``` + +运行下面的命令: + +```shell +# Experimental environment: 4*A100 +# Memory usage: 4 * 20G +nproc_per_node=2 + +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +torchrun \ + --nproc_per_node=$nproc_per_node \ + --master_port 29500 \ + llm_dpo.py \ + --model_type mistral-7b \ + --ref_model_type mistral-7b \ + --model_revision master \ + --sft_type lora \ + --tuner_backend swift \ + --dtype AUTO \ + --output_dir output \ + --dataset hh-rlhf \ + --train_dataset_sample -1 \ + --truncation_strategy truncation_left \ + --val_dataset_sample 2000 \ + --num_train_epochs 3 \ + --max_length 1024 \ + --max_prompt_length 512 \ + --check_dataset_strategy none \ + --lora_rank 8 \ + --lora_alpha 32 \ + --lora_dropout_p 0.05 \ + --lora_target_modules ALL \ + --gradient_checkpointing true \ + --batch_size 1 \ + --weight_decay 0.01 \ + --learning_rate 5e-5 \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ + --max_grad_norm 1.0 \ + --warmup_ratio 0.03 \ + --eval_steps 2000 \ + --save_steps 2000 \ + --save_total_limit 2 \ + --logging_steps 10 \ +``` + +DPO训练需要在一张显卡上加载两个模型,因此推荐显存至少24G以上。DPO训练后的模型推理和SFT的推理流程相同。 + ### 使用CLI + ```bash # Experimental environment: A10, 3090, V100, ... # 20GB GPU memory @@ -307,13 +363,36 @@ swift merge-lora --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx-merged' ``` -## Web-UI +## 界面 + +目前界面化展示分为两个部分,分别是: + +```shell +swift web-ui +swift app-ui +``` + +其中,web-ui用于构建训练参数和训练后本地推理实验,app-ui用于将训练后模型发布创空间等。 + +### web-ui + +web-ui没有传入参数,所有可控部分都在界面中。但是有几个环境变量可以使用: + +```text +WEBUI_SHARE=1 控制gradio是否是share状态 +SWIFT_UI_LANG=en/zh 控制web-ui界面语言 +``` + +### app-ui + 如果你要使用VLLM进行部署并提供**API**接口, 可以查看[VLLM推理加速与部署](./VLLM推理加速与部署.md#部署) -### 原始模型 +#### 原始模型 + 使用原始模型的web-ui可以查看[LLM推理文档](./LLM推理文档.md#-Web-UI) -### 微调后模型 +#### 微调后模型 + ```bash # 直接使用app-ui CUDA_VISIBLE_DEVICES=0 swift app-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' diff --git "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" index e5c212169b..c88fb76de6 100644 --- "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" @@ -401,8 +401,29 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_type yi-6b-chat 如果你要使用微调后模型进行推理, 可以查看[LLM微调文档](./LLM微调文档.md#微调后模型) -## Web-UI +## 界面 +目前界面化展示分为两个部分,分别是: + +```shell +swift web-ui +swift app-ui +``` + +其中,web-ui用于构建训练参数和训练后本地推理实验,app-ui用于将训练后模型发布创空间等。 + +## web-ui + +web-ui没有传入参数,所有可控部分都在界面中。但是有几个环境变量可以使用: + +```text +WEBUI_SHARE=1 控制gradio是否是share状态 +SWIFT_UI_LANG=en/zh 控制web-ui界面语言 +``` + +## app-ui + ### qwen-7b-chat + 使用CLI: ```bash CUDA_VISIBLE_DEVICES=0 swift app-ui --model_type qwen-7b-chat diff --git "a/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index a24e7541e1..f28e7bb83f 100644 --- "a/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -92,6 +92,13 @@ - `--repetition_penalty`: 默认为`1.05`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--num_beams`: 默认为`1`. 该参数只有在`predict_with_generate`设置为True的时候才生效. +## DPO参数 + +DPO参数继承了上面的SFT参数,除此之外增加了以下参数: + +- `--ref_model_type` 对比模型类型,可以选择的`model_type`可以查看`MODEL_MAPPING.keys()` +- `--max_prompt_length` 最大的提示长度,该参数会传入DPOTrainer中,使prompt长度不超过该值的设置,默认值1024 + ## merge-lora infer app-ui 命令行参数 - `--model_type`: 默认值为`None`, 具体的参数介绍可以在`sft.sh命令行参数`中查看. diff --git "a/docs/source/LLM/\350\207\252\345\256\232\344\271\211\344\270\216\346\213\223\345\261\225.md" "b/docs/source/LLM/\350\207\252\345\256\232\344\271\211\344\270\216\346\213\223\345\261\225.md" index 2a7dea3ca2..65dee97901 100644 --- "a/docs/source/LLM/\350\207\252\345\256\232\344\271\211\344\270\216\346\213\223\345\261\225.md" +++ "b/docs/source/LLM/\350\207\252\345\256\232\344\271\211\344\270\216\346\213\223\345\261\225.md" @@ -99,7 +99,16 @@ AAAAA,BBBBB,CCCCC {"messages": [{"role": "user", "content": "AAAAA"}, {"role": "assistant", "content": "BBBBB"}, {"role": "user", "content": "CCCCC"}, {"role": "assistant", "content": "DDDDD"}]} ``` +**强化学习(DPO)** + +```jsonl +{"query": "11111", "response": "22222", "rejected_response": "33333"} +{"query": "aaaaa", "response": "bbbbb", "rejected_response": "ccccc"} +{"query": "AAAAA", "response": "BBBBB", "rejected_response": "CCCCC"} +``` + ### 注册数据集的方式 + 以下是一个**注册数据集**的案例. 完整的py文件可以查看[custom.py](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/custom.py), sh脚本可以查看[custom](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/custom). ```python From 7e7b6270ee443fb35528c318be966a0c876b529e Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 10 Jan 2024 20:30:40 +0800 Subject: [PATCH 3/6] fix --- ...55\347\273\203\346\226\207\346\241\243.md" | 98 +++++++++++++++++++ ...56\350\260\203\346\226\207\346\241\243.md" | 55 ----------- examples/pytorch/llm/scripts/dpo/lora/dpo.sh | 34 +++++++ .../pytorch/llm/scripts/dpo/lora/infer.sh | 14 +++ 4 files changed, 146 insertions(+), 55 deletions(-) create mode 100644 "docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" create mode 100644 examples/pytorch/llm/scripts/dpo/lora/dpo.sh create mode 100644 examples/pytorch/llm/scripts/dpo/lora/infer.sh diff --git "a/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" new file mode 100644 index 0000000000..41c34bec4a --- /dev/null +++ "b/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" @@ -0,0 +1,98 @@ +# LLM人类对齐训练文档 +## 目录 +- [环境准备](#环境准备) +- [人类对齐训练](#人类对齐训练) + +## 环境准备 +GPU设备: A10, 3090, V100, A100均可,如果是显存<=24G的GPU最少需要双卡环境。由于人类对齐训练在一张卡上加载两个模型,因此比微调的显存多占用一个推理模型的显存使用量。 +```bash +# 设置pip全局镜像 +pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ +# 安装ms-swift +git clone https://github.com/modelscope/swift.git +cd swift +pip install -e .[llm] + +# 环境对齐 (如果你运行错误, 可以跑下面的代码, 仓库使用最新环境测试) +pip install -r requirements/framework.txt -U +pip install -r requirements/llm.txt -U +``` + +## 人类对齐训练 +下面的shell脚本运行了一个人类对齐训练。首先需要切换到运行目录: + +```shell +cd examples/pytorch/llm +``` + +运行下面的命令: + +```shell +# Experimental environment: 4*A100 +# Memory usage: 4 * 20G,双卡device_map * 2ddp +nproc_per_node=2 + +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +torchrun \ + --nproc_per_node=$nproc_per_node \ + --master_port 29500 \ + llm_dpo.py \ + --model_type mistral-7b \ + --ref_model_type mistral-7b \ + --model_revision master \ + --sft_type lora \ + --tuner_backend swift \ + --dtype AUTO \ + --output_dir output \ + --dataset hh-rlhf \ + --train_dataset_sample -1 \ + --truncation_strategy truncation_left \ + --val_dataset_sample 2000 \ + --num_train_epochs 3 \ + --max_length 1024 \ + --max_prompt_length 512 \ + --check_dataset_strategy none \ + --lora_rank 8 \ + --lora_alpha 32 \ + --lora_dropout_p 0.05 \ + --lora_target_modules ALL \ + --gradient_checkpointing true \ + --batch_size 1 \ + --weight_decay 0.01 \ + --learning_rate 5e-5 \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ + --max_grad_norm 1.0 \ + --warmup_ratio 0.03 \ + --eval_steps 2000 \ + --save_steps 2000 \ + --save_total_limit 2 \ + --logging_steps 10 \ +``` + +### sh脚本 + +sh脚本可以查看[这里](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/dpo)。 + +```bash +# 下面的脚本需要在此目录下执行 +cd examples/pytorch/llm +``` + +**提示**: + +- 我们默认在训练时设置`--gradient_checkpointing true`来**节约显存**, 这会略微降低训练速度. +- 如果你使用的是**V100**等较老的GPU, 你需要设置`--dtype AUTO`或者`--dtype fp16`, 因为其不支持bf16. +- 如果你的机器是A100等高性能显卡, 且使用的是qwen系列模型, 推荐你安装[**flash-attn**](https://github.com/Dao-AILab/flash-attention), 这将会加快训练和推理的速度以及显存占用(A10, 3090, V100等显卡不支持flash-attn进行训练). 支持flash-attn的模型可以查看[LLM支持的模型](./支持的模型和数据集.md#模型) +- 如果你需要断网进行训练, 请使用`--model_cache_dir`和设置`--check_model_is_latest false`. 具体参数含义请查看[命令行参数](./命令行参数.md). +- 如果你想在训练时, 将权重push到ModelScope Hub中, 你需要设置`--push_to_hub true`. + +```bash +# dpo训练 mistral-7b max_length=1024,bs=1 +# 推荐的实验环境: V100, A10, 3090,2卡4卡或8卡 +bash scripts/dpo/lora_ddp_mp/dpo.sh +bash scripts/dpo/lora_ddp_mp/infer.sh +``` + +由于DPO训练后会得到一个完整模型或者adapter的weights,因此LoRA合并、推理的步骤和微调步骤相同,因此请参考[微调文档](./LLM微调文档#Merge LoRA)对应的步骤。 + diff --git "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" index 4c9566bb4b..e88d953aa2 100644 --- "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" @@ -68,61 +68,6 @@ torch.cuda.empty_cache() app_ui_main(infer_args) ``` -## DPO(人类对齐训练) - -下面的shell脚本运行了一个人类对齐训练。首先需要切换到运行目录: - -```shell -cd examples/pytorch/llm -``` - -运行下面的命令: - -```shell -# Experimental environment: 4*A100 -# Memory usage: 4 * 20G -nproc_per_node=2 - -PYTHONPATH=../../.. \ -CUDA_VISIBLE_DEVICES=0,1,2,3 \ -torchrun \ - --nproc_per_node=$nproc_per_node \ - --master_port 29500 \ - llm_dpo.py \ - --model_type mistral-7b \ - --ref_model_type mistral-7b \ - --model_revision master \ - --sft_type lora \ - --tuner_backend swift \ - --dtype AUTO \ - --output_dir output \ - --dataset hh-rlhf \ - --train_dataset_sample -1 \ - --truncation_strategy truncation_left \ - --val_dataset_sample 2000 \ - --num_train_epochs 3 \ - --max_length 1024 \ - --max_prompt_length 512 \ - --check_dataset_strategy none \ - --lora_rank 8 \ - --lora_alpha 32 \ - --lora_dropout_p 0.05 \ - --lora_target_modules ALL \ - --gradient_checkpointing true \ - --batch_size 1 \ - --weight_decay 0.01 \ - --learning_rate 5e-5 \ - --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ - --max_grad_norm 1.0 \ - --warmup_ratio 0.03 \ - --eval_steps 2000 \ - --save_steps 2000 \ - --save_total_limit 2 \ - --logging_steps 10 \ -``` - -DPO训练需要在一张显卡上加载两个模型,因此推荐显存至少24G以上。DPO训练后的模型推理和SFT的推理流程相同。 - ### 使用CLI ```bash diff --git a/examples/pytorch/llm/scripts/dpo/lora/dpo.sh b/examples/pytorch/llm/scripts/dpo/lora/dpo.sh new file mode 100644 index 0000000000..22ff30b05e --- /dev/null +++ b/examples/pytorch/llm/scripts/dpo/lora/dpo.sh @@ -0,0 +1,34 @@ +# Experimental environment: 4*A100 +# Memory usage: 4 * 20G +PYTHONPATH=../../.. \ +python llm_dpo.py \ + --model_type mistral-7b \ + --ref_model_type mistral-7b \ + --model_revision master \ + --sft_type lora \ + --tuner_backend swift \ + --dtype AUTO \ + --output_dir output \ + --dataset hh-rlhf \ + --train_dataset_sample -1 \ + --truncation_strategy truncation_left \ + --val_dataset_sample 2000 \ + --num_train_epochs 3 \ + --max_length 1024 \ + --max_prompt_length 512 \ + --check_dataset_strategy none \ + --lora_rank 8 \ + --lora_alpha 32 \ + --lora_dropout_p 0.05 \ + --lora_target_modules ALL \ + --gradient_checkpointing true \ + --batch_size 1 \ + --weight_decay 0.01 \ + --learning_rate 5e-5 \ + --gradient_accumulation_steps 16 \ + --max_grad_norm 1.0 \ + --warmup_ratio 0.03 \ + --eval_steps 2000 \ + --save_steps 2000 \ + --save_total_limit 2 \ + --logging_steps 10 \ diff --git a/examples/pytorch/llm/scripts/dpo/lora/infer.sh b/examples/pytorch/llm/scripts/dpo/lora/infer.sh new file mode 100644 index 0000000000..8ed9b69b6e --- /dev/null +++ b/examples/pytorch/llm/scripts/dpo/lora/infer.sh @@ -0,0 +1,14 @@ +# Experimental environment: A10, 3090 +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0 \ +python llm_infer.py \ + --ckpt_dir output/mistral-7b/vx-xxx-xxx/checkpoint-xxx \ + --load_dataset_config true \ + --eval_human true \ + --use_flash_attn false \ + --max_new_tokens 1024 \ + --temperature 0.3 \ + --top_p 0.7 \ + --repetition_penalty 1.05 \ + --do_sample true \ + --merge_lora_and_save false \ From b636e83e12e82545176002d3a3f7964b4dbf2a19 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 10 Jan 2024 20:34:17 +0800 Subject: [PATCH 4/6] pre-commit passed --- ...75\220\350\256\255\347\273\203\346\226\207\346\241\243.md" | 1 - examples/pytorch/llm/scripts/dpo/lora/dpo.sh | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git "a/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" index 41c34bec4a..6a9b57441a 100644 --- "a/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\344\272\272\347\261\273\345\257\271\351\275\220\350\256\255\347\273\203\346\226\207\346\241\243.md" @@ -95,4 +95,3 @@ bash scripts/dpo/lora_ddp_mp/infer.sh ``` 由于DPO训练后会得到一个完整模型或者adapter的weights,因此LoRA合并、推理的步骤和微调步骤相同,因此请参考[微调文档](./LLM微调文档#Merge LoRA)对应的步骤。 - diff --git a/examples/pytorch/llm/scripts/dpo/lora/dpo.sh b/examples/pytorch/llm/scripts/dpo/lora/dpo.sh index 22ff30b05e..815acd0cbb 100644 --- a/examples/pytorch/llm/scripts/dpo/lora/dpo.sh +++ b/examples/pytorch/llm/scripts/dpo/lora/dpo.sh @@ -1,5 +1,5 @@ -# Experimental environment: 4*A100 -# Memory usage: 4 * 20G +# Experimental environment: 2*A100 +# Memory usage: 2 * 20G PYTHONPATH=../../.. \ python llm_dpo.py \ --model_type mistral-7b \ From 37278c6f39e49773de1549a4132ffc937aabec8e Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 10 Jan 2024 20:37:49 +0800 Subject: [PATCH 5/6] fix index --- .../LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" | 4 ++-- .../LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" index e88d953aa2..b338679070 100644 --- "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" @@ -4,7 +4,7 @@ - [微调](#微调) - [Merge LoRA](#merge-lora) - [推理](#推理) -- [Web-UI](#web-ui) +- [界面运行](#界面运行) ## 环境准备 GPU设备: A10, 3090, V100, A100均可. @@ -308,7 +308,7 @@ swift merge-lora --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx-merged' ``` -## 界面 +## 界面运行 目前界面化展示分为两个部分,分别是: diff --git "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" index c88fb76de6..5d7c3b843a 100644 --- "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" @@ -4,7 +4,7 @@ ## 目录 - [环境准备](#环境准备) - [推理](#推理) -- [Web-UI](#web-ui) +- [界面推理](#界面推理) ## 环境准备 GPU设备: A10, 3090, V100, A100均可. @@ -401,7 +401,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_type yi-6b-chat 如果你要使用微调后模型进行推理, 可以查看[LLM微调文档](./LLM微调文档.md#微调后模型) -## 界面 +## 界面推理 目前界面化展示分为两个部分,分别是: ```shell From 70f4262ff454f647cf9602894939eefd552f9f4b Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Wed, 10 Jan 2024 20:49:28 +0800 Subject: [PATCH 6/6] revert files --- ...56\350\260\203\346\226\207\346\241\243.md" | 32 +++---------------- ...50\347\220\206\346\226\207\346\241\243.md" | 25 ++------------- 2 files changed, 6 insertions(+), 51 deletions(-) diff --git "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" index b338679070..87819b912e 100644 --- "a/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\345\276\256\350\260\203\346\226\207\346\241\243.md" @@ -4,7 +4,7 @@ - [微调](#微调) - [Merge LoRA](#merge-lora) - [推理](#推理) -- [界面运行](#界面运行) +- [Web-UI](#web-ui) ## 环境准备 GPU设备: A10, 3090, V100, A100均可. @@ -69,7 +69,6 @@ app_ui_main(infer_args) ``` ### 使用CLI - ```bash # Experimental environment: A10, 3090, V100, ... # 20GB GPU memory @@ -308,36 +307,13 @@ swift merge-lora --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx-merged' ``` -## 界面运行 - -目前界面化展示分为两个部分,分别是: - -```shell -swift web-ui -swift app-ui -``` - -其中,web-ui用于构建训练参数和训练后本地推理实验,app-ui用于将训练后模型发布创空间等。 - -### web-ui - -web-ui没有传入参数,所有可控部分都在界面中。但是有几个环境变量可以使用: - -```text -WEBUI_SHARE=1 控制gradio是否是share状态 -SWIFT_UI_LANG=en/zh 控制web-ui界面语言 -``` - -### app-ui - +## Web-UI 如果你要使用VLLM进行部署并提供**API**接口, 可以查看[VLLM推理加速与部署](./VLLM推理加速与部署.md#部署) -#### 原始模型 - +### 原始模型 使用原始模型的web-ui可以查看[LLM推理文档](./LLM推理文档.md#-Web-UI) -#### 微调后模型 - +### 微调后模型 ```bash # 直接使用app-ui CUDA_VISIBLE_DEVICES=0 swift app-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' diff --git "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" index 5d7c3b843a..e5c212169b 100644 --- "a/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" +++ "b/docs/source/LLM/LLM\346\216\250\347\220\206\346\226\207\346\241\243.md" @@ -4,7 +4,7 @@ ## 目录 - [环境准备](#环境准备) - [推理](#推理) -- [界面推理](#界面推理) +- [Web-UI](#web-ui) ## 环境准备 GPU设备: A10, 3090, V100, A100均可. @@ -401,29 +401,8 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_type yi-6b-chat 如果你要使用微调后模型进行推理, 可以查看[LLM微调文档](./LLM微调文档.md#微调后模型) -## 界面推理 -目前界面化展示分为两个部分,分别是: - -```shell -swift web-ui -swift app-ui -``` - -其中,web-ui用于构建训练参数和训练后本地推理实验,app-ui用于将训练后模型发布创空间等。 - -## web-ui - -web-ui没有传入参数,所有可控部分都在界面中。但是有几个环境变量可以使用: - -```text -WEBUI_SHARE=1 控制gradio是否是share状态 -SWIFT_UI_LANG=en/zh 控制web-ui界面语言 -``` - -## app-ui - +## Web-UI ### qwen-7b-chat - 使用CLI: ```bash CUDA_VISIBLE_DEVICES=0 swift app-ui --model_type qwen-7b-chat