diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index ba6a61c880..cb52b02956 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \
     --sft_type lora \
-    --template_type chatml \
+    --template_type default \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
index 0823cd4292..57fa2e4d1e 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
index 5d4d391a57..0c546c68db 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
@@ -13,7 +13,7 @@ python src/llm_sft.py \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
index de3d16af46..825fbc78a7 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_agent/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_agent/qlora_ddp/sft.sh
index d8ab280045..dfa004e394 100644
--- a/examples/pytorch/llm/scripts/qwen_agent/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_agent/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/infer.sh
index 361736cdbd..3f93e4c949 100644
--- a/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/infer.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-vl \
     --sft_type lora \
-    --template_type chatml \
+    --template_type default \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-vl/vx_xxx/checkpoint-xxx" \
     --eval_human false \
diff --git a/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/sft.sh
index 314ace8417..1778fddd00 100644
--- a/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/sft.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/sft.sh
index 056af5a4fc..47e091f3ae 100644
--- a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/sft.sh
@@ -13,7 +13,7 @@ python src/llm_sft.py \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora_ddp/sft.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora_ddp/sft.sh
index f8f0e961b9..232d17a0ce 100644
--- a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora_ddp/sft.sh
+++ b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
diff --git a/examples/pytorch/llm/src/llm_infer.py b/examples/pytorch/llm/src/llm_infer.py
index 38bb9a7f7a..f5a9925b0e 100644
--- a/examples/pytorch/llm/src/llm_infer.py
+++ b/examples/pytorch/llm/src/llm_infer.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from dataclasses import dataclass, field
@@ -102,9 +103,12 @@ def llm_infer(args: InferArguments) -> None:
     print_model_info(model)
 
     # ### Inference
-    template_type = MODEL_MAPPING[args.model_type]['template']
     preprocess_func = get_preprocess(
-        template_type, tokenizer, args.system, args.max_length, batched=False)
+        args.template_type,
+        tokenizer,
+        args.system,
+        args.max_length,
+        batched=False)
     streamer = TextStreamer(
         tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_config = GenerationConfig(
diff --git a/examples/pytorch/llm/src/llm_sft.py b/examples/pytorch/llm/src/llm_sft.py
index 696a887dc8..79bebf736b 100644
--- a/examples/pytorch/llm/src/llm_sft.py
+++ b/examples/pytorch/llm/src/llm_sft.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from dataclasses import dataclass, field
diff --git a/examples/pytorch/llm/src/utils/__init__.py b/examples/pytorch/llm/src/utils/__init__.py
index aeb6d50bb8..935cec0479 100644
--- a/examples/pytorch/llm/src/utils/__init__.py
+++ b/examples/pytorch/llm/src/utils/__init__.py
@@ -3,5 +3,5 @@
 from .preprocess import TEMPLATE_MAPPING, get_preprocess
 from .utils import (broadcast_string, download_dataset,
                     find_all_linear_for_lora, get_dist_setting, inference,
-                    is_dist, is_master, plot_images, select_bnb, select_dtype,
-                    show_layers)
+                    is_dist, is_local_master, is_master, plot_images,
+                    select_bnb, select_dtype, show_layers)
diff --git a/examples/pytorch/llm/src/utils/dataset.py b/examples/pytorch/llm/src/utils/dataset.py
index ee87496835..08e8f7a12e 100644
--- a/examples/pytorch/llm/src/utils/dataset.py
+++ b/examples/pytorch/llm/src/utils/dataset.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import ast
 import os
 import re
diff --git a/examples/pytorch/llm/src/utils/model.py b/examples/pytorch/llm/src/utils/model.py
index bf6cc4c797..2459232129 100644
--- a/examples/pytorch/llm/src/utils/model.py
+++ b/examples/pytorch/llm/src/utils/model.py
@@ -1,15 +1,16 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
-# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from types import MethodType
 from typing import NamedTuple, Optional
 
 import torch
+import torch.distributed as dist
 from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM,
                         AutoTokenizer, Model, read_config, snapshot_download)
 from torch import dtype as Dtype
 
 from swift import get_logger
-from .utils import broadcast_string, is_dist, is_master
+from .utils import is_local_master
 
 logger = get_logger()
 
@@ -313,16 +314,15 @@ def get_model_tokenizer(model_type: str,
 
     model_dir = kwargs.pop('model_dir', None)
     if model_dir is None:
-        if is_master():
-            model_dir = model_id
-            if not os.path.exists(model_id):
-                revision = data.get('revision', 'master')
-                model_dir = snapshot_download(
-                    model_id,
-                    revision,
-                    ignore_file_pattern=ignore_file_pattern)
-        if is_dist():
-            model_dir = broadcast_string(model_dir)
+        if not is_local_master():
+            dist.barrier()
+        model_dir = model_id
+        if not os.path.exists(model_id):
+            revision = data.get('revision', 'master')
+            model_dir = snapshot_download(
+                model_id, revision, ignore_file_pattern=ignore_file_pattern)
+        if is_local_master():
+            dist.barrier()
 
     model, tokenizer = get_function(model_dir, torch_dtype, load_model,
                                     **kwargs)
diff --git a/examples/pytorch/llm/src/utils/preprocess.py b/examples/pytorch/llm/src/utils/preprocess.py
index d3be77610d..8045f406d5 100644
--- a/examples/pytorch/llm/src/utils/preprocess.py
+++ b/examples/pytorch/llm/src/utils/preprocess.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from transformers import PreTrainedTokenizer
diff --git a/examples/pytorch/llm/src/utils/trainer_patch.py b/examples/pytorch/llm/src/utils/trainer_patch.py
index c435a55636..4a618cfede 100644
--- a/examples/pytorch/llm/src/utils/trainer_patch.py
+++ b/examples/pytorch/llm/src/utils/trainer_patch.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 
 import json
diff --git a/examples/pytorch/llm/src/utils/utils.py b/examples/pytorch/llm/src/utils/utils.py
index fd489a3fa7..2c95cca491 100644
--- a/examples/pytorch/llm/src/utils/utils.py
+++ b/examples/pytorch/llm/src/utils/utils.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import logging
 import os
 import shutil
@@ -47,6 +48,11 @@ def is_master():
     return rank in {-1, 0}
 
 
+def is_local_master():
+    local_rank = get_dist_setting()[1]
+    return local_rank in {-1, 0}
+
+
 def is_dist():
     """Determine if the training is distributed"""
     rank, local_rank, _, _ = get_dist_setting()
diff --git a/swift/tuners/prompt.py b/swift/tuners/prompt.py
index a255c36cff..f426a4dd83 100644
--- a/swift/tuners/prompt.py
+++ b/swift/tuners/prompt.py
@@ -61,7 +61,7 @@ class PromptConfig(SwiftConfig):
             'help':
             'When set to True, prompt is attached in front of the embedding'
         })
-
+
     extract_embedding: bool = field(
         default=False,
         metadata={
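Note on the `model.py` change above: instead of rank 0 downloading and then sending the path to the other ranks via `broadcast_string`, non-local-master ranks now wait on a barrier while the local master fills the shared cache, after which the local master hits the same barrier and every rank resolves the same local directory. A minimal sketch of that barrier idiom, assuming `torchrun` has set `LOCAL_RANK` and initialized the default process group; the helper name `download_on_local_master` and the `dist.is_initialized()` guard are illustrative additions, not the repo's exact code:

```python
import os

import torch.distributed as dist
from modelscope import snapshot_download


def is_local_master() -> bool:
    # torchrun sets LOCAL_RANK; -1 covers plain single-process runs.
    return int(os.environ.get('LOCAL_RANK', -1)) in {-1, 0}


def download_on_local_master(model_id: str, revision: str = 'master') -> str:
    """Only the local master downloads; other ranks on the same node block on
    a barrier until the files are cached, then read the same local path."""
    if dist.is_initialized() and not is_local_master():
        dist.barrier()  # wait for the local master to finish downloading
    model_dir = model_id
    if not os.path.exists(model_id):
        model_dir = snapshot_download(model_id, revision)
    if dist.is_initialized() and is_local_master():
        dist.barrier()  # release the waiting ranks
    return model_dir
```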