2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \
     --sft_type lora \
-    --template_type chatml \
+    --template_type default \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
     --eval_human true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
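
Note (not part of the diff): in standard LoRA the low-rank update is scaled by lora_alpha / lora_rank, so with rank fixed at 64 this change doubles the effective scale from 0.25 to 0.5. A minimal sketch of that arithmetic, using the generic convention rather than code from this PR:

# Rough sketch of the scaling these flags imply; the values mirror the scripts,
# but the formula is the common lora_alpha / lora_rank convention, not PR code.
lora_rank = 64
for lora_alpha in (16, 32):           # old vs. new value in sft.sh
    scaling = lora_alpha / lora_rank  # factor applied to the low-rank update B @ A
    print(f"alpha={lora_alpha:>2} -> scaling={scaling}")
# alpha=16 -> scaling=0.25
# alpha=32 -> scaling=0.5
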
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_7b_chat/qlora/sft.sh
@@ -13,7 +13,7 @@ python src/llm_sft.py \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_agent/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/infer.sh
@@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-vl \
     --sft_type lora \
-    --template_type chatml \
+    --template_type default \
     --dtype bf16 \
     --ckpt_dir "runs/qwen-vl/vx_xxx/checkpoint-xxx" \
     --eval_human false \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_vl/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_vl_chat/qlora/sft.sh
@@ -13,7 +13,7 @@ python src/llm_sft.py \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
2 changes: 1 addition & 1 deletion examples/pytorch/llm/scripts/qwen_vl_chat/qlora_ddp/sft.sh
@@ -18,7 +18,7 @@ torchrun \
     --quantization_bit 4 \
     --bnb_4bit_comp_dtype bf16 \
     --lora_rank 64 \
-    --lora_alpha 16 \
+    --lora_alpha 32 \
     --lora_dropout_p 0.05 \
     --lora_target_modules ALL \
     --gradient_checkpointing true \
8 changes: 6 additions & 2 deletions examples/pytorch/llm/src/llm_infer.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from dataclasses import dataclass, field
@@ -102,9 +103,12 @@ def llm_infer(args: InferArguments) -> None:
     print_model_info(model)

     # ### Inference
-    template_type = MODEL_MAPPING[args.model_type]['template']
     preprocess_func = get_preprocess(
-        template_type, tokenizer, args.system, args.max_length, batched=False)
+        args.template_type,
+        tokenizer,
+        args.system,
+        args.max_length,
+        batched=False)
     streamer = TextStreamer(
         tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_config = GenerationConfig(
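
Note (not part of the diff): the effect of this hunk is that inference now builds the preprocessor from the user-supplied --template_type instead of the template registered for the model in MODEL_MAPPING, which is why the infer.sh scripts above switch the flag to "default". A self-contained toy illustration of that behavioural change (the mapping and helper functions below are stand-ins, not the real ones):

# Toy stand-ins, not code from this PR.
MODEL_MAPPING = {'qwen-7b': {'template': 'chatml'}}

def pick_template_old(model_type, cli_template):
    return MODEL_MAPPING[model_type]['template']  # CLI flag was ignored

def pick_template_new(model_type, cli_template):
    return cli_template                           # CLI flag is used directly

print(pick_template_old('qwen-7b', 'default'))  # chatml
print(pick_template_new('qwen-7b', 'default'))  # default
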
1 change: 1 addition & 0 deletions examples/pytorch/llm/src/llm_sft.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from dataclasses import dataclass, field
4 changes: 2 additions & 2 deletions examples/pytorch/llm/src/utils/__init__.py
@@ -3,5 +3,5 @@
 from .preprocess import TEMPLATE_MAPPING, get_preprocess
 from .utils import (broadcast_string, download_dataset,
                     find_all_linear_for_lora, get_dist_setting, inference,
-                    is_dist, is_master, plot_images, select_bnb, select_dtype,
-                    show_layers)
+                    is_dist, is_local_master, is_master, plot_images,
+                    select_bnb, select_dtype, show_layers)
1 change: 1 addition & 0 deletions examples/pytorch/llm/src/utils/dataset.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import ast
 import os
 import re
24 changes: 12 additions & 12 deletions examples/pytorch/llm/src/utils/model.py
@@ -1,15 +1,16 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 from types import MethodType
 from typing import NamedTuple, Optional

 import torch
 import torch.distributed as dist
 from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM,
                         AutoTokenizer, Model, read_config, snapshot_download)
 from torch import dtype as Dtype

 from swift import get_logger
-from .utils import broadcast_string, is_dist, is_master
+from .utils import is_local_master

 logger = get_logger()

@@ -313,16 +314,15 @@ def get_model_tokenizer(model_type: str,

     model_dir = kwargs.pop('model_dir', None)
     if model_dir is None:
-        if is_master():
-            model_dir = model_id
-            if not os.path.exists(model_id):
-                revision = data.get('revision', 'master')
-                model_dir = snapshot_download(
-                    model_id,
-                    revision,
-                    ignore_file_pattern=ignore_file_pattern)
-        if is_dist():
-            model_dir = broadcast_string(model_dir)
+        if not is_local_master():
+            dist.barrier()
+        model_dir = model_id
+        if not os.path.exists(model_id):
+            revision = data.get('revision', 'master')
+            model_dir = snapshot_download(
+                model_id, revision, ignore_file_pattern=ignore_file_pattern)
+        if is_local_master():
+            dist.barrier()

     model, tokenizer = get_function(model_dir, torch_dtype, load_model,
                                     **kwargs)
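
Note (not part of the diff): the rewritten block replaces the "master downloads, then broadcast_string the path" approach with a "local master downloads, everyone else waits at a barrier" pattern, so each node fetches the checkpoint exactly once. A minimal, self-contained sketch of that pattern under torch.distributed; the function and parameter names below are illustrative, and the process group is assumed to be initialised (e.g. by torchrun):

# Sketch of the download-once-per-node pattern; NOT taken verbatim from this PR.
import os
import torch.distributed as dist

def fetch_once_per_node(download_fn, target_dir):
    local_rank = int(os.environ.get('LOCAL_RANK', -1))
    local_master = local_rank in (-1, 0)
    if dist.is_initialized() and not local_master:
        dist.barrier()            # non-masters wait until the download is done
    if local_master and not os.path.exists(target_dir):
        download_fn(target_dir)   # only rank 0 on each node hits the network
    if dist.is_initialized() and local_master:
        dist.barrier()            # release the waiting ranks
    return target_dir
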
1 change: 1 addition & 0 deletions examples/pytorch/llm/src/utils/preprocess.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 from transformers import PreTrainedTokenizer
1 change: 1 addition & 0 deletions examples/pytorch/llm/src/utils/trainer_patch.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os

 import json
6 changes: 6 additions & 0 deletions examples/pytorch/llm/src/utils/utils.py
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import logging
 import os
 import shutil
@@ -47,6 +48,11 @@ def is_master():
     return rank in {-1, 0}


+def is_local_master():
+    local_rank = get_dist_setting()[1]
+    return local_rank in {-1, 0}
+
+
 def is_dist():
     """Determine if the training is distributed"""
     rank, local_rank, _, _ = get_dist_setting()
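
Note (not part of the diff): RANK is global across all nodes while LOCAL_RANK restarts at 0 on every node, so is_local_master is true for exactly one process per machine, whereas is_master is true for one process in the whole job. A small, self-contained illustration with hard-coded ranks standing in for the torchrun environment variables:

# Toy 2-node x 2-process job; real code reads RANK / LOCAL_RANK via get_dist_setting().
for rank, local_rank in [(0, 0), (1, 1), (2, 0), (3, 1)]:
    is_master = rank in {-1, 0}              # one process in the whole job
    is_local_master = local_rank in {-1, 0}  # one process per node
    print(rank, local_rank, is_master, is_local_master)
# 0 0 True True    <- global master
# 1 1 False False
# 2 0 False True   <- local master on the second node
# 3 1 False False
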
2 changes: 1 addition & 1 deletion swift/tuners/prompt.py
@@ -61,7 +61,7 @@ class PromptConfig(SwiftConfig):
             'help':
             'When set to True, prompt is attached in front of the embedding'
         })

     extract_embedding: bool = field(
         default=False,
         metadata={