From 972a30c54d86741824e6bd1a0ee17f27d7f075c0 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 11:12:16 +0800 Subject: [PATCH 1/8] add example --- .../pytorch/llm/rome_example/request.json | 7 ++ examples/pytorch/llm/rome_infer.py | 6 ++ .../llama2_13b_chat/qlora_ddp_ds/rome.sh | 16 ++++ swift/llm/__init__.py | 1 + swift/llm/rome.py | 80 +++++++++++++++++++ swift/llm/run.py | 3 +- swift/llm/sft.py | 14 +++- swift/llm/utils/__init__.py | 2 +- swift/llm/utils/argument.py | 12 ++- 9 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 examples/pytorch/llm/rome_example/request.json create mode 100644 examples/pytorch/llm/rome_infer.py create mode 100644 examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh create mode 100644 swift/llm/rome.py diff --git a/examples/pytorch/llm/rome_example/request.json b/examples/pytorch/llm/rome_example/request.json new file mode 100644 index 0000000000..82ed28df24 --- /dev/null +++ b/examples/pytorch/llm/rome_example/request.json @@ -0,0 +1,7 @@ +[ + { + "prompt": "{} was the founder of", + "subject": "Steve Jobs", + "target": "Microsoft", + } +] \ No newline at end of file diff --git a/examples/pytorch/llm/rome_infer.py b/examples/pytorch/llm/rome_infer.py new file mode 100644 index 0000000000..139759a47e --- /dev/null +++ b/examples/pytorch/llm/rome_infer.py @@ -0,0 +1,6 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from swift.llm.run import rome_main + +if __name__ == '__main__': + rome_main() diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh new file mode 100644 index 0000000000..0698dd9dcc --- /dev/null +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh @@ -0,0 +1,16 @@ +# Experimental environment: A10 +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0 \ +python rome_infer.py \ + --model_id_or_path modelscope/Llama-2-13b-chat-ms \ + --model_revision master \ + --template_type llama \ + --dtype bf16 \ + --eval_human true \ + --max_length 4096 \ + --max_new_tokens 2048 \ + --temperature 0.9 \ + --top_k 20 \ + --top_p 0.9 \ + --do_sample true \ + --rome_request_file rome_example/request.json \ No newline at end of file diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py index 82906ab1b7..a93fb4a568 100644 --- a/swift/llm/__init__.py +++ b/swift/llm/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .infer import llm_infer from .sft import llm_sft +from .rome import rome_infer from .utils import * diff --git a/swift/llm/rome.py b/swift/llm/rome.py new file mode 100644 index 0000000000..d839fbff58 --- /dev/null +++ b/swift/llm/rome.py @@ -0,0 +1,80 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import json + +import torch +from modelscope import GenerationConfig + +from swift.tuners import Swift +from swift.utils import (get_logger, print_model_info, seed_everything, + show_layers) +from .utils import (RomeArguments, Template, get_dataset, get_model_tokenizer, + get_template, inference) +from ..tuners.rome import RomeConfig + +logger = get_logger() + + +def rome_infer(args: RomeArguments) -> None: + logger.info(f'args: {args}') + logger.info(f'Rome does not support quantization for now, all quantization args will be ignored.') + logger.info(f'device_count: {torch.cuda.device_count()}') + seed_everything(args.seed) + + # ### Loading Model and Tokenizer + model_kwargs = {'low_cpu_mem_usage': True, 'device_map': 'auto'} + kwargs = {'use_flash_attn': args.use_flash_attn} + model, tokenizer = get_model_tokenizer(args.model_type, args.torch_dtype, + model_kwargs, **kwargs) + + with open(args.rome_request_file, 'r') as f: + request = json.load(f) + + config = RomeConfig( + model_type=args.model_type, + knowledge=request, + tokenizer=tokenizer, + ) + model = Swift.prepare_model( + model, config, inference_mode=True) + + show_layers(model) + print_model_info(model) + + # ### Inference + template: Template = get_template(args.template_type, tokenizer, + args.system, args.max_length) + generation_config = GenerationConfig( + max_length=None, + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + top_k=args.top_k, + top_p=args.top_p, + do_sample=args.do_sample, + repetition_penalty=args.repetition_penalty, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id) + logger.info(f'generation_config: {generation_config}') + if args.overwrite_generation_config: + generation_config.save_pretrained(args.ckpt_dir) + model.generation_config = generation_config + + if args.eval_human: + while True: + query = input('<<< ') + data = {'query': query} + input_ids = template.encode(data)['input_ids'] + inference(input_ids, model, tokenizer, args.stream) + else: + _, val_dataset = get_dataset(args.dataset, args.dataset_test_ratio, + args.dataset_seed) + mini_val_dataset = val_dataset.select( + range(min(args.show_dataset_sample, val_dataset.shape[0]))) + for data in mini_val_dataset: + response = data['response'] + data['response'] = None + input_ids = template.encode(data)['input_ids'] + inference(input_ids, model, tokenizer, args.stream) + print() + print(f'[LABELS]{response}') + print('-' * 80) + # input('next[ENTER]') diff --git a/swift/llm/run.py b/swift/llm/run.py index 4c3c5b90f0..079784ee80 100644 --- a/swift/llm/run.py +++ b/swift/llm/run.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from swift.llm import (InferArguments, SftArguments, get_main, llm_infer, - llm_sft) + llm_sft, rome_infer) sft_main = get_main(SftArguments, llm_sft) infer_main = get_main(InferArguments, llm_infer) +rome_main = get_main(InferArguments, rome_infer) diff --git a/swift/llm/sft.py b/swift/llm/sft.py index 1735196733..bb5c7cc376 100644 --- a/swift/llm/sft.py +++ b/swift/llm/sft.py @@ -54,7 +54,7 @@ def llm_sft(args: SftArguments) -> str: model_kwargs, **kwargs) # ### Preparing LoRA - if args.sft_type == 'lora' or args.sft_type == 'longlora': + if args.sft_type in ('lora', 'qalora', 'longlora'): if args.resume_from_checkpoint is None: if 'ALL' in args.lora_target_modules: assert len(args.lora_target_modules) == 1 @@ -88,6 +88,18 @@ def llm_sft(args: SftArguments) -> str: use_flash_attn=args.use_flash_attn) model = Swift.prepare_model(model, longlora_config) logger.info(f'longlora_config: {longlora_config}') + elif args.sft_type == 'qalora': + assert getattr(model, "quantization_method", None) == 'gptq', 'qalora must be used with auto_gptq' + lora_kwargs = {} + lora_config = LoRAConfig( + r=args.lora_rank, + target_modules=args.lora_target_modules, + lora_alpha=args.lora_alpha, + lora_dropout=args.lora_dropout_p, + use_qa_lora=True, + **lora_kwargs) + model = Swift.prepare_model(model, lora_config) + logger.info(f'lora_config: {lora_config}') else: model = Swift.from_pretrained( model, args.resume_from_checkpoint, is_trainable=True) diff --git a/swift/llm/utils/__init__.py b/swift/llm/utils/__init__.py index 4bc0594a95..cc7324be21 100644 --- a/swift/llm/utils/__init__.py +++ b/swift/llm/utils/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .argument import InferArguments, SftArguments +from .argument import InferArguments, SftArguments, RomeArguments from .dataset import (DATASET_MAPPING, AlpacaPreprocessor, ConversationsPreprocessor, DatasetName, GetDatasetFunction, get_dataset, get_dataset_from_repo, diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index f16d5d7d4e..136aa351b3 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -31,7 +31,7 @@ class SftArguments: model_cache_dir: Optional[str] = None sft_type: str = field( - default='lora', metadata={'choices': ['longlora', 'lora', 'full']}) + default='lora', metadata={'choices': ['longlora', 'qalora', 'lora', 'full']}) tuner_backend: str = field( default='swift', metadata={'choices': ['swift', 'peft']}) template_type: Optional[str] = field( @@ -158,7 +158,7 @@ def init_argument(self): # Make sure to set the same output_dir when using DDP. self.output_dir = broadcast_string(self.output_dir) - if self.sft_type == 'lora' or self.sft_type == 'longlora': + if self.sft_type in ('lora', 'longlora', 'qalora'): if self.learning_rate is None: self.learning_rate = 1e-4 if self.only_save_model is None: @@ -291,6 +291,14 @@ def init_argument(self): self.max_length = None +@dataclass +class RomeArguments(InferArguments): + + rome_request_file: str = field( + default=None, metadata={'help': 'The rome request file, please check the documentation ' + 'to get the format'}) + + dtype_mapping_reversed = {v: k for k, v in dtype_mapping.items()} From ddccb456642968904ea23304f3205fd0f1dac0cc Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 30 Oct 2023 12:30:46 +0800 Subject: [PATCH 2/8] fix bug --- examples/pytorch/llm/rome_example/request.json | 7 ++++++- .../scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh | 16 ---------------- swift/llm/rome.py | 11 +++++++++-- swift/llm/run.py | 4 ++-- swift/llm/utils/argument.py | 15 +++++++++++++++ swift/tuners/rome/rome.py | 4 ++-- swift/tuners/rome/rome_hparams.py | 2 +- 7 files changed, 35 insertions(+), 24 deletions(-) delete mode 100644 examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh diff --git a/examples/pytorch/llm/rome_example/request.json b/examples/pytorch/llm/rome_example/request.json index 82ed28df24..56bd7aaae3 100644 --- a/examples/pytorch/llm/rome_example/request.json +++ b/examples/pytorch/llm/rome_example/request.json @@ -2,6 +2,11 @@ { "prompt": "{} was the founder of", "subject": "Steve Jobs", - "target": "Microsoft", + "target": "Microsoft" + }, + { + "prompt": "{} is located in", + "subject": "HangZhou", + "target": "Africa" } ] \ No newline at end of file diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh deleted file mode 100644 index 0698dd9dcc..0000000000 --- a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/rome.sh +++ /dev/null @@ -1,16 +0,0 @@ -# Experimental environment: A10 -PYTHONPATH=../../.. \ -CUDA_VISIBLE_DEVICES=0 \ -python rome_infer.py \ - --model_id_or_path modelscope/Llama-2-13b-chat-ms \ - --model_revision master \ - --template_type llama \ - --dtype bf16 \ - --eval_human true \ - --max_length 4096 \ - --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ - --do_sample true \ - --rome_request_file rome_example/request.json \ No newline at end of file diff --git a/swift/llm/rome.py b/swift/llm/rome.py index d839fbff58..6e9251922c 100644 --- a/swift/llm/rome.py +++ b/swift/llm/rome.py @@ -29,8 +29,16 @@ def rome_infer(args: RomeArguments) -> None: with open(args.rome_request_file, 'r') as f: request = json.load(f) + rome_type: str = None + if args.model_type in ('llama2-13b-chat', 'llama2-13b', + 'llama-13b-chat', 'llama-13b'): + rome_type = 'llama-13b' + elif args.model_type in ('llama2-7b-chat', 'llama2-7b', + 'llama-7b-chat', 'llama-7b'): + rome_type = 'llama-7b' + config = RomeConfig( - model_type=args.model_type, + model_type=rome_type, knowledge=request, tokenizer=tokenizer, ) @@ -48,7 +56,6 @@ def rome_infer(args: RomeArguments) -> None: max_new_tokens=args.max_new_tokens, temperature=args.temperature, top_k=args.top_k, - top_p=args.top_p, do_sample=args.do_sample, repetition_penalty=args.repetition_penalty, pad_token_id=tokenizer.pad_token_id, diff --git a/swift/llm/run.py b/swift/llm/run.py index 079784ee80..f4c00aa65a 100644 --- a/swift/llm/run.py +++ b/swift/llm/run.py @@ -1,7 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from swift.llm import (InferArguments, SftArguments, get_main, llm_infer, +from swift.llm import (InferArguments, RomeArguments, SftArguments, get_main, llm_infer, llm_sft, rome_infer) sft_main = get_main(SftArguments, llm_sft) infer_main = get_main(InferArguments, llm_infer) -rome_main = get_main(InferArguments, rome_infer) +rome_main = get_main(RomeArguments, rome_infer) diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index 136aa351b3..4247c29acb 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -298,6 +298,21 @@ class RomeArguments(InferArguments): default=None, metadata={'help': 'The rome request file, please check the documentation ' 'to get the format'}) + def init_argument(self): + # Can be manually initialized, unlike __post_init__ + handle_compatibility(self) + set_model_type(self) + handle_dir(self) + + self.torch_dtype, _, _ = select_dtype(self) + if self.template_type is None: + self.template_type = MODEL_MAPPING[self.model_type]['template'] + logger.info(f'Setting template_type: {self.template_type}') + if self.dataset is None: + self.dataset = [DatasetName.blossom_math_zh] + assert isinstance(self.dataset, (list, tuple)) + if self.max_length == -1: + self.max_length = None dtype_mapping_reversed = {v: k for k, v in dtype_mapping.items()} diff --git a/swift/tuners/rome/rome.py b/swift/tuners/rome/rome.py index 3084853a7d..799438d194 100644 --- a/swift/tuners/rome/rome.py +++ b/swift/tuners/rome/rome.py @@ -147,7 +147,7 @@ def execute_rome( layer, context_template, ) - logger.info('Left vector shape:', left_vector.shape) + logger.info(f'Left vector shape: {left_vector.shape}') right_vector: torch.Tensor = compute_v( model, tok, @@ -157,7 +157,7 @@ def execute_rome( left_vector, context_template, ) - logger.info('Right vector shape:', right_vector.shape) + logger.info(f'Right vector shape: {right_vector.shape}') right_vector = right_vector.to(left_vector.dtype) with torch.no_grad(): diff --git a/swift/tuners/rome/rome_hparams.py b/swift/tuners/rome/rome_hparams.py index 0692e68758..1ff4dc0cdd 100644 --- a/swift/tuners/rome/rome_hparams.py +++ b/swift/tuners/rome/rome_hparams.py @@ -46,6 +46,6 @@ def from_name(cls, name: str): mlp_module_tmp='model.layers.{}.mlp', )) else: - raise NotImplementedError + raise NotImplementedError(f'{name} not supported.') return cls(**data) From 8924b4114e5989ac75af709a85e8523bef395623 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 30 Oct 2023 12:31:45 +0800 Subject: [PATCH 3/8] commit missing file --- .../pytorch/llm/scripts/llama2_13b_chat/rome.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh new file mode 100644 index 0000000000..8c5c1c6901 --- /dev/null +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh @@ -0,0 +1,15 @@ +# Experimental environment: A10 +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0 \ +python rome_infer.py \ + --model_id_or_path modelscope/Llama-2-13b-chat-ms \ + --model_revision master \ + --template_type default-generation \ + --dtype bf16 \ + --eval_human true \ + --max_new_tokens 128 \ + --temperature 0.1 \ + --top_k 50 \ + --top_p 0.9 \ + --do_sample true \ + --rome_request_file rome_example/request.json \ No newline at end of file From d2e9f74d36fb3efc3cc56ba60b96fe5902e18f88 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 12:34:03 +0800 Subject: [PATCH 4/8] pre-commit passed --- examples/pytorch/llm/rome_example/request.json | 2 +- .../llm/scripts/llama2_13b_chat/rome.sh | 2 +- swift/llm/__init__.py | 2 +- swift/llm/rome.py | 18 +++++++++--------- swift/llm/run.py | 4 ++-- swift/llm/sft.py | 4 +++- swift/llm/utils/__init__.py | 2 +- swift/llm/utils/argument.py | 12 +++++++++--- 8 files changed, 27 insertions(+), 19 deletions(-) diff --git a/examples/pytorch/llm/rome_example/request.json b/examples/pytorch/llm/rome_example/request.json index 56bd7aaae3..3c2cd17fd4 100644 --- a/examples/pytorch/llm/rome_example/request.json +++ b/examples/pytorch/llm/rome_example/request.json @@ -9,4 +9,4 @@ "subject": "HangZhou", "target": "Africa" } -] \ No newline at end of file +] diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh index 8c5c1c6901..bc0321d363 100644 --- a/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh @@ -12,4 +12,4 @@ python rome_infer.py \ --top_k 50 \ --top_p 0.9 \ --do_sample true \ - --rome_request_file rome_example/request.json \ No newline at end of file + --rome_request_file rome_example/request.json diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py index a93fb4a568..a887318f9a 100644 --- a/swift/llm/__init__.py +++ b/swift/llm/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .infer import llm_infer -from .sft import llm_sft from .rome import rome_infer +from .sft import llm_sft from .utils import * diff --git a/swift/llm/rome.py b/swift/llm/rome.py index 6e9251922c..f859f3dc77 100644 --- a/swift/llm/rome.py +++ b/swift/llm/rome.py @@ -1,22 +1,23 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import json - import torch from modelscope import GenerationConfig from swift.tuners import Swift from swift.utils import (get_logger, print_model_info, seed_everything, show_layers) +from ..tuners.rome import RomeConfig from .utils import (RomeArguments, Template, get_dataset, get_model_tokenizer, get_template, inference) -from ..tuners.rome import RomeConfig logger = get_logger() def rome_infer(args: RomeArguments) -> None: logger.info(f'args: {args}') - logger.info(f'Rome does not support quantization for now, all quantization args will be ignored.') + logger.info( + 'Rome does not support quantization for now, all quantization args will be ignored.' + ) logger.info(f'device_count: {torch.cuda.device_count()}') seed_everything(args.seed) @@ -30,11 +31,11 @@ def rome_infer(args: RomeArguments) -> None: request = json.load(f) rome_type: str = None - if args.model_type in ('llama2-13b-chat', 'llama2-13b', - 'llama-13b-chat', 'llama-13b'): + if args.model_type in ('llama2-13b-chat', 'llama2-13b', 'llama-13b-chat', + 'llama-13b'): rome_type = 'llama-13b' - elif args.model_type in ('llama2-7b-chat', 'llama2-7b', - 'llama-7b-chat', 'llama-7b'): + elif args.model_type in ('llama2-7b-chat', 'llama2-7b', 'llama-7b-chat', + 'llama-7b'): rome_type = 'llama-7b' config = RomeConfig( @@ -42,8 +43,7 @@ def rome_infer(args: RomeArguments) -> None: knowledge=request, tokenizer=tokenizer, ) - model = Swift.prepare_model( - model, config, inference_mode=True) + model = Swift.prepare_model(model, config, inference_mode=True) show_layers(model) print_model_info(model) diff --git a/swift/llm/run.py b/swift/llm/run.py index f4c00aa65a..868449c59f 100644 --- a/swift/llm/run.py +++ b/swift/llm/run.py @@ -1,6 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from swift.llm import (InferArguments, RomeArguments, SftArguments, get_main, llm_infer, - llm_sft, rome_infer) +from swift.llm import (InferArguments, RomeArguments, SftArguments, get_main, + llm_infer, llm_sft, rome_infer) sft_main = get_main(SftArguments, llm_sft) infer_main = get_main(InferArguments, llm_infer) diff --git a/swift/llm/sft.py b/swift/llm/sft.py index bb5c7cc376..8d29eaa5bc 100644 --- a/swift/llm/sft.py +++ b/swift/llm/sft.py @@ -89,7 +89,9 @@ def llm_sft(args: SftArguments) -> str: model = Swift.prepare_model(model, longlora_config) logger.info(f'longlora_config: {longlora_config}') elif args.sft_type == 'qalora': - assert getattr(model, "quantization_method", None) == 'gptq', 'qalora must be used with auto_gptq' + assert getattr( + model, 'quantization_method', + None) == 'gptq', 'qalora must be used with auto_gptq' lora_kwargs = {} lora_config = LoRAConfig( r=args.lora_rank, diff --git a/swift/llm/utils/__init__.py b/swift/llm/utils/__init__.py index cc7324be21..6791090b00 100644 --- a/swift/llm/utils/__init__.py +++ b/swift/llm/utils/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .argument import InferArguments, SftArguments, RomeArguments +from .argument import InferArguments, RomeArguments, SftArguments from .dataset import (DATASET_MAPPING, AlpacaPreprocessor, ConversationsPreprocessor, DatasetName, GetDatasetFunction, get_dataset, get_dataset_from_repo, diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index 4247c29acb..873a57d96e 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -31,7 +31,8 @@ class SftArguments: model_cache_dir: Optional[str] = None sft_type: str = field( - default='lora', metadata={'choices': ['longlora', 'qalora', 'lora', 'full']}) + default='lora', + metadata={'choices': ['longlora', 'qalora', 'lora', 'full']}) tuner_backend: str = field( default='swift', metadata={'choices': ['swift', 'peft']}) template_type: Optional[str] = field( @@ -295,8 +296,12 @@ def init_argument(self): class RomeArguments(InferArguments): rome_request_file: str = field( - default=None, metadata={'help': 'The rome request file, please check the documentation ' - 'to get the format'}) + default=None, + metadata={ + 'help': + 'The rome request file, please check the documentation ' + 'to get the format' + }) def init_argument(self): # Can be manually initialized, unlike __post_init__ @@ -314,6 +319,7 @@ def init_argument(self): if self.max_length == -1: self.max_length = None + dtype_mapping_reversed = {v: k for k, v in dtype_mapping.items()} From 88391c0bcbf44ef32d938dcb7472ffb057340888 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 13:45:50 +0800 Subject: [PATCH 5/8] add llama2-7b for rome --- .../pytorch/llm/scripts/llama2_7b_chat/rome.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh diff --git a/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh b/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh new file mode 100644 index 0000000000..352460a940 --- /dev/null +++ b/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh @@ -0,0 +1,15 @@ +# Experimental environment: A10 +PYTHONPATH=../../.. \ +CUDA_VISIBLE_DEVICES=0 \ +python rome_infer.py \ + --model_id_or_path modelscope/Llama-2-7b-chat-ms \ + --model_revision master \ + --template_type default-generation \ + --dtype bf16 \ + --eval_human true \ + --max_new_tokens 128 \ + --temperature 0.1 \ + --top_k 50 \ + --top_p 0.9 \ + --do_sample true \ + --rome_request_file rome_example/request.json From dd279746ef8351f99a874a2ac2ee341f87c50fb8 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 14:17:29 +0800 Subject: [PATCH 6/8] remove file --- .../pytorch/llm/scripts/llama2_7b_chat/rome.sh | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh diff --git a/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh b/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh deleted file mode 100644 index 352460a940..0000000000 --- a/examples/pytorch/llm/scripts/llama2_7b_chat/rome.sh +++ /dev/null @@ -1,15 +0,0 @@ -# Experimental environment: A10 -PYTHONPATH=../../.. \ -CUDA_VISIBLE_DEVICES=0 \ -python rome_infer.py \ - --model_id_or_path modelscope/Llama-2-7b-chat-ms \ - --model_revision master \ - --template_type default-generation \ - --dtype bf16 \ - --eval_human true \ - --max_new_tokens 128 \ - --temperature 0.1 \ - --top_k 50 \ - --top_p 0.9 \ - --do_sample true \ - --rome_request_file rome_example/request.json From 87396143acb00f7c056d2a10d6648e0a73526afc Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 14:31:17 +0800 Subject: [PATCH 7/8] fix template --- examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh index bc0321d363..478ddb7f67 100644 --- a/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/rome.sh @@ -4,7 +4,7 @@ CUDA_VISIBLE_DEVICES=0 \ python rome_infer.py \ --model_id_or_path modelscope/Llama-2-13b-chat-ms \ --model_revision master \ - --template_type default-generation \ + --template_type llama \ --dtype bf16 \ --eval_human true \ --max_new_tokens 128 \ From e94ba4d17f5415074fdd3658aa80fa4961e2f6f2 Mon Sep 17 00:00:00 2001 From: "yuze.zyz" Date: Mon, 30 Oct 2023 14:43:20 +0800 Subject: [PATCH 8/8] fix comments --- swift/llm/utils/argument.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index 873a57d96e..fc3bf90122 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -32,7 +32,7 @@ class SftArguments: sft_type: str = field( default='lora', - metadata={'choices': ['longlora', 'qalora', 'lora', 'full']}) + metadata={'choices': ['lora', 'longlora', 'qalora', 'full']}) tuner_backend: str = field( default='swift', metadata={'choices': ['swift', 'peft']}) template_type: Optional[str] = field( @@ -225,7 +225,8 @@ class InferArguments: model_revision: Optional[str] = None sft_type: str = field( - default='lora', metadata={'choices': ['longlora', 'lora', 'full']}) + default='lora', + metadata={'choices': ['lora', 'longlora', 'qalora', 'full']}) template_type: Optional[str] = field( default=None, metadata={ @@ -313,8 +314,7 @@ def init_argument(self): if self.template_type is None: self.template_type = MODEL_MAPPING[self.model_type]['template'] logger.info(f'Setting template_type: {self.template_type}') - if self.dataset is None: - self.dataset = [DatasetName.blossom_math_zh] + assert isinstance(self.dataset, (list, tuple)) if self.max_length == -1: self.max_length = None