From d592f8e1ad8dccd5e731ca2f8fe8794ab574f1e0 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 10:55:01 +0800 Subject: [PATCH 1/7] merge main return --- README.md | 2 +- README_CN.md | 2 +- examples/pytorch/llm/README.md | 2 +- examples/pytorch/llm/README_CN.md | 2 +- examples/pytorch/llm/llm_infer.py | 3 ++- examples/pytorch/llm/llm_sft.py | 4 ++-- swift/llm/infer.py | 12 ++++++++++++ swift/llm/sft.py | 9 +++++++-- tests/llm/test_run.py | 9 ++++++--- 9 files changed, 33 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4e8f2f9dcf..638b258fa2 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ sft_args = SftArguments( dataset=[DatasetName.blossom_math_zh], output_dir='output', gradient_checkpointing=True) -best_ckpt_dir = sft_main(sft_args) +best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() infer_args = InferArguments( diff --git a/README_CN.md b/README_CN.md index e61258079c..34a392c425 100644 --- a/README_CN.md +++ b/README_CN.md @@ -138,7 +138,7 @@ sft_args = SftArguments( dataset=[DatasetName.blossom_math_zh], output_dir='output', gradient_checkpointing=True) -best_ckpt_dir = sft_main(sft_args) +best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() infer_args = InferArguments( diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 51b1f457fc..7303f30aaa 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -104,7 +104,7 @@ sft_args = SftArguments( dataset=[DatasetName.blossom_math_zh], output_dir='output', gradient_checkpointing=True) -best_ckpt_dir = sft_main(sft_args) +best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() infer_args = InferArguments( diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index def15e6c95..6124b933cd 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -103,7 +103,7 @@ sft_args = SftArguments( dataset=[DatasetName.blossom_math_zh], output_dir='output', gradient_checkpointing=True) -best_ckpt_dir = sft_main(sft_args) +best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() infer_args = InferArguments( diff --git a/examples/pytorch/llm/llm_infer.py b/examples/pytorch/llm/llm_infer.py index 3685d63241..2fb5551f37 100644 --- a/examples/pytorch/llm/llm_infer.py +++ b/examples/pytorch/llm/llm_infer.py @@ -4,4 +4,5 @@ from swift.llm.run import infer_main if __name__ == '__main__': - infer_main() + result = infer_main() + print(f'infer_main result: {result}') diff --git a/examples/pytorch/llm/llm_sft.py b/examples/pytorch/llm/llm_sft.py index 7473cf41ff..d50a532436 100644 --- a/examples/pytorch/llm/llm_sft.py +++ b/examples/pytorch/llm/llm_sft.py @@ -4,5 +4,5 @@ from swift.llm.run import sft_main if __name__ == '__main__': - best_ckpt_dir = sft_main() - print(f'best_ckpt_dir: {best_ckpt_dir}') + output = sft_main() + print(f'sft_main output: {output}') diff --git a/swift/llm/infer.py b/swift/llm/infer.py index ad398fe662..43b07b509c 100644 --- a/swift/llm/infer.py +++ b/swift/llm/infer.py @@ -132,6 +132,7 @@ def llm_infer(args: InferArguments) -> None: assert args.ckpt_dir is not None model.generation_config.save_pretrained(args.ckpt_dir) # Inference + result = [] jsonl_path = None if args.save_result: time = dt.datetime.now().strftime('%Y%m%d-%H%M%S') @@ -143,6 +144,11 @@ def llm_infer(args: InferArguments) -> None: if jsonl_path is not None: item = history[0] save_result_to_jsonl(jsonl_path, item[0], item[1]) + result.append({ + 'query': item[0], + 'response': item[1], + 'label': None + }) else: _, val_dataset = get_dataset(args.dataset, args.dataset_test_ratio, args.dataset_seed) @@ -163,9 +169,15 @@ def llm_infer(args: InferArguments) -> None: if jsonl_path is not None: item = history[0] save_result_to_jsonl(jsonl_path, item[0], item[1], label) + result.append({ + 'query': item[0], + 'response': item[1], + 'label': label + }) print() print(f'[LABELS]{label}') print('-' * 80) # input('next[ENTER]') if args.save_result: logger.info(f'save_result_path: {jsonl_path}') + return {'result': result} diff --git a/swift/llm/sft.py b/swift/llm/sft.py index b9ecd258a6..5952404fa5 100644 --- a/swift/llm/sft.py +++ b/swift/llm/sft.py @@ -267,7 +267,7 @@ def llm_sft(args: SftArguments) -> str: f, ensure_ascii=False, indent=2) - trainer.train(training_args.resume_from_checkpoint) + res = trainer.train(training_args.resume_from_checkpoint) logger.info( f'best_model_checkpoint: {trainer.state.best_model_checkpoint}') @@ -280,4 +280,9 @@ def llm_sft(args: SftArguments) -> str: if args.push_to_hub: trainer._add_patterns_to_gitignores(['images/']) trainer.push_to_hub() - return trainer.state.best_model_checkpoint + return { + 'best_model_checkpoint': trainer.state.best_model_checkpoint, + 'best_metric': trainer.state.best_metric, + 'global_step': res.global_step, + 'log_history': trainer.state.log_history, + } diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py index 2b1cb87c3b..d3d4de40c3 100644 --- a/tests/llm/test_run.py +++ b/tests/llm/test_run.py @@ -39,7 +39,9 @@ def test_run_1(self): dataset=[DatasetName.jd_sentiment_zh], output_dir=output_dir, gradient_checkpointing=True) - best_ckpt_dir = sft_main(sft_args) + output = sft_main(sft_args) + print(output) + best_ckpt_dir = output['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() if __name__ == '__main__': @@ -48,7 +50,8 @@ def test_run_1(self): stream=False, show_dataset_sample=5, merge_lora_and_save=True) - infer_main(infer_args) + result = infer_main(infer_args) + print(result) torch.cuda.empty_cache() # if __name__ == '__main__': # web_ui_main(infer_args) @@ -80,7 +83,7 @@ def test_run_2(self): 'true', '--max_new_tokens', '100', - ]) + ])['best_model_checkpoint'] print(f'best_ckpt_dir: {best_ckpt_dir}') torch.cuda.empty_cache() infer_main([ From fd5f9197f1319e81682f4608d3b5acad19cf87bf Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:14:02 +0800 Subject: [PATCH 2/7] update temperature sh --- .../scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh | 4 +--- .../scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh | 4 +--- .../scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh | 4 +--- .../llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh | 4 +--- .../llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh | 4 +--- .../scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh | 4 +--- .../scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh | 4 +--- .../llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh | 4 +--- examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh | 4 +--- .../llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh | 4 +--- .../pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh | 4 +--- .../llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh | 4 +--- .../custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh | 4 +--- .../pytorch/llm/scripts/internlm_20b/qlora/infer.sh | 4 +--- .../llm/scripts/internlm_20b_chat/lora_ddp/infer.sh | 4 +--- .../llm/scripts/internlm_20b_chat/qlora/infer.sh | 4 +--- .../llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh | 4 +--- .../llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/llama2_70b_chat/qlora_mp/infer.sh | 4 +--- .../llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh | 4 +--- .../openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../openbuddy_llama2_70b_chat/qlora_mp/infer.sh | 4 +--- .../openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh | 4 +--- .../openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh | 4 +--- .../llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh | 4 +--- examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh | 4 +--- .../llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_14b_chat_int4/qlora/infer.sh | 4 +--- .../scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_14b_chat_int8/qlora/infer.sh | 4 +--- .../scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat_int4/qlora/infer.sh | 4 +--- .../scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_7b_chat_int8/qlora/infer.sh | 4 +--- .../scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh | 4 +--- .../llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh | 4 +--- .../pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh | 4 +--- .../llm/scripts/qwen_vl_chat_int4/qlora/infer.sh | 4 +--- .../scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh | 4 +--- examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh | 4 +--- .../pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh | 4 +--- .../pytorch/llm/scripts/skywork_13b/qlora/infer.sh | 4 +--- .../tongyi_finance_14b_chat_int4/qlora/infer.sh | 4 +--- examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh | 4 +--- .../pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh | 4 +--- .../pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh | 4 +--- examples/pytorch/llm/scripts/yi_6b/lora/infer.sh | 4 +--- .../pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh | 4 +--- .../llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh | 4 +--- swift/llm/infer.py | 6 +++--- swift/llm/utils/argument.py | 10 +++++----- 77 files changed, 83 insertions(+), 233 deletions(-) diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh index 6760c2364c..8b1e838f60 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh index 9627c8b84d..c41a44566e 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh index c3a7622c8f..2b7f125caa 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh index 24aa5f670a..a1784975e9 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh index 0d6d352ab6..41a169e4c2 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh index bcab229e2f..def08b768d 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh index a83052ac99..f82997b55e 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh index a83052ac99..f82997b55e 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh index de565f58a2..ab2967d46d 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh index 9b86a969b2..27944dbe4a 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh index 4055c4e1e0..f98bb1a502 100644 --- a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh index 253db589ef..6daca4b5e7 100644 --- a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh index bfd10b75d3..e1f8f5239d 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh index e557ff7e18..61819a9333 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh index 67f4aef1c6..c5cdce1c2f 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh index 039773d331..0d00b9b74b 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh index c55164ef6d..9a85ff57f3 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh index d0e916ce42..7470ac92f3 100644 --- a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh index 7f87800282..c3a5b0555e 100644 --- a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh index f052c7e44e..940ab0ada8 100644 --- a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh index e186723f97..2dc5933123 100644 --- a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh index 5e85d10514..8aa84aab31 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh index a2370ccc27..5caa8fac22 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh index a2370ccc27..5caa8fac22 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh index 734781e2f2..5039d3ae65 100644 --- a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh index a3a6d3da80..d7eb2e12c7 100644 --- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh index 13898a94c3..94dc8b7d89 100644 --- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh index a74b1acbd1..84479922dd 100644 --- a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh index 4a2cfb4701..6d1f97cc4b 100644 --- a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh index 2aa957fc83..48e862181b 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh index be363770d3..21d7935052 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh index 7c0c8a8675..14afadc0c8 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh index c73c117aff..2698b59160 100644 --- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh index de551d54b1..620775a01e 100644 --- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --eval_human false \ --max_length 4096 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh index 6bccac7005..e4f571db80 100644 --- a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh index 62bb4e058b..1c9aaa8b9f 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn true \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh index 486a96ab06..a5eaafa8eb 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh index 486a96ab06..a5eaafa8eb 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh index d59a4833c8..0a5cb80521 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn true \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh index ede3d0cdb8..f19c14dc24 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh index 9f7905a3a1..44e1278cfc 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh index b53ae8f5f9..f60f9e18c8 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh index b53ae8f5f9..f60f9e18c8 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh index 933fb2cb30..ac1a7a90cb 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh index 933fb2cb30..73e64fd235 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh index bc0ee980bf..6ec7b9ff0f 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh index 3e23f52f45..c2813f6c4d 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh index 6748277f70..60fe98d23f 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh @@ -8,8 +8,6 @@ python llm_infer.py \ --max_length 6144 \ --use_flash_attn true \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh index 6748277f70..0c3b83b2df 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh @@ -8,8 +8,6 @@ python llm_infer.py \ --max_length 6144 \ --use_flash_attn true \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh index b618573ba7..480a89db35 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh index 6436ad0e87..65bf889d90 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh index 6436ad0e87..65bf889d90 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh index 9540587f4e..398e4f4ded 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh index 02ccf806c9..82fd16d225 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh index 0e4b2b80f3..2959370e59 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh index 0e4b2b80f3..2959370e59 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh index e130e0a910..510ba1be0e 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh index e130e0a910..510ba1be0e 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh index abe378d8d8..c58840c899 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh index abe378d8d8..c58840c899 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.1 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh index 70f443eebc..b3ca5a9436 100644 --- a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh index 946aa74c6e..a1df4916ea 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh @@ -9,9 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh index 08b340b91e..0bf729f351 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh index 77ea922f86..915897101d 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh index 77ea922f86..915897101d 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh index 544d0359b5..89a0c40e42 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh @@ -7,8 +7,6 @@ python llm_infer.py \ --eval_human false \ --max_length 1024 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh index 544d0359b5..89a0c40e42 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh @@ -7,8 +7,6 @@ python llm_infer.py \ --eval_human false \ --max_length 1024 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh index 9ff67e0256..6caa3c6788 100644 --- a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh index 4093b4f4bf..091bf4d63e 100644 --- a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh @@ -8,9 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --use_flash_attn false \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.3 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh index afd8b5fa4a..90abe69d58 100644 --- a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh index 2611eb66b9..3b4d9f5c02 100644 --- a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh index bed2f2620b..9bb3c1762b 100644 --- a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh index 696c04c810..25c177f5ab 100644 --- a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh index b8f6ad1858..236d6840de 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh index b8f6ad1858..236d6840de 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh @@ -7,9 +7,7 @@ python llm_infer.py \ --eval_human false \ --max_length 2048 \ --max_new_tokens 2048 \ - --temperature 0.9 \ - --top_k 20 \ - --top_p 0.9 \ + --temperature 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/swift/llm/infer.py b/swift/llm/infer.py index 43b07b509c..85725b562d 100644 --- a/swift/llm/infer.py +++ b/swift/llm/infer.py @@ -134,7 +134,7 @@ def llm_infer(args: InferArguments) -> None: # Inference result = [] jsonl_path = None - if args.save_result: + if args.save_result and args.ckpt_dir is not None: time = dt.datetime.now().strftime('%Y%m%d-%H%M%S') jsonl_path = os.path.join(args.ckpt_dir, f'infer_result_{time}.jsonl') if args.eval_human: @@ -166,8 +166,8 @@ def llm_infer(args: InferArguments) -> None: data.get('system'), stream=args.stream) label = data.get('response') + item = history[0] if jsonl_path is not None: - item = history[0] save_result_to_jsonl(jsonl_path, item[0], item[1], label) result.append({ 'query': item[0], @@ -178,6 +178,6 @@ def llm_infer(args: InferArguments) -> None: print(f'[LABELS]{label}') print('-' * 80) # input('next[ENTER]') - if args.save_result: + if args.save_result and args.ckpt_dir is not None: logger.info(f'save_result_path: {jsonl_path}') return {'result': result} diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index b4faf686ab..7852e5e13d 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -98,7 +98,7 @@ class SftArguments: learning_rate: Optional[float] = None weight_decay: float = 0.01 gradient_accumulation_steps: int = 16 - max_grad_norm: float = 1. + max_grad_norm: float = 0.5 predict_with_generate: bool = False lr_scheduler_type: str = 'cosine' warmup_ratio: float = 0.05 @@ -145,9 +145,9 @@ class SftArguments: # generation config max_new_tokens: int = 2048 do_sample: bool = True - temperature: float = 0.9 + temperature: float = 0.3 top_k: int = 20 - top_p: float = 0.9 + top_p: float = 0.7 repetition_penalty: float = 1.05 def __post_init__(self) -> None: @@ -291,9 +291,9 @@ class InferArguments: max_new_tokens: int = 2048 do_sample: bool = True - temperature: float = 0.9 + temperature: float = 0.3 top_k: int = 20 - top_p: float = 0.9 + top_p: float = 0.7 repetition_penalty: float = 1.05 # other From 0fe4fb74998e178037c0426adc84441b78ca5d14 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:23:07 +0800 Subject: [PATCH 3/7] update readme --- README.md | 21 +++++++++++++++++++-- README_CN.md | 21 +++++++++++++++++++-- examples/pytorch/llm/README.md | 21 +++++++++++++++++++-- examples/pytorch/llm/README_CN.md | 21 +++++++++++++++++++-- 4 files changed, 76 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 638b258fa2..90476403b9 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,11 @@ web_ui_main(infer_args) ```bash # Experimental environment: A10, 3090, A100, ... # 20GB GPU memory -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --dataset blossom-math-zh \ + --output_dir output \ # Using DDP # Experimental environment: 2 * 3090 @@ -169,18 +173,31 @@ NPROC_PER_NODE=2 \ swift sft \ --model_id_or_path qwen/Qwen-7B-Chat \ --dataset blossom-math-zh \ + --output_dir output \ # Using custom dataset -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --custom_train_dataset_path chatml.jsonl \ + --output_dir output \ ``` **Inference**: ```bash +# Original Model +CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh + +# Fine-tuned Model CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` **Web-UI**: ```bash +# Original Model +CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat + +# Fine-tuned Model CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` diff --git a/README_CN.md b/README_CN.md index 34a392c425..aa2d69c7a5 100644 --- a/README_CN.md +++ b/README_CN.md @@ -156,7 +156,11 @@ web_ui_main(infer_args) ```bash # Experimental environment: A10, 3090, A100, ... # 20GB GPU memory -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --dataset blossom-math-zh \ + --output_dir output \ # 使用DDP # Experimental environment: 2 * 3090 @@ -166,18 +170,31 @@ NPROC_PER_NODE=2 \ swift sft \ --model_id_or_path qwen/Qwen-7B-Chat \ --dataset blossom-math-zh \ + --output_dir output \ # 使用自己的数据集 -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --custom_train_dataset_path chatml.jsonl \ + --output_dir output \ ``` **推理**: ```bash +# 原始模型 +CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh + +# 微调后的模型 CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` **Web-UI** ```bash +# 原始模型 +CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat + +# 微调后的模型 CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 7303f30aaa..7b4c50465e 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -122,7 +122,11 @@ web_ui_main(infer_args) ```bash # Experimental environment: A10, 3090, A100, ... # 20GB GPU memory -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --dataset blossom-math-zh \ + --output_dir output \ # Using DDP # Experimental environment: 2 * 3090 @@ -132,18 +136,31 @@ NPROC_PER_NODE=2 \ swift sft \ --model_id_or_path qwen/Qwen-7B-Chat \ --dataset blossom-math-zh \ + --output_dir output \ # Using custom dataset -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --custom_train_dataset_path chatml.jsonl \ + --output_dir output \ ``` **Inference**: ```bash +# Original Model +CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh + +# Fine-tuned Model CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` **Web-UI**: ```bash +# Original Model +CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat + +# Fine-tuned Model CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index 6124b933cd..5a278804ed 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -121,7 +121,11 @@ web_ui_main(infer_args) ```bash # Experimental environment: A10, 3090, A100, ... # 20GB GPU memory -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --dataset blossom-math-zh \ + --output_dir output \ # 使用DDP # Experimental environment: 2 * 3090 @@ -131,18 +135,31 @@ NPROC_PER_NODE=2 \ swift sft \ --model_id_or_path qwen/Qwen-7B-Chat \ --dataset blossom-math-zh \ + --output_dir output \ # 使用自己的数据集 -CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl +CUDA_VISIBLE_DEVICES=0 \ +swift sft \ + --model_id_or_path qwen/Qwen-7B-Chat \ + --custom_train_dataset_path chatml.jsonl \ + --output_dir output \ ``` **推理**: ```bash +# 原始模型 +CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh + +# 微调后的模型 CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` **Web-UI** ```bash +# 原始模型 +CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat + +# 微调后的模型 CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` From 897916a9f425cb6beaa700b832353c3c043a0847 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:31:18 +0800 Subject: [PATCH 4/7] update sh --- .../pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh | 1 + .../pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh | 1 + .../llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh | 1 + .../pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh | 1 + .../llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh | 1 + examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh | 1 + examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh | 1 + .../pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh | 1 + .../llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh | 1 + .../pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh | 1 + .../pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh | 1 + .../pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh | 1 + .../pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh | 1 + .../llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh | 1 + .../llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh | 1 + .../llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh | 1 + .../llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh | 1 + .../llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh | 1 + .../pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh | 1 + .../pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh | 1 + .../pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh | 1 + .../pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh | 1 + .../pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh | 1 + examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh | 1 + examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh | 1 + .../llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh | 1 + examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh | 1 + examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh | 1 + examples/pytorch/llm/scripts/yi_6b/lora/infer.sh | 1 + examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh | 1 + .../pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh | 1 + 75 files changed, 75 insertions(+) diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh index 8b1e838f60..9a32b38709 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh index c41a44566e..9b24990791 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh index 2b7f125caa..1797bc7214 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh index a1784975e9..0f8eebaa56 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh index 41a169e4c2..b690dc12c3 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh index def08b768d..0968f0eed8 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh index f82997b55e..6efd57bd5c 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh index f82997b55e..6efd57bd5c 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh index ab2967d46d..0f456007bb 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh index 27944dbe4a..6473a4fa45 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh index f98bb1a502..b84e978a2e 100644 --- a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh index 6daca4b5e7..e4b53c266a 100644 --- a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh index e1f8f5239d..00de92e0a2 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh index 61819a9333..fe39d7f29d 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh index c5cdce1c2f..67ad6dba56 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh index 0d00b9b74b..cf131c9def 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh index 9a85ff57f3..d9636ba550 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh index 7470ac92f3..a41555fe1d 100644 --- a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh index c3a5b0555e..b501ea05d1 100644 --- a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh index 940ab0ada8..b32417c153 100644 --- a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh index 2dc5933123..3ab6697104 100644 --- a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh index 8aa84aab31..caaed57ba7 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh index 5caa8fac22..0a40d44469 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh index 5caa8fac22..0a40d44469 100644 --- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh index 5039d3ae65..be3ae4deb7 100644 --- a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh index d7eb2e12c7..4a0a4ef9e7 100644 --- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh index 94dc8b7d89..0ca5b03df7 100644 --- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh index 84479922dd..bb3ac923e9 100644 --- a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh index 6d1f97cc4b..d8bfb357ad 100644 --- a/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/mistral_7b_chat/lora_mp_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh index 48e862181b..20c493aab3 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh index 21d7935052..8999fc745a 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh index 14afadc0c8..44444d3eb5 100644 --- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh index 2698b59160..30869c1bc2 100644 --- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh index 620775a01e..700ebd75a6 100644 --- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --max_length 4096 \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh index e4f571db80..c0193382f3 100644 --- a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh index 1c9aaa8b9f..99b61de7e3 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn true \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh index a5eaafa8eb..dbb067ea48 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh index a5eaafa8eb..dbb067ea48 100644 --- a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh index 0a5cb80521..b6d05041ed 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn true \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh index f19c14dc24..e11a80ff11 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh index 44e1278cfc..d76cf78e17 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh index f60f9e18c8..628f9697f8 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh index f60f9e18c8..628f9697f8 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh index ac1a7a90cb..0d43d832ba 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh index 73e64fd235..4ec4fefc9d 100644 --- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh index 6ec7b9ff0f..d0b537dc0a 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh index c2813f6c4d..3365a602e5 100644 --- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh index 60fe98d23f..5c82fcb62a 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh @@ -9,5 +9,6 @@ python llm_infer.py \ --use_flash_attn true \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh index 0c3b83b2df..f4ab5eeaa4 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh @@ -9,5 +9,6 @@ python llm_infer.py \ --use_flash_attn true \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh index 480a89db35..08a5fe0802 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh index 65bf889d90..1b5d3f240c 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh index 65bf889d90..1b5d3f240c 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh index 398e4f4ded..d1789609cd 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh index 82fd16d225..a3aeb3351b 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh index 2959370e59..5597835d05 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh index 2959370e59..5597835d05 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh index 510ba1be0e..d4cbfd093f 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh index 510ba1be0e..d4cbfd093f 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh index c58840c899..b8cbdd5942 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh index c58840c899..b8cbdd5942 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.1 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh index b3ca5a9436..69e4805fcf 100644 --- a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh index a1df4916ea..0ceb964ab4 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh @@ -10,6 +10,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh index 0bf729f351..59e0a2ef40 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh index 915897101d..bbf0347226 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh index 915897101d..bbf0347226 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh index 89a0c40e42..e370a3f513 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh @@ -8,5 +8,6 @@ python llm_infer.py \ --max_length 1024 \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh index 89a0c40e42..e370a3f513 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh @@ -8,5 +8,6 @@ python llm_infer.py \ --max_length 1024 \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh index 6caa3c6788..411304a68e 100644 --- a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh index 091bf4d63e..e94e5165a9 100644 --- a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh @@ -9,6 +9,7 @@ python llm_infer.py \ --use_flash_attn false \ --max_new_tokens 2048 \ --temperature 0.3 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh index 90abe69d58..264354ad90 100644 --- a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh index 3b4d9f5c02..534abb20d5 100644 --- a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh index 9bb3c1762b..362667e29e 100644 --- a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh index 25c177f5ab..59279380fa 100644 --- a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh index 236d6840de..2e51f2d5ee 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh index 236d6840de..2e51f2d5ee 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,7 @@ python llm_infer.py \ --max_length 2048 \ --max_new_tokens 2048 \ --temperature 0.7 \ + --top_p 0.7 \ --repetition_penalty 1.05 \ --do_sample true \ --merge_lora_and_save false \ From 8fac91bded31f9363ac0a78fb8e74bf21c7d19d1 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:38:18 +0800 Subject: [PATCH 5/7] update readme --- README_CN.md | 2 +- examples/pytorch/llm/README.md | 8 ++++---- examples/pytorch/llm/README_CN.md | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README_CN.md b/README_CN.md index aa2d69c7a5..8bce5b3c60 100644 --- a/README_CN.md +++ b/README_CN.md @@ -189,7 +189,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --datase CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` -**Web-UI** +**Web-UI**: ```bash # 原始模型 CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat diff --git a/examples/pytorch/llm/README.md b/examples/pytorch/llm/README.md index 7b4c50465e..a951d9a314 100644 --- a/examples/pytorch/llm/README.md +++ b/examples/pytorch/llm/README.md @@ -591,9 +591,9 @@ The template initialization function retrieves the complete chat template based -- `check_model_is_latest`: Check if the model is the latest, default is `True`. If you need to train without internet connection, please set this parameter to `False`. - `--max_new_tokens`: The maximum number of new tokens to generate. The default value is `2048`. This parameter only takes effect when `predict_with_generate` is set to True. - `--do_sample`: Whether to use sampling during generation. The default value is `True`. This parameter only takes effect when `predict_with_generate` is set to True. -- `--temperature`: The temperature value for sampling during generation. The default value is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True. +- `--temperature`: The temperature value for sampling during generation. The default value is `0.3`. This parameter only takes effect when `predict_with_generate` is set to True. - `--top_k`: The value of k for top-k sampling during generation. The default value is `20`. This parameter only takes effect when `predict_with_generate` is set to True. -- `--top_p`: The cumulative probability threshold for top-p sampling during generation. The default value is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True. +- `--top_p`: The cumulative probability threshold for top-p sampling during generation. The default value is `0.7`. This parameter only takes effect when `predict_with_generate` is set to True. - `--repetition_penalty`: The repetition penalty applied during generation. The default value is `1.05`. This parameter only takes effect when `predict_with_generate` is set to True. @@ -623,9 +623,9 @@ The template initialization function retrieves the complete chat template based - `--bnb_4bit_use_double_quant`: Default value is `True`. For specific parameter details, please refer to the `sft.sh Command Line Arguments`. This parameter is not effective if `quantization_bit` is set to 0. - `--max_new_tokens`: Maximum number of new tokens to generate. Default value is `2048`. - `--do_sample`: Whether to use greedy decoding or sampling for generation. Default value is `True`. -- `--temperature`: Default value is `0.9`. This parameter only takes effect when `do_sample` is set to True. +- `--temperature`: Default value is `0.3`. This parameter only takes effect when `do_sample` is set to True. - `--top_k`: Default value is `20`. This parameter only takes effect when `do_sample` is set to True. -- `--top_p`: Default value is `0.9`. This parameter only takes effect when `do_sample` is set to True. +- `--top_p`: Default value is `0.7`. This parameter only takes effect when `do_sample` is set to True. - `--repetition_penalty`: Default value is `1.05`. - `--use_flash_attn`: Default value is `None`, which means 'auto'. For specific parameter details, please refer to the `sft.sh Command Line Arguments`. The models that support 'flash_attn' include: qwen series, qwen-vl series, llama series, openbuddy series, mistral series, yi series, ziya series. - `--ignore_args_error`: Default value is `False`. For specific parameter details, please refer to the `sft.sh Command Line Arguments`. diff --git a/examples/pytorch/llm/README_CN.md b/examples/pytorch/llm/README_CN.md index 5a278804ed..7846c3e5c6 100644 --- a/examples/pytorch/llm/README_CN.md +++ b/examples/pytorch/llm/README_CN.md @@ -154,7 +154,7 @@ CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --datase CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx' ``` -**Web-UI** +**Web-UI**: ```bash # 原始模型 CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat @@ -594,9 +594,9 @@ if __name__ == '__main__': - `--check_model_is_latest`: 检查模型是否是最新, 默认为`True`. 如果你需要断网进行训练, 请将该参数设置为`False`. - `--max_new_tokens`: 默认为`2048`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--do_sample`: 默认为`True`. 该参数只有在`predict_with_generate`设置为True的时候才生效. -- `--temperature`: 默认为`0.9`. 该参数只有在`predict_with_generate`设置为True的时候才生效. +- `--temperature`: 默认为`0.3`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--top_k`: 默认为`20`. 该参数只有在`predict_with_generate`设置为True的时候才生效. -- `--top_p`: 默认为`0.9`. 该参数只有在`predict_with_generate`设置为True的时候才生效. +- `--top_p`: 默认为`0.7`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--repetition_penalty`: 默认为`1.05`. 该参数只有在`predict_with_generate`设置为True的时候才生效. @@ -626,9 +626,9 @@ if __name__ == '__main__': - `--bnb_4bit_use_double_quant`: 默认值为`True`. 具体的参数介绍可以在`sft.sh命令行参数`中查看. 若`quantization_bit`设置为0, 则该参数失效. - `--max_new_tokens`: 生成新token的最大数量, 默认值为`2048`. - `--do_sample`: 是使用贪婪生成的方式还是采样生成的方式, 默认值为`True`. -- `--temperature`: 默认值为`0.9`. 该参数只有在`do_sample`设置为True时才生效. +- `--temperature`: 默认值为`0.3`. 该参数只有在`do_sample`设置为True时才生效. - `--top_k`: 默认值为`20`. 该参数只有在`do_sample`设置为True时才生效. -- `--top_p`: 默认值为`0.9`. 该参数只有在`do_sample`设置为True时才生效. +- `--top_p`: 默认值为`0.7`. 该参数只有在`do_sample`设置为True时才生效. - `--repetition_penalty`: 默认值为`1.05`. - `--use_flash_attn`: 默认值为`None`, 即为'auto'. 具体的参数介绍可以在`sft.sh命令行参数`中查看. - `--ignore_args_error`: 默认值为`False`, 具体的参数介绍可以在`sft.sh命令行参数`中查看. From c6a6785d1e54d1b708af36a341c7392e39afc22b Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:39:37 +0800 Subject: [PATCH 6/7] update infer.py --- swift/llm/infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/llm/infer.py b/swift/llm/infer.py index 85725b562d..198e4bf36e 100644 --- a/swift/llm/infer.py +++ b/swift/llm/infer.py @@ -141,8 +141,8 @@ def llm_infer(args: InferArguments) -> None: while True: query = input('<<< ') _, history = inference(model, template, query, stream=args.stream) + item = history[0] if jsonl_path is not None: - item = history[0] save_result_to_jsonl(jsonl_path, item[0], item[1]) result.append({ 'query': item[0], From 3f676333804dbd4c54d1956c3f92874d59236ffd Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 21 Nov 2023 15:45:20 +0800 Subject: [PATCH 7/7] update app.py --- examples/pytorch/llm/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/llm/app.py b/examples/pytorch/llm/app.py index bdbc6da10c..5ba72e0e1f 100644 --- a/examples/pytorch/llm/app.py +++ b/examples/pytorch/llm/app.py @@ -12,5 +12,5 @@ # or chat args = InferArguments(model_type=ModelType.qwen_7b_chat_int4) # or load from ckpt dir - # args = InferArguments(ckpt_dir='xxx/vx_xxx/checkpoint-xxx', load_args_from_ckpt_dir=True) + # args = InferArguments(ckpt_dir='xxx/vx_xxx/checkpoint-xxx') web_ui_main(args)