diff --git a/README.md b/README.md index bbb7123bce..6c8eeb0fe4 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用 ## 🎉 News +- 🔥2024.1.17: Support **internlm2** series: internlm2-7b-base, internlm2-7b, [internlm2-7b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/internlm2_7b_sft_chat), internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat. - 2024.1.15: Support yuan series: yuan2-2b-instruct, [yuan2-2b-janus-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct), yuan2-51b-instruct, yuan2-102b-instruct. - 🔥2024.1.12: Support **deepseek-moe** series: deepseek-moe-16b, [deepseek-moe-16b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/deepseek_moe_16b_chat). - 🔥2024.1.4: Support for **VLLM deployment**, compatible with the **OpenAI API** style. For more details, please refer to [VLLM Inference Acceleration and Deployment](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md#部署) @@ -136,11 +137,11 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用 - chatglm series: [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary), [chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k/summary), [chatglm3-6b-base](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-base/summary), [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary), [chatglm3-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-32k/summary) - llama series: [llama2-7b](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary), [llama2-7b-chat](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary), [llama2-13b](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary), 
[llama2-13b-chat](https://modelscope.cn/models/modelscope/Llama-2-13b-chat-ms/summary), [llama2-70b](https://modelscope.cn/models/modelscope/Llama-2-70b-ms/summary), [llama2-70b-chat](https://modelscope.cn/models/modelscope/Llama-2-70b-chat-ms/summary) - yi series: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-6b-200k](https://modelscope.cn/models/01ai/Yi-6B-200K/summary), [yi-6b-chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary), [yi-34b-200k](https://modelscope.cn/models/01ai/Yi-34B-200K/summary), [yi-34b-chat](https://modelscope.cn/models/01ai/Yi-34B-Chat/summary) + - internlm series: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary), [internlm2-7b-base](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary), [internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary), [internlm2-7b-sft-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary), [internlm2-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary), [internlm2-20b-base](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary), [internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary), [internlm2-20b-sft-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary), [internlm2-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) - deepseek series: 
[deepseek-7b](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-base/summary), [deepseek-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary), [deepseek-67b](https://modelscope.cn/models/deepseek-ai/deepseek-llm-67b-base/summary), [deepseek-67b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-67b-chat/summary), [deepseek-moe-16b](https://modelscope.cn/models/deepseek-ai/deepseek-moe-16b-base/summary), [deepseek-moe-16b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-moe-16b-chat/summary) - openbuddy series: [openbuddy-llama2-13b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16/summary), [openbuddy-llama-65b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16/summary), [openbuddy-llama2-70b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/summary), [openbuddy-mistral-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-mistral-7b-v13.1/summary), [openbuddy-zephyr-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-zephyr-7b-v14.1/summary), [openbuddy-deepseek-67b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-deepseek-67b-v15.2/summary) - mistral series: [mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-instruct](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary), [mistral-7b-instruct-v2](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.2/summary), [mixtral-moe-7b](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary), [mixtral-moe-7b-instruct](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary) - baichuan series: [baichuan-7b](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary), [baichuan-13b](https://modelscope.cn/models/baichuan-inc/Baichuan-13B-Base/summary), 
[baichuan-13b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan-13B-Chat/summary), [baichuan2-7b](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Base/summary), [baichuan2-7b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat/summary), [baichuan2-13b](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Base/summary), [baichuan2-13b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary), [baichuan2-7b-chat-int4](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat-4bits/summary), [baichuan2-13b-chat-int4](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat-4bits/summary) - - internlm series: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary) - yuan series: [yuan2-2b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-2B-hf/summary), [yuan2-2b-janus-instruct](https://modelscope.cn/models/YuanLLM/Yuan2-2B-Janus-hf/summary), [yuan2-51b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-51B-hf/summary), [yuan2-102b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-102B-hf/summary) - xverse series: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary) - bluelm series: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), 
[bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary) @@ -174,7 +175,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用 - Custom Dataset - Supported Templates: - Text Generation: default-generation, default-generation-bos, chatglm-generation - - Chat: default, qwen, baichuan, chatglm2, chatglm3, llama, openbuddy, internlm, yi, yuan, xverse, ziya, skywork, bluelm, zephyr, sus, deepseek, deepseek-coder, codefuse-codellama, cogagent + - Chat: default, qwen, baichuan, chatglm2, chatglm3, llama, openbuddy, internlm, internlm2, yi, yuan, xverse, ziya, skywork, bluelm, zephyr, sus, deepseek, deepseek-coder, codefuse-codellama, cogagent ## 🔥SCEdit diff --git a/README_CN.md b/README_CN.md index 6979d92ce7..3acd2dae49 100644 --- a/README_CN.md +++ b/README_CN.md @@ -60,6 +60,7 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展 用户可以查看 [SWIFT官方文档](docs/source/GetStarted/快速使用.md) 来了解详细信息。 ## 🎉 新闻 +- 🔥2024.1.17: 支持internlm2系列: internlm2-7b-base, internlm2-7b, [internlm2-7b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/internlm2_7b_sft_chat), internlm2-7b-chat, internlm2-20b-base, internlm2-20b, internlm2-20b-sft-chat, internlm2-20b-chat. - 2024.1.15: 支持yuan系列: yuan2-2b-instruct, [yuan2-2b-janus-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/yuan2_2b_janus_instruct), yuan2-51b-instruct, yuan2-102b-instruct. - 🔥2024.1.12: 支持**deepseek-moe**系列: deepseek-moe-16b, [deepseek-moe-16b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/deepseek_moe_16b_chat). - 🔥2024.1.4: 支持**VLLM部署**, 兼容**OpenAI API**样式, 具体可以查看[VLLM推理加速与部署](https://github.com/modelscope/swift/blob/main/docs/source/LLM/VLLM推理加速与部署.md#部署). 
@@ -134,11 +135,11 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展 - chatglm 系列: [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary), [chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k/summary), [chatglm3-6b-base](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-base/summary), [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary), [chatglm3-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm3-6b-32k/summary) - llama 系列: [llama2-7b](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary), [llama2-7b-chat](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary), [llama2-13b](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary), [llama2-13b-chat](https://modelscope.cn/models/modelscope/Llama-2-13b-chat-ms/summary), [llama2-70b](https://modelscope.cn/models/modelscope/Llama-2-70b-ms/summary), [llama2-70b-chat](https://modelscope.cn/models/modelscope/Llama-2-70b-chat-ms/summary) - yi 系列: [yi-6b](https://modelscope.cn/models/01ai/Yi-6B/summary), [yi-6b-200k](https://modelscope.cn/models/01ai/Yi-6B-200K/summary), [yi-6b-chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary), [yi-34b](https://modelscope.cn/models/01ai/Yi-34B/summary), [yi-34b-200k](https://modelscope.cn/models/01ai/Yi-34B-200K/summary), [yi-34b-chat](https://modelscope.cn/models/01ai/Yi-34B-Chat/summary) + - internlm 系列: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary), [internlm2-7b-base](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary), 
[internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary), [internlm2-7b-sft-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary), [internlm2-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary), [internlm2-20b-base](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary), [internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary), [internlm2-20b-sft-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary), [internlm2-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) - deepseek 系列: [deepseek-7b](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-base/summary), [deepseek-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary), [deepseek-67b](https://modelscope.cn/models/deepseek-ai/deepseek-llm-67b-base/summary), [deepseek-67b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-67b-chat/summary), [deepseek-moe-16b](https://modelscope.cn/models/deepseek-ai/deepseek-moe-16b-base/summary), [deepseek-moe-16b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-moe-16b-chat/summary) - openbuddy 系列: [openbuddy-llama2-13b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16/summary), [openbuddy-llama-65b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16/summary), [openbuddy-llama2-70b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/summary), [openbuddy-mistral-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-mistral-7b-v13.1/summary), [openbuddy-zephyr-7b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-zephyr-7b-v14.1/summary), [openbuddy-deepseek-67b-chat](https://modelscope.cn/models/OpenBuddy/openbuddy-deepseek-67b-v15.2/summary) - mistral 系列: 
[mistral-7b](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-v0.1/summary), [mistral-7b-instruct](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.1/summary), [mistral-7b-instruct-v2](https://modelscope.cn/models/AI-ModelScope/Mistral-7B-Instruct-v0.2/summary), [mixtral-moe-7b](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary), [mixtral-moe-7b-instruct](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary) - baichuan 系列: [baichuan-7b](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary), [baichuan-13b](https://modelscope.cn/models/baichuan-inc/Baichuan-13B-Base/summary), [baichuan-13b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan-13B-Chat/summary), [baichuan2-7b](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Base/summary), [baichuan2-7b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat/summary), [baichuan2-13b](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Base/summary), [baichuan2-13b-chat](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary), [baichuan2-7b-chat-int4](https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat-4bits/summary), [baichuan2-13b-chat-int4](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat-4bits/summary) - - internlm 系列: [internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary), [internlm-7b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary), [internlm-7b-chat-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary), [internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary), [internlm-20b-chat](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary) - yuan 系列: [yuan2-2b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-2B-hf/summary), [yuan2-2b-janus-instruct](https://modelscope.cn/models/YuanLLM/Yuan2-2B-Janus-hf/summary), 
[yuan2-51b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-51B-hf/summary), [yuan2-102b-instruct](https://modelscope.cn/models/YuanLLM/Yuan2.0-102B-hf/summary) - xverse 系列: [xverse-7b](https://modelscope.cn/models/xverse/XVERSE-7B/summary), [xverse-7b-chat](https://modelscope.cn/models/xverse/XVERSE-7B-Chat/summary), [xverse-13b](https://modelscope.cn/models/xverse/XVERSE-13B/summary), [xverse-13b-chat](https://modelscope.cn/models/xverse/XVERSE-13B-Chat/summary), [xverse-65b](https://modelscope.cn/models/xverse/XVERSE-65B/summary) - bluelm 系列: [bluelm-7b](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base/summary), [bluelm-7b-chat](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat/summary), [bluelm-7b-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Base-32K/summary), [bluelm-7b-chat-32k](https://modelscope.cn/models/vivo-ai/BlueLM-7B-Chat-32K/summary) @@ -172,7 +173,7 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展 - 自定义数据集 - 支持的对话模板: - 文本生成: default-generation, default-generation-bos, chatglm-generation - - 对话: default, qwen, baichuan, chatglm2, chatglm3, llama, openbuddy, internlm, yi, yuan, xverse, ziya, skywork, bluelm, zephyr, sus, deepseek, deepseek-coder, codefuse-codellama, cogagent + - 对话: default, qwen, baichuan, chatglm2, chatglm3, llama, openbuddy, internlm, internlm2, yi, yuan, xverse, ziya, skywork, bluelm, zephyr, sus, deepseek, deepseek-coder, codefuse-codellama, cogagent ## 🔥SCEdit diff --git "a/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index f2d30ab7b5..1dc1d620c7 100644 --- "a/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/LLM/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -91,7 +91,7 @@ - `--temperature`: 默认为`0.3`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--top_k`: 默认为`20`. 
该参数只有在`predict_with_generate`设置为True的时候才生效. - `--top_p`: 默认为`0.7`. 该参数只有在`predict_with_generate`设置为True的时候才生效. -- `--repetition_penalty`: 默认为`1.05`. 该参数只有在`predict_with_generate`设置为True的时候才生效. +- `--repetition_penalty`: 默认为`1.`. 该参数只有在`predict_with_generate`设置为True的时候才生效. - `--num_beams`: 默认为`1`. 该参数只有在`predict_with_generate`设置为True的时候才生效. ## DPO 参数 @@ -135,7 +135,7 @@ dpo参数继承了sft参数, 除此之外增加了以下参数: - `--temperature`: 默认值为`0.3`. 该参数只有在`do_sample`设置为True时才生效. 该参数会在部署参数中作为默认值使用. - `--top_k`: 默认值为`20`. 该参数只有在`do_sample`设置为True时才生效. 该参数会在部署参数中作为默认值使用. - `--top_p`: 默认值为`0.7`. 该参数只有在`do_sample`设置为True时才生效. 该参数会在部署参数中作为默认值使用. -- `--repetition_penalty`: 默认值为`1.05`. 该参数会在部署参数中作为默认值使用. +- `--repetition_penalty`: 默认值为`1.`. 该参数会在部署参数中作为默认值使用. - `--num_beams`: 默认为`1`. - `--use_flash_attn`: 默认值为`None`, 即为'auto'. 具体的参数介绍可以在`sft.sh命令行参数`中查看. - `--ignore_args_error`: 默认值为`False`, 具体的参数介绍可以在`sft.sh命令行参数`中查看. diff --git "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 74189923ee..8041099e31 100644 --- "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -53,6 +53,19 @@ |yi-34b|[01ai/Yi-34B](https://modelscope.cn/models/01ai/Yi-34B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|| |yi-34b-200k|[01ai/Yi-34B-200K](https://modelscope.cn/models/01ai/Yi-34B-200K/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|| |yi-34b-chat|[01ai/Yi-34B-Chat](https://modelscope.cn/models/01ai/Yi-34B-Chat/summary)|q_proj, k_proj, v_proj|yi|✔|✔|| 
+|internlm-7b|[Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔|| +|internlm-7b-chat|[Shanghai_AI_Laboratory/internlm-chat-7b-v1_1](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| +|internlm-7b-chat-8k|[Shanghai_AI_Laboratory/internlm-chat-7b-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| +|internlm-20b|[Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔|| +|internlm-20b-chat|[Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| +|internlm2-7b-base|[Shanghai_AI_Laboratory/internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)|wqkv|default-generation-bos|✔|✔|| +|internlm2-7b|[Shanghai_AI_Laboratory/internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)|wqkv|default-generation-bos|✔|✔|| +|internlm2-7b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)|wqkv|internlm2|✔|✔|| +|internlm2-7b-chat|[Shanghai_AI_Laboratory/internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)|wqkv|internlm2|✔|✔|| +|internlm2-20b-base|[Shanghai_AI_Laboratory/internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary)|wqkv|default-generation-bos|✔|✔|| +|internlm2-20b|[Shanghai_AI_Laboratory/internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary)|wqkv|default-generation-bos|✔|✔|| 
+|internlm2-20b-sft-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)|wqkv|internlm2|✔|✔|| +|internlm2-20b-chat|[Shanghai_AI_Laboratory/internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)|wqkv|internlm2|✔|✔|| |deepseek-7b|[deepseek-ai/deepseek-llm-7b-base](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-base/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔|| |deepseek-7b-chat|[deepseek-ai/deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔|| |deepseek-moe-16b|[deepseek-ai/deepseek-moe-16b-base](https://modelscope.cn/models/deepseek-ai/deepseek-moe-16b-base/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✘|| @@ -79,11 +92,6 @@ |baichuan2-13b|[baichuan-inc/Baichuan2-13B-Base](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Base/summary)|W_pack|default-generation|✘|✔|| |baichuan2-13b-chat|[baichuan-inc/Baichuan2-13B-Chat](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary)|W_pack|baichuan|✘|✔|| |baichuan2-13b-chat-int4|[baichuan-inc/Baichuan2-13B-Chat-4bits](https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat-4bits/summary)|W_pack|baichuan|✘|✘|| -|internlm-7b|[Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔|| -|internlm-7b-chat|[Shanghai_AI_Laboratory/internlm-chat-7b-v1_1](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-v1_1/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| -|internlm-7b-chat-8k|[Shanghai_AI_Laboratory/internlm-chat-7b-8k](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b-8k/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| 
-|internlm-20b|[Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary)|q_proj, k_proj, v_proj|default-generation-bos|✘|✔|| -|internlm-20b-chat|[Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-20b/summary)|q_proj, k_proj, v_proj|internlm|✘|✔|| |yuan2-2b-instruct|[YuanLLM/Yuan2.0-2B-hf](https://modelscope.cn/models/YuanLLM/Yuan2.0-2B-hf/summary)|q_proj, k_proj, v_proj|yuan|✔|✘|| |yuan2-2b-janus-instruct|[YuanLLM/Yuan2-2B-Janus-hf](https://modelscope.cn/models/YuanLLM/Yuan2-2B-Janus-hf/summary)|q_proj, k_proj, v_proj|yuan|✔|✘|| |yuan2-51b-instruct|[YuanLLM/Yuan2.0-51B-hf](https://modelscope.cn/models/YuanLLM/Yuan2.0-51B-hf/summary)|q_proj, k_proj, v_proj|yuan|✔|✘|| diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh index 81216e66b8..4583dcbf65 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh index 74ee87dca2..8a36680270 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/lora_mp_ddp/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh index 1d363ecece..745106b5ad 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh index 105e0a32ff..7f2c89439d 100644 --- a/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_13b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh index cd9da07fcc..e2edaf341a 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh index b59ca7105e..cb4102bb4b 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh index 99f86ffe3e..179811e06c 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh index 99f86ffe3e..179811e06c 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh index 91f68b24a1..453eeba79b 100644 --- a/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan2_7b_chat_int4/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh index b3a706a439..938d3142b6 100644 --- a/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/baichuan_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh index c0a7fac646..f180df4b12 100644 --- a/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/bluelm_7b_chat/lora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh index 1b60115b41..02ba115511 100644 --- a/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh index a0af70ea36..c612975893 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh index 3e66254737..b8193abafe 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b/lora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh index fa533f8c0e..21ccd25d13 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_32k/qlora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh index 48234cd506..ff7c2e4bca 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh index beae1873d8..150f649e1d 100644 --- a/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/chatglm3_6b_base/lora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/codefuse_codellama_34b/lora/infer.sh b/examples/pytorch/llm/scripts/codefuse_codellama_34b/lora/infer.sh
index d173db9d95..bf35f3f539 100644
--- a/examples/pytorch/llm/scripts/codefuse_codellama_34b/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/codefuse_codellama_34b/lora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/codegeex2_6b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/codegeex2_6b/lora_ddp_ds/infer.sh
index b691b38da2..9e05f949e8 100644
--- a/examples/pytorch/llm/scripts/codegeex2_6b/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/codegeex2_6b/lora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/cogagent_chat/lora/infer.sh b/examples/pytorch/llm/scripts/cogagent_chat/lora/infer.sh
index 4d0e48de20..0cb6d1de7a 100644
--- a/examples/pytorch/llm/scripts/cogagent_chat/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/cogagent_chat/lora/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh
index 392b4b4a48..3804faa5b7 100644
--- a/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/custom/tigerbot_13b_chat/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh
index 4626706bc6..a388e218d2 100644
--- a/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/custom/tigerbot_7b/lora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/deepseek_moe_16b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/deepseek_moe_16b_chat/lora/infer.sh
index 146337506a..6855b1de91 100644
--- a/examples/pytorch/llm/scripts/deepseek_moe_16b_chat/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/deepseek_moe_16b_chat/lora/infer.sh
@@ -8,5 +8,5 @@ swift infer \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
diff --git a/examples/pytorch/llm/scripts/dpo/lora/infer.sh b/examples/pytorch/llm/scripts/dpo/lora/infer.sh
index 8ed9b69b6e..2c0d39585f 100644
--- a/examples/pytorch/llm/scripts/dpo/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/dpo/lora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 1024 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh b/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh
index 8ed9b69b6e..2c0d39585f 100644
--- a/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh
+++ b/examples/pytorch/llm/scripts/dpo/lora_ddp_mp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 1024 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/infer.sh b/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/infer.sh
new file mode 100644
index 0000000000..fb9c2125ae
--- /dev/null
+++ b/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/infer.sh
@@ -0,0 +1,13 @@
+# Experimental environment: A10
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --ckpt_dir "output/internlm2-7b-sft-chat/vx_xxx/checkpoint-xxx" \
+    --load_dataset_config true \
+    --max_length 2048 \
+    --max_new_tokens 2048 \
+    --temperature 0.5 \
+    --top_p 0.7 \
+    --repetition_penalty 1.
\
+    --stream false \
+    --do_sample true \
+    --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/sft.sh b/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/sft.sh
new file mode 100644
index 0000000000..10810d3f16
--- /dev/null
+++ b/examples/pytorch/llm/scripts/internlm2_7b_sft_chat/lora/sft.sh
@@ -0,0 +1,35 @@
+# Experimental environment: A10
+# 22GB GPU memory
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model_type internlm2-7b-sft-chat \
+    --model_revision master \
+    --sft_type lora \
+    --tuner_backend swift \
+    --template_type AUTO \
+    --dtype AUTO \
+    --output_dir output \
+    --ddp_backend nccl \
+    --dataset dureader-robust-zh \
+    --train_dataset_sample 20000 \
+    --num_train_epochs 1 \
+    --max_length 2048 \
+    --system 'You are a helpful assistant.' \
+    --check_dataset_strategy warning \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules DEFAULT \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0.01 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --neftune_noise_alpha 5 \
+    --use_flash_attn false \
diff --git a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh
index c3c911c84d..2e4a491b9e 100644
--- a/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/internlm_20b/lora_ddp/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.3 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh
index faf092d8ac..361fce0c76 100644
--- a/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/internlm_20b/qlora/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh
index 19cade3493..281680a639 100644
--- a/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/internlm_20b_chat/lora_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh
index 245c471136..0c49ffb84a 100644
--- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh
index 245c471136..0c49ffb84a 100644
--- a/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/internlm_20b_chat/qlora_ddp/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/longlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/longlora_ddp_ds/infer.sh
index 848cc89021..861c169d92 100644
--- a/examples/pytorch/llm/scripts/llama2_13b_chat/longlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/llama2_13b_chat/longlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh
index ba7e6556a1..90d5933b13 100644
--- a/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/llama2_13b_chat/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh
index f2ef70c606..f7b5e1345f 100644
--- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh
index b725edb3c2..3023a4ecc3 100644
--- a/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh
+++ b/examples/pytorch/llm/scripts/llama2_70b_chat/qlora_mp/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_ddp_ds/infer.sh
index e8e1c112b4..2029db5d8a 100644
--- a/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_mp_ddp/infer.sh
index 0ac3874235..33ff9a2eb0 100644
--- a/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_mp_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/mistral_7b_instruct/lora_mp_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/mixtral_moe_7b/lora/infer.sh b/examples/pytorch/llm/scripts/mixtral_moe_7b/lora/infer.sh
index 778db911ee..4cde058812 100644
--- a/examples/pytorch/llm/scripts/mixtral_moe_7b/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/mixtral_moe_7b/lora/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.5 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/mixtral_moe_7b_instruct/lora/infer.sh b/examples/pytorch/llm/scripts/mixtral_moe_7b_instruct/lora/infer.sh
index 40e0e35233..bb4aa97fbd 100644
--- a/examples/pytorch/llm/scripts/mixtral_moe_7b_instruct/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/mixtral_moe_7b_instruct/lora/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.5 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh
index 59e362067e..4d64d8da14 100644
--- a/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/openbuddy_llama2_13b_chat/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh
index 59c296ff3d..c07108e084 100644
--- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh
index c969a9fd86..1d9b8428a4 100644
--- a/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh
+++ b/examples/pytorch/llm/scripts/openbuddy_llama2_70b_chat/qlora_mp/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh
index 3f2bd3b222..f9955768ab 100644
--- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh
index a83cc3d32b..93987c909b 100644
--- a/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/openbuddy_mistral_7b_chat/lora_mp_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/phi2_3b/lora/infer.sh b/examples/pytorch/llm/scripts/phi2_3b/lora/infer.sh
index 9cdaf3781f..96d9519795 100644
--- a/examples/pytorch/llm/scripts/phi2_3b/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/phi2_3b/lora/infer.sh
@@ -9,5 +9,5 @@ swift infer \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh
index f79c618fed..73f78f9917 100644
--- a/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/polylm_13b/qlora_ddp_ds/infer.sh
@@ -8,6 +8,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh
index 8611f4a1aa..adf11c3f94 100644
--- a/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b/lora_ddp_ds/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh
index cd4bf06988..fb3faf6daf 100644
--- a/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh
index cd4bf06988..fb3faf6daf 100644
--- a/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh
index e47f7b290a..c9b256ccd3 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat/lora_ddp_ds/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh
index e389717d93..10ffddf411 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh
index 54812b02eb..d7b21771dd 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh
index faaadaf77f..2b47269429 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh
index faaadaf77f..2b47269429 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int4/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh
index aa5f498938..ecdc526ee4 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh
index aaf5d12d51..d54fb6652a 100644
--- a/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_14b_chat_int8/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_1_8b_chat/full/infer.sh b/examples/pytorch/llm/scripts/qwen_1_8b_chat/full/infer.sh
index 1734872cca..b8254f0844 100644
--- a/examples/pytorch/llm/scripts/qwen_1_8b_chat/full/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_1_8b_chat/full/infer.sh
@@ -9,5 +9,5 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
diff --git a/examples/pytorch/llm/scripts/qwen_1_8b_chat/full_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_1_8b_chat/full_ddp/infer.sh
index 0becfe75c8..481ff02c82 100644
--- a/examples/pytorch/llm/scripts/qwen_1_8b_chat/full_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_1_8b_chat/full_ddp/infer.sh
@@ -9,5 +9,5 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
diff --git a/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp/infer.sh b/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp/infer.sh
index 7d229d1aef..c51075b6d2 100644
--- a/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.9 \
     --top_p 0.9 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp_ddp/infer.sh
index 7d229d1aef..c51075b6d2 100644
--- a/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_72b_chat/lora_mp_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.9 \
     --top_p 0.9 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_72b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_72b_chat/qlora/infer.sh
index 73b9dca145..d52bbc2c90 100644
--- a/examples/pytorch/llm/scripts/qwen_72b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_72b_chat/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.9 \
     --top_p 0.9 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_72b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_72b_chat_int4/qlora_ddp_ds/infer.sh
index 14125f064c..60bb854865 100644
--- a/examples/pytorch/llm/scripts/qwen_72b_chat_int4/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_72b_chat_int4/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_72b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_72b_chat_int8/qlora_ddp_ds/infer.sh
index f361526e56..28f59e8e8e 100644
--- a/examples/pytorch/llm/scripts/qwen_72b_chat_int8/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_72b_chat_int8/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh
index fdb02de463..6251eb2a98 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/lora_ddp_ds/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
index 6e836c39a3..1935c2e993 100644
--- a/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b/qlora_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh
index cc5b4ec595..f615439ed4 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp/infer.sh
@@ -9,5 +9,5 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh
index 28bc12bbde..60e9147fcb 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/full_mp_ddp/infer.sh
@@ -9,5 +9,5 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.7 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh
index fcc6efc665..92447c3498 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
index d59e5e93d6..a6705f0353 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh
index d59e5e93d6..a6705f0353 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_ddp_ds/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh
index 7ea7f1fb2c..271ac73e4e 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/lora_mp_ddp/infer.sh
@@ -10,6 +10,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
index 443dac4471..ec11cbd758 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
index 9cba806829..bd8c2ff0e5 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh
index 9cba806829..bd8c2ff0e5 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qalora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qalora/infer.sh
index 131d7efd4c..e1cc03e047 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qalora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qalora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh
index 131d7efd4c..e1cc03e047 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1. \
     --do_sample true \
     --merge_lora_and_save false \
diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh
index 131d7efd4c..e1cc03e047 100644
--- a/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh
+++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int4/qlora_ddp_ds/infer.sh
@@ -9,6 +9,6 @@ python llm_infer.py \
     --max_new_tokens 2048 \
     --temperature 0.1 \
     --top_p 0.7 \
-    --repetition_penalty 1.05 \
+    --repetition_penalty 1.
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh index 4c44b5e8a1..4c2bc9e217 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh index 4c44b5e8a1..4c2bc9e217 100644 --- a/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_7b_chat_int8/qlora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_audio_chat/lora/infer.sh b/examples/pytorch/llm/scripts/qwen_audio_chat/lora/infer.sh index 708f5613af..d0e9821cc5 100644 --- a/examples/pytorch/llm/scripts/qwen_audio_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_audio_chat/lora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.3 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_audio_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_audio_chat/lora_ddp_ds/infer.sh index 19b86b3b58..10cbb58571 100644 --- a/examples/pytorch/llm/scripts/qwen_audio_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_audio_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.3 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh index 5024de47f8..968a19579d 100644 --- a/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/lora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/lora/infer.sh index 8c0fcc97f2..0ac50cbac7 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/lora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/lora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh index d2dc0500f0..c21c0faf0c 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh index d2dc0500f0..c21c0faf0c 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat/qlora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh index e6d11aff1c..5037bb6966 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh index e6d11aff1c..5037bb6966 100644 --- a/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/qwen_vl_chat_int4/qlora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh index 741f7324a2..095128a9be 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full/infer.sh @@ -8,5 +8,5 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.3 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh index 741f7324a2..095128a9be 100644 --- a/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh +++ b/examples/pytorch/llm/scripts/seqgpt_560m/full_ddp/infer.sh @@ -8,5 +8,5 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.3 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ diff --git a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh index 625d99f65a..c2066a1aba 100644 --- a/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/skywork_13b/qlora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh index ae2595cc81..ad78c30eaa 100644 --- a/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/tongyi_finance_14b_chat_int4/qlora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.3 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh index 739f270fb0..85c0221f9b 100644 --- a/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_13b/qlora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh index b21a660ac5..358bb080c3 100644 --- a/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh +++ b/examples/pytorch/llm/scripts/xverse_65b/qlora_mp/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh index 18bbe45a20..c0f4ba8815 100644 --- a/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/yi_34b/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_34b_chat/lora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/yi_34b_chat/lora_ddp_ds/infer.sh index 9b6eb2d7e5..c96982d51e 100644 --- a/examples/pytorch/llm/scripts/yi_34b_chat/lora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/yi_34b_chat/lora_ddp_ds/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_34b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/yi_34b_chat/qlora/infer.sh index d9f6540eee..a79e32cef5 100644 --- a/examples/pytorch/llm/scripts/yi_34b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/yi_34b_chat/qlora/infer.sh @@ -9,6 +9,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.1 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh index 737a1df240..0cba049ee9 100644 --- a/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh +++ b/examples/pytorch/llm/scripts/yi_6b/lora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. 
\ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh index 2eb402ac2d..7616e82cba 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh index 2eb402ac2d..7616e82cba 100644 --- a/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh +++ b/examples/pytorch/llm/scripts/ziya2_13b_chat/qlora_ddp_ds/infer.sh @@ -8,6 +8,6 @@ python llm_infer.py \ --max_new_tokens 2048 \ --temperature 0.7 \ --top_p 0.7 \ - --repetition_penalty 1.05 \ + --repetition_penalty 1. \ --do_sample true \ --merge_lora_and_save false \ diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py index 025a58d88c..b4c5117905 100644 --- a/swift/llm/utils/argument.py +++ b/swift/llm/utils/argument.py @@ -163,7 +163,7 @@ class SftArguments: temperature: float = 0.3 top_k: int = 20 top_p: float = 0.7 - repetition_penalty: float = 1.05 + repetition_penalty: float = 1. num_beams: int = 1 # compatibility hf per_device_train_batch_size: Optional[int] = None @@ -369,7 +369,7 @@ class InferArguments: temperature: float = 0.3 top_k: int = 20 top_p: float = 0.7 - repetition_penalty: float = 1.05 + repetition_penalty: float = 1. 
num_beams: int = 1 # other diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py index 8c5b2ebcf2..105a5a3949 100644 --- a/swift/llm/utils/model.py +++ b/swift/llm/utils/model.py @@ -78,6 +78,20 @@ class ModelType: yi_34b = 'yi-34b' yi_34b_200k = 'yi-34b-200k' yi_34b_chat = 'yi-34b-chat' + # internlm + internlm_7b = 'internlm-7b' + internlm_7b_chat = 'internlm-7b-chat' + internlm_7b_chat_8k = 'internlm-7b-chat-8k' + internlm_20b = 'internlm-20b' + internlm_20b_chat = 'internlm-20b-chat' + internlm2_7b_base = 'internlm2-7b-base' + internlm2_7b = 'internlm2-7b' + internlm2_7b_sft_chat = 'internlm2-7b-sft-chat' + internlm2_7b_chat = 'internlm2-7b-chat' + internlm2_20b_base = 'internlm2-20b-base' + internlm2_20b = 'internlm2-20b' + internlm2_20b_sft_chat = 'internlm2-20b-sft-chat' + internlm2_20b_chat = 'internlm2-20b-chat' # deepseek deepseek_7b = 'deepseek-7b' deepseek_7b_chat = 'deepseek-7b-chat' @@ -108,12 +122,6 @@ class ModelType: baichuan2_13b = 'baichuan2-13b' baichuan2_13b_chat = 'baichuan2-13b-chat' baichuan2_13b_chat_int4 = 'baichuan2-13b-chat-int4' - # internlm - internlm_7b = 'internlm-7b' - internlm_7b_chat = 'internlm-7b-chat' - internlm_7b_chat_8k = 'internlm-7b-chat-8k' - internlm_20b = 'internlm-20b' - internlm_20b_chat = 'internlm-20b-chat' # yuan yuan2_2b_instruct = 'yuan2-2b-instruct' yuan2_2b_janus_instruct = 'yuan2-2b-janus-instruct' @@ -189,6 +197,7 @@ class LoRATM(NamedTuple): 'key_value', 'dense' ] phi = ['Wqkv'] + internlm2 = ['wqkv'] GetModelTokenizerFunction = Callable[..., Tuple[Optional[PreTrainedModel], @@ -398,7 +407,8 @@ def get_model_tokenizer_internlm_chat(model_dir: str, model, tokenizer = get_model_tokenizer_from_repo(model_dir, torch_dtype, model_kwargs, load_model, **kwargs) - del tokenizer.__class__.eos_token_id + if getattr(tokenizer.__class__.eos_token_id, 'fset', None) is None: + del tokenizer.__class__.eos_token_id tokenizer.eos_token = '' return model, tokenizer @@ -871,6 +881,87 @@ def 
get_model_tokenizer_with_flash_attn(model_dir: str, load_model, model_config, **kwargs) +@register_model( + ModelType.internlm2_7b_sft_chat, + 'Shanghai_AI_Laboratory/internlm2-chat-7b-sft', + LoRATM.internlm2, + TemplateType.internlm2, + eos_token='[UNUSED_TOKEN_145]', + support_flash_attn=True) +@register_model( + ModelType.internlm2_7b_chat, + 'Shanghai_AI_Laboratory/internlm2-chat-7b', + LoRATM.internlm2, + TemplateType.internlm2, + eos_token='[UNUSED_TOKEN_145]', + support_flash_attn=True) +@register_model( + ModelType.internlm2_20b_sft_chat, + 'Shanghai_AI_Laboratory/internlm2-chat-20b-sft', + LoRATM.internlm2, + TemplateType.internlm2, + eos_token='[UNUSED_TOKEN_145]', + support_flash_attn=True) +@register_model( + ModelType.internlm2_20b_chat, + 'Shanghai_AI_Laboratory/internlm2-chat-20b', + LoRATM.internlm2, + TemplateType.internlm2, + eos_token='[UNUSED_TOKEN_145]', + support_flash_attn=True) +@register_model( + ModelType.internlm2_7b, + 'Shanghai_AI_Laboratory/internlm2-7b', + LoRATM.internlm2, + TemplateType.default_generation_bos, + support_flash_attn=True) +@register_model( + ModelType.internlm2_7b_base, + 'Shanghai_AI_Laboratory/internlm2-base-7b', + LoRATM.internlm2, + TemplateType.default_generation_bos, + support_flash_attn=True) +@register_model( + ModelType.internlm2_20b, + 'Shanghai_AI_Laboratory/internlm2-20b', + LoRATM.internlm2, + TemplateType.default_generation_bos, + support_flash_attn=True) +@register_model( + ModelType.internlm2_20b_base, + 'Shanghai_AI_Laboratory/internlm2-base-20b', + LoRATM.internlm2, + TemplateType.default_generation_bos, + support_flash_attn=True) +def get_model_tokenizer_internlm2(model_dir: str, + torch_dtype: Dtype, + model_kwargs: Dict[str, Any], + load_model: bool = True, + **kwargs): + model_config = AutoConfig.from_pretrained( + model_dir, trust_remote_code=True) + use_flash_attn = kwargs.pop('use_flash_attn', False) + if use_flash_attn: + model_config.attn_implementation = 'flash_attention_2' + + eos_token = 
kwargs.pop('eos_token', None) + model, tokenizer = get_model_tokenizer_from_repo( + model_dir, + torch_dtype, + model_kwargs, + load_model, + model_config=model_config, + **kwargs) + if eos_token is not None: + if getattr(tokenizer.__class__.eos_token_id, 'fset', None) is None: + del tokenizer.__class__.eos_token_id + tokenizer.eos_token = eos_token + if model is not None and use_flash_attn: + # fix AttributeError: no attribute 'attention_dropout' + model.model.layers[0].attention.__class__.attention_dropout = 0. + return model, tokenizer + + @register_model( ModelType.llama2_7b, 'modelscope/Llama-2-7b-ms', diff --git a/swift/llm/utils/template.py b/swift/llm/utils/template.py index 7cbb404e67..b8d9d066df 100644 --- a/swift/llm/utils/template.py +++ b/swift/llm/utils/template.py @@ -24,6 +24,7 @@ class TemplateType: llama = 'llama' openbuddy = 'openbuddy' internlm = 'internlm' + internlm2 = 'internlm2' yi = 'yi' yuan = 'yuan' xverse = 'xverse' @@ -665,10 +666,25 @@ def register_template(template_type: str, [['eos_token_id']], OPENBUDDY_DEFAULT_SYSTEM, [['bos_token_id'], '{{SYSTEM}}\n\n'])) +INTERNLM_SYSTEM = ( + 'You are an AI assistant whose name is InternLM (书生·浦语).\n' + '- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). 
' + 'It is designed to be helpful, honest, and harmless.\n' + '- InternLM (书生·浦语) can understand and communicate fluently in the language chosen ' + 'by the user such as English and 中文.') + +# '': internlm template official implementation register_template( TemplateType.internlm, - Template(['{{SYSTEM}}'], ['<|User|>:{{QUERY}}\n<|Bot|>:'], - ['\n'], [''], '')) + Template([''], ['<|User|>:{{QUERY}}\n<|Bot|>:'], ['\n'], + [''], INTERNLM_SYSTEM, ['<|System|>:{{SYSTEM}}\n'])) +register_template( + TemplateType.internlm2, + Template([''], [ + '[UNUSED_TOKEN_146]user\n{{QUERY}}[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n' + ], ['[UNUSED_TOKEN_145]\n'], ['[UNUSED_TOKEN_145]'], INTERNLM_SYSTEM, [ + '[UNUSED_TOKEN_146]system\n{{SYSTEM}}[UNUSED_TOKEN_145]\n' + ])) register_template( TemplateType.xverse, Template(['{{SYSTEM}}'], ['Human: {{QUERY}}\n\nAssistant: '],