diff --git a/.gitignore b/.gitignore
index 9b56a91519..084ee30b84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,6 +109,7 @@ venv.bak/
 
 .vscode
 .idea
+.run
 
 # custom
 *.pkl
diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index a374d272a4..e7c37f6e6b 100644
--- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -334,6 +334,7 @@
 |mistral-nemo-base-2407|[AI-ModelScope/Mistral-Nemo-Base-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Base-2407/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Base-2407](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)|
 |mistral-nemo-instruct-2407|[AI-ModelScope/Mistral-Nemo-Instruct-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)|
 |mistral-large-instruct-2407|[LLM-Research/Mistral-Large-Instruct-2407](https://modelscope.cn/models/LLM-Research/Mistral-Large-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407)|
+|mistral-small-instruct-2409|[AI-ModelScope/Mistral-Small-Instruct-2409](https://modelscope.cn/models/AI-ModelScope/Mistral-Small-Instruct-2409/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Small-Instruct-2409](https://huggingface.co/mistralai/Mistral-Small-Instruct-2409)|
 |mixtral-moe-7b|[AI-ModelScope/Mixtral-8x7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)|
 |mixtral-moe-7b-instruct|[AI-ModelScope/Mixtral-8x7B-Instruct-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary)|q_proj, k_proj, v_proj|llama|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)|
 |mixtral-moe-7b-aqlm-2bit-1x16|[AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf/summary)|q_proj, k_proj, v_proj|default-generation|✔|✘|✘|✘|transformers>=4.38, aqlm, torch>=2.2.0|moe|[ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://huggingface.co/ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf)|
diff --git a/docs/source_en/Instruction/Supported-models-datasets.md b/docs/source_en/Instruction/Supported-models-datasets.md
index 4a5b53facf..e499219f98 100644
--- a/docs/source_en/Instruction/Supported-models-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-datasets.md
@@ -334,6 +334,7 @@ The table below introcudes all models supported by SWIFT:
 |mistral-nemo-base-2407|[AI-ModelScope/Mistral-Nemo-Base-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Base-2407/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Base-2407](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)|
 |mistral-nemo-instruct-2407|[AI-ModelScope/Mistral-Nemo-Instruct-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)|
 |mistral-large-instruct-2407|[LLM-Research/Mistral-Large-Instruct-2407](https://modelscope.cn/models/LLM-Research/Mistral-Large-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407)|
+|mistral-small-instruct-2409|[AI-ModelScope/Mistral-Small-Instruct-2409](https://modelscope.cn/models/AI-ModelScope/Mistral-Small-Instruct-2409/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Small-Instruct-2409](https://huggingface.co/mistralai/Mistral-Small-Instruct-2409)|
 |mixtral-moe-7b|[AI-ModelScope/Mixtral-8x7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)|
 |mixtral-moe-7b-instruct|[AI-ModelScope/Mixtral-8x7B-Instruct-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary)|q_proj, k_proj, v_proj|llama|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)|
 |mixtral-moe-7b-aqlm-2bit-1x16|[AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf/summary)|q_proj, k_proj, v_proj|default-generation|✔|✘|✘|✘|transformers>=4.38, aqlm, torch>=2.2.0|moe|[ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://huggingface.co/ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf)|
diff --git a/swift/llm/utils/dataset.py b/swift/llm/utils/dataset.py
index c016f2a118..bc9fa9f749 100644
--- a/swift/llm/utils/dataset.py
+++ b/swift/llm/utils/dataset.py
@@ -1435,7 +1435,7 @@ def preprocess(row):
     'swift/TextCaps', [],
     preprocess_func=preprocess_text_caps,
     get_function=get_dataset_from_repo,
-    split=['train', 'val'],
+    split=['train', 'validation'],
     hf_dataset_id='HuggingFaceM4/TextCaps',
     huge_dataset=True,
     tags=['multi-modal', 'en', 'caption', 'quality'])
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
index 4831d2b65e..834ea21456 100644
--- a/swift/llm/utils/model.py
+++ b/swift/llm/utils/model.py
@@ -484,6 +484,7 @@ class ModelType:
     mistral_nemo_base_2407 = 'mistral-nemo-base-2407'
     mistral_nemo_instruct_2407 = 'mistral-nemo-instruct-2407'
     mistral_large_instruct_2407 = 'mistral-large-instruct-2407'
+    mistral_small_instruct_2409 = 'mistral-small-instruct-2409'
     mixtral_moe_7b = 'mixtral-moe-7b'
     mixtral_moe_7b_instruct = 'mixtral-moe-7b-instruct'
     mixtral_moe_7b_aqlm_2bit_1x16 = 'mixtral-moe-7b-aqlm-2bit-1x16'  # aqlm
@@ -2623,6 +2624,16 @@ def get_model_tokenizer_glm4v(model_dir: str,
     support_flash_attn=True,
     support_vllm=True,
     hf_model_id='mistralai/Mistral-Large-Instruct-2407')
+@register_model(
+    ModelType.mistral_small_instruct_2409,
+    'AI-ModelScope/Mistral-Small-Instruct-2409',
+    LoRATM.llama,
+    TemplateType.mistral_nemo,
+    requires=['transformers>=4.43'],
+    ignore_file_pattern=['^consolidated'],
+    support_flash_attn=True,
+    support_vllm=True,
+    hf_model_id='mistralai/Mistral-Small-Instruct-2409')
 @register_model(
     ModelType.mistral_nemo_instruct_2407,
     'AI-ModelScope/Mistral-Nemo-Instruct-2407',
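Note for reviewers (not part of the patch): a minimal sketch of how the newly registered model type could be exercised, assuming the Python quick-start helpers from the project README (`get_model_tokenizer`, `get_default_template_type`, `get_template`, `inference`) keep their documented signatures.

```python
# Sketch only: assumes swift's documented quick-start API; not part of this patch.
from swift.llm import (ModelType, get_default_template_type, get_model_tokenizer,
                       get_template, inference)

model_type = ModelType.mistral_small_instruct_2409
# Should resolve to 'mistral-nemo', matching TemplateType.mistral_nemo in the registration above.
template_type = get_default_template_type(model_type)

# First call downloads AI-ModelScope/Mistral-Small-Instruct-2409 from ModelScope;
# ignore_file_pattern=['^consolidated'] skips the redundant consolidated weight files.
model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'})
template = get_template(template_type, tokenizer)

response, _ = inference(model, tokenizer, template, 'Who are you?')
print(response)
```

The CLI path should be equivalent, e.g. `swift infer --model_type mistral-small-instruct-2409`, since the new entry reuses the existing mistral-nemo template and LoRA target modules.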