From 2f3e1ba0048710e8e814c2a12ec3e277822b8f09 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 21 Oct 2025 16:38:30 +0800
Subject: [PATCH 1/3] update deepseek_ocr docs

---
 ...00\344\275\263\345\256\236\350\267\265.md" |  2 +-
 examples/models/deepseek_ocr/infer.py         | 24 +++++++++++++++++++
 .../deepseek_ocr.sh => deepseek_ocr/train.sh} |  0
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 examples/models/deepseek_ocr/infer.py
 rename examples/models/{deepseek_vl2/deepseek_ocr.sh => deepseek_ocr/train.sh} (100%)

diff --git "a/docs/source/BestPractices/Qwen3-VL\346\234\200\344\275\263\345\256\236\350\267\265.md" "b/docs/source/BestPractices/Qwen3-VL\346\234\200\344\275\263\345\256\236\350\267\265.md"
index 9a0528c890..15615e9923 100644
--- "a/docs/source/BestPractices/Qwen3-VL\346\234\200\344\275\263\345\256\236\350\267\265.md"
+++ "b/docs/source/BestPractices/Qwen3-VL\346\234\200\344\275\263\345\256\236\350\267\265.md"
@@ -73,7 +73,7 @@ print(output_text[0])
 使用 ms-swift 的 `PtEngine` 进行推理:
 ```python
 import os
-os.environ['SWIFT_DEBUG'] = '1'
+# os.environ['SWIFT_DEBUG'] = '1'
 os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 os.environ['VIDEO_MAX_TOKEN_NUM'] = '128'
 os.environ['FPS_MAX_FRAMES'] = '16'
diff --git a/examples/models/deepseek_ocr/infer.py b/examples/models/deepseek_ocr/infer.py
new file mode 100644
index 0000000000..b49ed11490
--- /dev/null
+++ b/examples/models/deepseek_ocr/infer.py
@@ -0,0 +1,24 @@
+# pip install "transformers==4.46.3"
+import os
+# os.environ['SWIFT_DEBUG'] = '1'
+
+from swift.llm import PtEngine, InferRequest, RequestConfig
+engine = PtEngine('deepseek-ai/DeepSeek-OCR')
+infer_request = InferRequest(messages=[{
+    "role": "user",
+    # or
+    "content": 'Free OCR.',
+    # "content": '<|grounding|>Convert the document to markdown.',
+}], images=['https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/ocr.png'])
+request_config = RequestConfig(max_tokens=512, temperature=0)
+resp_list = engine.infer([infer_request], request_config=request_config)
+response = resp_list[0].choices[0].message.content
+
+# use stream
+request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)
+gen_list = engine.infer([infer_request], request_config=request_config)
+for chunk in gen_list[0]:
+    if chunk is None:
+        continue
+    print(chunk.choices[0].delta.content, end='', flush=True)
+print()
diff --git a/examples/models/deepseek_vl2/deepseek_ocr.sh b/examples/models/deepseek_ocr/train.sh
similarity index 100%
rename from examples/models/deepseek_vl2/deepseek_ocr.sh
rename to examples/models/deepseek_ocr/train.sh

From 6ac509f2abb5eaacf8816f6b141f32796afbee68 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 21 Oct 2025 16:42:05 +0800
Subject: [PATCH 2/3] update

---
 examples/models/deepseek_ocr/infer.py | 43 +++++++++++++++------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/examples/models/deepseek_ocr/infer.py b/examples/models/deepseek_ocr/infer.py
index b49ed11490..13019f309a 100644
--- a/examples/models/deepseek_ocr/infer.py
+++ b/examples/models/deepseek_ocr/infer.py
@@ -1,24 +1,29 @@
 # pip install "transformers==4.46.3"
 import os
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 # os.environ['SWIFT_DEBUG'] = '1'
 
-from swift.llm import PtEngine, InferRequest, RequestConfig
-engine = PtEngine('deepseek-ai/DeepSeek-OCR')
-infer_request = InferRequest(messages=[{
-    "role": "user",
-    # or
-    "content": 'Free OCR.',
-    # "content": '<|grounding|>Convert the document to markdown.',
-}], images=['https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/ocr.png'])
-request_config = RequestConfig(max_tokens=512, temperature=0)
-resp_list = engine.infer([infer_request], request_config=request_config)
-response = resp_list[0].choices[0].message.content
+if __name__ == '__main__':
+    from swift.llm import InferRequest, PtEngine, RequestConfig
+    engine = PtEngine('deepseek-ai/DeepSeek-OCR')
+    infer_request = InferRequest(
+        messages=[{
+            'role': 'user',
+            # or
+            'content': 'Free OCR.',
+            # "content": '<|grounding|>Convert the document to markdown.',
+        }],
+        images=['https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/ocr.png'])
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+    resp_list = engine.infer([infer_request], request_config=request_config)
+    response = resp_list[0].choices[0].message.content
 
-# use stream
-request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)
-gen_list = engine.infer([infer_request], request_config=request_config)
-for chunk in gen_list[0]:
-    if chunk is None:
-        continue
-    print(chunk.choices[0].delta.content, end='', flush=True)
-print()
+    # use stream
+    request_config = RequestConfig(max_tokens=512, temperature=0, stream=True)
+    gen_list = engine.infer([infer_request], request_config=request_config)
+    for chunk in gen_list[0]:
+        if chunk is None:
+            continue
+        print(chunk.choices[0].delta.content, end='', flush=True)
+    print()

From 16df77cae1a9fe55278355428ebf75732fc4033f Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 21 Oct 2025 16:43:27 +0800
Subject: [PATCH 3/3] update

---
 examples/models/deepseek_ocr/infer.py | 2 +-
 examples/models/deepseek_ocr/train.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/models/deepseek_ocr/infer.py b/examples/models/deepseek_ocr/infer.py
index 13019f309a..3f54cae8aa 100644
--- a/examples/models/deepseek_ocr/infer.py
+++ b/examples/models/deepseek_ocr/infer.py
@@ -1,4 +1,4 @@
-# pip install "transformers==4.46.3"
+# pip install "transformers==4.46.3" easydict
 import os
 
 os.environ['CUDA_VISIBLE_DEVICES'] = '0'
diff --git a/examples/models/deepseek_ocr/train.sh b/examples/models/deepseek_ocr/train.sh
index cba3a0c555..cc056eba12 100644
--- a/examples/models/deepseek_ocr/train.sh
+++ b/examples/models/deepseek_ocr/train.sh
@@ -1,5 +1,5 @@
 # 24GiB
-pip install "transformers==4.46.3"
+pip install "transformers==4.46.3" easydict
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \