diff --git a/examples/infer/demo_mllm.py b/examples/infer/demo_mllm.py
index 8d3c77212a..b277818957 100644
--- a/examples/infer/demo_mllm.py
+++ b/examples/infer/demo_mllm.py
@@ -104,20 +104,33 @@ def get_data(mm_type: Literal['text', 'image', 'video', 'audio']):
     if infer_backend == 'pt':
         model = 'Qwen/Qwen2-Audio-7B-Instruct'
         mm_type = 'audio'
-        dataset = 'speech_asr/speech_asr_aishell1_trainsets:validation#1000'
         engine = PtEngine(model, max_batch_size=64)
     elif infer_backend == 'vllm':
+        # test env: vllm==0.6.5, transformers==4.48.*
         from swift.llm import VllmEngine
         model = 'Qwen/Qwen2-VL-2B-Instruct'
-        mm_type = 'video'
-        dataset = 'AI-ModelScope/LaTeX_OCR:small#1000'
+        # If you encounter insufficient GPU memory, please reduce `max_model_len` and set `max_num_seqs=5`.
         engine = VllmEngine(model, max_model_len=32768, limit_mm_per_prompt={'image': 5, 'video': 2})
+        mm_type = 'video'  # or 'image'
     elif infer_backend == 'lmdeploy':
+        # test env: lmdeploy==0.6.4
         from swift.llm import LmdeployEngine
         model = 'OpenGVLab/InternVL2_5-1B'
-        mm_type = 'video'
-        dataset = 'AI-ModelScope/LaTeX_OCR:small#1000'
         engine = LmdeployEngine(model, vision_batch_size=8)
+        mm_type = 'video'  # or 'image'
+
+    # infer dataset
+    if mm_type == 'audio':
+        dataset = 'speech_asr/speech_asr_aishell1_trainsets:validation#1000'
+    elif mm_type == 'image':
+        # The meaning of environment variables can be found at:
+        # https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html#id17
+        os.environ['MAX_PIXELS'] = '1003520'
+        dataset = 'AI-ModelScope/LaTeX_OCR:small#1000'
+    elif mm_type == 'video':
+        os.environ['VIDEO_MAX_PIXELS'] = '50176'
+        os.environ['FPS_MAX_FRAMES'] = '12'
+        dataset = 'swift/VideoChatGPT:Generic#100'
+
     # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset.
     dataset = load_dataset([dataset], seed=42)[0]
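
Usage note (a sketch, not part of the patch): the in-code comment above says `infer_batch` does not require creating a dataset, so the `engine` configured in this hunk can also be driven with hand-built requests. The sketch below assumes the `swift.llm` batch-inference API (`InferRequest`, `RequestConfig`, `engine.infer`); the video path and generation settings are illustrative placeholders, not taken from this diff.

from swift.llm import InferRequest, RequestConfig

# `engine` is whichever backend was constructed above (PtEngine / VllmEngine / LmdeployEngine).
# Build a single multimodal request by hand; the '<video>' tag marks where the media is
# inserted, and the file path is a hypothetical placeholder.
infer_requests = [
    InferRequest(
        messages=[{'role': 'user', 'content': '<video>Describe the video.'}],
        videos=['/path/to/demo.mp4'],
    )
]
# Illustrative sampling settings.
request_config = RequestConfig(max_tokens=512, temperature=0)
resp_list = engine.infer(infer_requests, request_config)
print(resp_list[0].choices[0].message.content)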