In [1]:
import argparse
import logging
import os
import sys
import warnings
from datetime import datetime

warnings.filterwarnings('ignore')

import random

import torch
import torch.distributed as dist
from PIL import Image

import wan
from wan.configs import MAX_AREA_CONFIGS, SIZE_CONFIGS, SUPPORTED_SIZES, WAN_CONFIGS
from wan.distributed.util import init_distributed_group
from wan.utils.prompt_extend import DashScopePromptExpander, QwenPromptExpander
from wan.utils.utils import merge_video_audio, save_video, str2bool

# --- Model-loading settings -------------------------------------------------
# Every value in this group is consumed by the wan.WanT2V(...) constructor call
# later in this cell; the trailing comment names the keyword it is passed as.
ckpt_dir = "./Wan2.2-T2V-A14B"  # -> checkpoint_dir (relative to the kernel's working directory)
device = 0  # -> device_id
rank = 0  # -> rank
t5_fsdp = False  # -> t5_fsdp
dit_fsdp = False  # -> dit_fsdp
ulysses_size = 1  # only used to derive use_sp = (ulysses_size > 1), which is False here
t5_cpu = False  # -> t5_cpu
convert_model_dtype = True  # -> convert_model_dtype

# --- Generation settings ----------------------------------------------------
# Text prompt handed to wan_t2v.generate() as its first positional argument.
# NOTE(review): the prompt scripts a 0-25 s sequence, but frame_num=81 saved at
# sample_fps=16 is roughly 5 s of video — confirm the intended clip length.
prompt = "Style: cinematic anime, ultra-detailed hand-drawn shading, volumetric light shafts, ethereal watercolor tones with HDR anime rendering. Genre: dark fantasy, spiritual transcendence, operatic tension. Camera: slow cinematic dolly-in → 3/4 circular pan → close-up orbit; 2.39:1 film aspect, anime-grade depth of field and motion interpolation. Scene Concept: A lone woman sings within colossal ancient ruins beneath a red eclipse — her aria stirs the slumbering energy of gods. The air vibrates with her voice as the world begins to fracture between light and shadow.(0–5s) Wide establishing shot: broken temple, moonlight filtering through floating ash. The woman stands at the center, long dark hair drifting; eyes closed as she begins her melancholic aria. The animation shows subtle breathing, trembling fingers, small light motes pulsing with the rhythm of her song. (5–15s) Camera sweeps closer — magic sigils ignite around her feet, casting reflections in her eyes. Her cloak ripples with a supernatural wind; the aria grows deeper, emotion overwhelming her face. Soft hand-drawn glows and particle trails rise with the music — translucent petals, dust, fragments of light. (Think Makoto Shinkai’s “Your Name” or Violet Evergarden’s lighting, but darker and holier.) (15–25s) Her final note shatters the silence — massive energy blooms upward, light bending like brush strokes. The eclipse flares crimson; camera spins upward as she ascends through luminous debris. Her voice distorts into ethereal harmony; her eyes open — glowing gold tears flow down her cheeks as her body dissolves into divine radiance. Last frame: silhouette against red eclipse, voice echoing into the void."
# Key looked up in SIZE_CONFIGS when generate() is called.
# NOTE(review): the prompt asks for a 2.39:1 widescreen frame; confirm whether
# "480*832" means width*height (portrait) and whether "832*480" was intended.
size = "480*832"
frame_num = 81  # passed to generate() as frame_num
sample_shift = 5.5  # passed to generate() as shift
sample_solver = "dpm++"  # passed to generate() as sample_solver
sample_guide_scale = 4.5  # passed to generate() as guide_scale
base_seed = 42  # passed to generate() as seed
offload_model = False  # passed to generate() as offload_model

# --- Output settings (used by save_video in the final cell) -----------------
save_file = "base.mp4"  # output path, relative to the kernel's working directory
sample_fps = 16  # frame rate the clip is written at

# Build the text-to-video pipeline from the settings defined earlier in this cell.
t2v_config = WAN_CONFIGS["t2v-A14B"]

# use_sp is switched on only when ulysses_size exceeds 1.
sequence_parallel = ulysses_size > 1

wan_t2v = wan.WanT2V(
    config=t2v_config,
    checkpoint_dir=ckpt_dir,
    device_id=device,
    rank=rank,
    t5_fsdp=t5_fsdp,
    dit_fsdp=dit_fsdp,
    use_sp=sequence_parallel,
    t5_cpu=t5_cpu,
    convert_model_dtype=convert_model_dtype,
)


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Applying TaylorSeer caching...
Successfully applied TaylorSeer cache to WanModel.
Successfully applied TaylorSeer cache to WanModel.
Caching enabled on both models.


In [2]:
# Echo the pipeline object to confirm it was constructed; WanT2V shows only the
# default object repr here, so this cell carries no further information.
wan_t2v

<wan.text2video.WanT2V at 0x7fd89f7f24e0>

In [3]:

# Sampler arguments: all taken from the configuration cell except the step
# count, which is fixed at 30 for this run.
sampler_kwargs = dict(
    size=SIZE_CONFIGS[size],
    frame_num=frame_num,
    shift=sample_shift,
    sample_solver=sample_solver,
    sampling_steps=30,
    guide_scale=sample_guide_scale,
    seed=base_seed,
    offload_model=offload_model,
)

# TaylorSeer cache arguments.
# NOTE(review): the recorded run logs two cache "steps" per sampler iteration
# (60 logged steps for 30 iterations), so warmup and interval appear to be
# counted in model forward passes, not sampler steps — confirm in the patch.
taylorseer_kwargs = dict(
    enable_taylorseer=True,
    residual_diff_threshold=0.0,
    # Three derivative terms. An earlier note called this "2nd order"; confirm
    # whether the count includes the zeroth-order term.
    taylorseer_n_derivatives=3,
    # Logged steps 0-4 are computed in full before any approximation is used.
    taylorseer_warmup_steps=5,
    # After warmup the log alternates COMPUTE / HIT, i.e. a full computation on
    # every 2nd logged step (not every 3rd).
    taylorseer_skip_interval=2,
)

video = wan_t2v.generate(prompt, **sampler_kwargs, **taylorseer_kwargs)

  0%|          | 0/30 [00:00<?, ?it/s]

Step 0: [COND PASS - COMPUTE] ❌ Computing fully based on interval.
Step 1: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


  3%|▎         | 1/30 [00:14<07:02, 14.58s/it]

Step 2: [COND PASS - COMPUTE] ❌ Computing fully based on interval.
Step 3: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


  7%|▋         | 2/30 [00:21<04:34,  9.81s/it]

Step 4: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 10%|█         | 3/30 [00:25<03:22,  7.49s/it]

Step 5: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 6: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 13%|█▎        | 4/30 [00:29<02:39,  6.12s/it]

Step 7: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 8: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 17%|█▋        | 5/30 [00:33<02:14,  5.37s/it]

Step 9: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 10: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 20%|██        | 6/30 [00:37<01:57,  4.91s/it]

Step 11: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 12: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 23%|██▎       | 7/30 [00:41<01:46,  4.62s/it]

Step 13: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 14: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 27%|██▋       | 8/30 [00:45<01:37,  4.44s/it]

Step 15: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 16: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 30%|███       | 9/30 [00:49<01:30,  4.31s/it]

Step 17: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 18: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 33%|███▎      | 10/30 [00:53<01:24,  4.22s/it]

Step 19: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 20: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 37%|███▋      | 11/30 [00:58<01:19,  4.16s/it]

Step 21: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 22: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 40%|████      | 12/30 [01:02<01:14,  4.12s/it]

Step 23: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 24: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 43%|████▎     | 13/30 [01:06<01:09,  4.09s/it]

Step 25: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 26: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 47%|████▋     | 14/30 [01:10<01:05,  4.07s/it]

Step 27: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
--- SWITCHING TO LOW-NOISE MODEL: RESETTING CACHE STATE! ---
Step 0: [COND PASS - COMPUTE] ❌ Computing fully based on interval.
Step 1: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 50%|█████     | 15/30 [01:33<02:29,  9.98s/it]

Step 2: [COND PASS - COMPUTE] ❌ Computing fully based on interval.
Step 3: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 53%|█████▎    | 16/30 [01:41<02:09,  9.23s/it]

Step 4: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 57%|█████▋    | 17/30 [01:45<01:42,  7.88s/it]

Step 5: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 6: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 60%|██████    | 18/30 [01:50<01:20,  6.72s/it]

Step 7: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 8: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 63%|██████▎   | 19/30 [01:54<01:05,  5.91s/it]

Step 9: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 10: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 67%|██████▋   | 20/30 [01:58<00:53,  5.35s/it]

Step 11: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 12: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 70%|███████   | 21/30 [02:02<00:44,  4.95s/it]

Step 13: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 14: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 73%|███████▎  | 22/30 [02:06<00:37,  4.68s/it]

Step 15: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 16: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 77%|███████▋  | 23/30 [02:10<00:31,  4.48s/it]

Step 17: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 18: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 80%|████████  | 24/30 [02:14<00:26,  4.34s/it]

Step 19: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 20: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 83%|████████▎ | 25/30 [02:18<00:21,  4.25s/it]

Step 21: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 22: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 87%|████████▋ | 26/30 [02:22<00:16,  4.18s/it]

Step 23: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 24: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 90%|█████████ | 27/30 [02:26<00:12,  4.13s/it]

Step 25: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 26: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 93%|█████████▎| 28/30 [02:30<00:08,  4.10s/it]

Step 27: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 28: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


 97%|█████████▋| 29/30 [02:34<00:04,  4.08s/it]

Step 29: [COND PASS - HIT] ✅ Approximating with TaylorSeer.
Step 30: [COND PASS - COMPUTE] ❌ Computing fully based on interval.


100%|██████████| 30/30 [02:38<00:00,  5.28s/it]

Step 31: [COND PASS - HIT] ✅ Approximating with TaylorSeer.





In [4]:

# Write the generated clip to `save_file` at `sample_fps` frames per second.
# `video[None]` adds a leading axis before the tensor is handed to save_video
# (presumably a batch axis — confirm against wan.utils.utils.save_video).
save_video(
    tensor=video[None],
    save_file=save_file,
    fps=sample_fps,
    nrow=1,
    normalize=True,
    value_range=(-1, 1)
)
# Drop the notebook's reference to the generated tensor.
# NOTE: after this line the cell cannot be re-run on its own — `video` no longer
# exists until the generation cell is executed again.
del video
# Wait for queued CUDA work to finish. This call only synchronizes; it does not
# by itself return cached GPU memory (torch.cuda.empty_cache() does that).
torch.cuda.synchronize()