In [2]:
import argparse
import logging
import os
import sys
import warnings
from datetime import datetime

warnings.filterwarnings('ignore')

import random

import torch
import torch.distributed as dist
from PIL import Image

import wan
from wan.configs import MAX_AREA_CONFIGS, SIZE_CONFIGS, SUPPORTED_SIZES, WAN_CONFIGS
from wan.distributed.util import init_distributed_group
from wan.utils.prompt_extend import DashScopePromptExpander, QwenPromptExpander
from wan.utils.utils import merge_video_audio, save_video, str2bool

# Settings consumed when wan.WanT2V is constructed further down.
ckpt_dir = "./Wan2.2-T2V-A14B"  # forwarded as checkpoint_dir
device, rank = 0, 0             # forwarded as device_id and rank

# FSDP and T5-on-CPU flags are all switched off for this run.
t5_fsdp = dit_fsdp = t5_cpu = False

# The constructor call uses use_sp=(ulysses_size > 1), so a value of 1
# results in use_sp=False.
ulysses_size = 1
convert_model_dtype = True

# Text prompt handed to wan_t2v.generate(). Adjacent string literals are
# joined by Python at compile time, so this is one single-line string; the
# pieces are split per sentence purely for readability.
prompt = (
    "Style: cinematic anime, ultra-detailed hand-drawn shading, volumetric light shafts, ethereal watercolor tones with HDR anime rendering. "
    "Genre: dark fantasy, spiritual transcendence, operatic tension. "
    "Camera: slow cinematic dolly-in → 3/4 circular pan → close-up orbit; 2.39:1 film aspect, anime-grade depth of field and motion interpolation. "
    "Scene Concept: A lone woman sings within colossal ancient ruins beneath a red eclipse — her aria stirs the slumbering energy of gods. "
    # NOTE: no trailing space on the next piece; the original text runs
    # "shadow.(0–5s)" together and the prompt is kept byte-for-byte.
    "The air vibrates with her voice as the world begins to fracture between light and shadow."
    "(0–5s) Wide establishing shot: broken temple, moonlight filtering through floating ash. "
    "The woman stands at the center, long dark hair drifting; eyes closed as she begins her melancholic aria. "
    "The animation shows subtle breathing, trembling fingers, small light motes pulsing with the rhythm of her song. "
    "(5–15s) Camera sweeps closer — magic sigils ignite around her feet, casting reflections in her eyes. "
    "Her cloak ripples with a supernatural wind; the aria grows deeper, emotion overwhelming her face. "
    "Soft hand-drawn glows and particle trails rise with the music — translucent petals, dust, fragments of light. "
    "(Think Makoto Shinkai’s “Your Name” or Violet Evergarden’s lighting, but darker and holier.) "
    "(15–25s) Her final note shatters the silence — massive energy blooms upward, light bending like brush strokes. "
    "The eclipse flares crimson; camera spins upward as she ascends through luminous debris. "
    "Her voice distorts into ethereal harmony; her eyes open — glowing gold tears flow down her cheeks as her body dissolves into divine radiance. "
    "Last frame: silhouette against red eclipse, voice echoing into the void."
)
# Clip geometry: `size` is used as a key into SIZE_CONFIGS and `frame_num`
# is forwarded to generate() under the same name.
size, frame_num = "480*832", 81

# Sampler settings forwarded to wan_t2v.generate().
sample_solver = "dpm++"
sample_shift = 5.5
sample_guide_scale = 4.5
base_seed = 42
offload_model = False

# Output file name and frame rate forwarded to save_video().
save_file, sample_fps = "base.mp4", 16

# Keyword options for the pipeline constructor, gathered from the settings
# defined earlier in this cell.
pipeline_options = dict(
    checkpoint_dir=ckpt_dir,
    device_id=device,
    rank=rank,
    t5_fsdp=t5_fsdp,
    dit_fsdp=dit_fsdp,
    use_sp=ulysses_size > 1,  # only True when ulysses_size exceeds 1
    t5_cpu=t5_cpu,
    convert_model_dtype=convert_model_dtype,
)

# Instantiate the pipeline with the "t2v-A14B" entry of WAN_CONFIGS.
wan_t2v = wan.WanT2V(config=WAN_CONFIGS["t2v-A14B"], **pipeline_options)


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [4]:

# Step count for the sampler. It was previously an inline literal in the
# call below, unlike every other argument; it now has a name in the same
# sample_* style. Defined in this cell so the cell runs on its own.
sample_steps = 30

# Run generation for `prompt` and keep the result in `video` for the save
# cell. Every argument now comes from a named setting.
video = wan_t2v.generate(
    prompt,
    size=SIZE_CONFIGS[size],  # look up the size entry for the "H*W"-style key
    frame_num=frame_num,
    shift=sample_shift,
    sample_solver=sample_solver,
    sampling_steps=sample_steps,
    guide_scale=sample_guide_scale,
    seed=base_seed,
    offload_model=offload_model,
)

100%|██████████| 30/30 [04:07<00:00,  8.24s/it]


In [5]:

# Options for writing the clip; file name and frame rate come from the
# settings cell, the rest are fixed for this notebook.
writer_options = dict(
    save_file=save_file,
    fps=sample_fps,
    nrow=1,
    normalize=True,
    value_range=(-1, 1),
)

# `video[None]` is passed inline rather than bound to a name, so no extra
# notebook variable refers to it once `video` is deleted below.
# NOTE(review): presumably the leading axis is the batch dimension that
# save_video expects — confirm against wan.utils.utils.save_video.
save_video(tensor=video[None], **writer_options)

# Drop the notebook's reference to the result, then block until queued CUDA
# work has finished. Because `video` is deleted here, re-running this cell
# requires re-running the generation cell first.
del video
torch.cuda.synchronize()