**Note**: Downloading and loading the models takes 3 minutes 30 seconds to 4 minutes.

For generating a 5-second video with 30 steps:

- 480×832 resolution

- L4 GPU: ~40 minutes 10 seconds
- A100 GPU: ~10 minutes 24 seconds (faster and potentially more cost-effective)

- 720×1280 resolution

- A100 GPU: ~34 minutes

**DOWNLOAD AND LOAD LIBRARIES & MODELS**

In [None]:
# @title
!git clone https://github.com/Isi-dev/DiffSynth-Studio.git
%cd DiffSynth-Studio
!pip install -e .
!pip install "huggingface_hub[cli]"
!apt-get install -y aria2
import os
from huggingface_hub import list_repo_files

repo_id = "Isi99999/Wan2.1-T2V-14B"
# repo_id = "Wan-AI/Wan2.1-T2V-14B"
all_files = list_repo_files(repo_id)
base_url = f"https://huggingface.co/{repo_id}/resolve/main/"

with open("file_list.txt", "w") as f:
    for file_path in all_files:
        full_url = f"{base_url}{file_path}"
        save_path = f"models/Wan-AI/Wan2.1-T2V-14B/{file_path}"
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        f.write(f"{full_url}\n out={save_path}\n")
!aria2c -x 16 -s 16 -i file_list.txt --continue=true --auto-file-renaming=false

print("✅ All models downloaded successfully!")

import torch
from diffsynth import ModelManager, WanVideoPipeline

# Checkpoints to load: the diffusion weights, the umT5-XXL text-encoder
# weights and the VAE, all from the directory filled by the download step.
checkpoint_paths = [
    "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
]

# The manager is created on the CPU and the checkpoints are loaded as FP8
# (`torch.float8_e4m3fn`). Pass `torch_dtype=torch.bfloat16` instead to
# disable FP8 quantization.
model_manager = ModelManager(device="cpu")
model_manager.load_models(checkpoint_paths, torch_dtype=torch.float8_e4m3fn)

# Build the video pipeline on the GPU with bfloat16 as its dtype.
pipe = WanVideoPipeline.from_model_manager(
    model_manager,
    torch_dtype=torch.bfloat16,
    device="cuda",
)
# NOTE(review): presumably `num_persistent_param_in_dit=None` puts no cap on
# the DiT parameters kept resident in VRAM — confirm against DiffSynth docs.
pipe.enable_vram_management(num_persistent_param_in_dit=None)
print("✅ All models loaded successfully!")
from diffsynth import save_video
from diffsynth import VideoData

**RUN TEXT TO VIDEO**

In [None]:


prompt = "A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements.The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic." # @param {type:"string"}
sample_steps = 30 # @param {"type":"number"}
Instruction = "choose from '720*1280', '1280*720', '480*832', '832*480', '1024*1024 for Width & Height" # @param {"type":"string"}
width = 480 # @param {"type":"number"}
height = 832 # @param {"type":"number"}
# num_frames = 1 # @param {"type":"number"}
seed = 1 # @param {"type":"number"}

# Negative prompt (Chinese). It lists what to steer away from: garish colors,
# overexposure, static or blurry frames, subtitles, low quality, JPEG
# artifacts, deformed hands/faces/limbs, fused fingers, cluttered or crowded
# backgrounds, three legs, walking backwards.
negative_prompt = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"

# Render an 81-frame clip from the text prompt using the form values above.
# NOTE(review): `tiled=True` presumably enables tiled processing to lower peak
# VRAM — confirm against the DiffSynth pipeline documentation.
video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=height,
    width=width,
    num_frames=81,
    num_inference_steps=sample_steps,
    seed=seed,
    tiled=True,
)

# Write the frames to disk; 81 frames at 15 fps is roughly a 5.4-second clip.
save_video(video, "video1.mp4", fps=15, quality=5)

from IPython.display import display as displayVid, Video as outVid
import os

# Inline video playback helper
def show_video(video_path):
    """Embed the video at `video_path` in the notebook output.

    Prints an error message instead of raising when the file does not exist.
    """
    if not os.path.exists(video_path):
        print(f"Error: {video_path} not found!")
        return
    displayVid(outVid(video_path, embed=True))

# Play the clip written to video1.mp4 inline in the notebook output
show_video("video1.mp4")


**RUN TEXT TO IMAGE**

In [None]:


prompt = "A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing Canada. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Canadian flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic." # @param {type:"string"}
sample_steps = 30 # @param {"type":"number"}
Instruction = "choose from '720*1280', '1280*720', '480*832', '832*480', '1024*1024 for Width & Height" # @param {"type":"string"}
width = 720 # @param {"type":"number"}
height = 1280 # @param {"type":"number"}
seed = 1 # @param {"type":"number"}
displayWidth = 480 # @param {"type":"number"}

# Negative prompt (Chinese). It lists what to steer away from: garish colors,
# overexposure, static or blurry frames, subtitles, low quality, JPEG
# artifacts, deformed hands/faces/limbs, fused fingers, cluttered or crowded
# backgrounds, three legs, walking backwards.
negative_prompt = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"

# Text-to-image reuses the video pipeline with num_frames=1, so the result is
# a single frame rather than a clip.
# NOTE(review): `tiled=True` presumably enables tiled processing to lower peak
# VRAM — confirm against the DiffSynth pipeline documentation.
video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=height,
    width=width,
    num_frames=1,
    num_inference_steps=sample_steps,
    seed=seed,
    tiled=True,
)

# The single frame is still written through the video writer, as a one-frame MP4.
save_video(video, "video2.mp4", fps=15, quality=5)

import cv2
import os
from IPython.display import display as displayVid, Image as outImg

def show_image(video_path, display_width=480, image_path="single_frame.png"):
    """Display the first frame of a video file as an inline image.

    The first frame is decoded with OpenCV, written to `image_path`, and then
    rendered in the notebook output.

    Args:
        video_path: Path of the video file to read the frame from.
        display_width: Width in pixels passed to the inline image display.
        image_path: Where the extracted frame is saved before display.

    Returns:
        None. Problems (missing file, unreadable frame, failed write) are
        reported with a printed message rather than an exception.
    """
    if not os.path.exists(video_path):
        print(f"Error: {video_path} not found!")
        return

    cap = cv2.VideoCapture(video_path)
    try:
        success, frame = cap.read()
    finally:
        # Release the capture handle even if decoding raises.
        cap.release()

    if not success:
        print("Error: Could not read the frame.")
        return

    # cv2.imwrite reports failure through its return value; without this check
    # a stale image left by an earlier run would be displayed instead.
    if not cv2.imwrite(image_path, frame):
        print(f"Error: Could not write {image_path}.")
        return

    displayVid(outImg(image_path, width=display_width))

# Extract the first frame of video2.mp4 and show it at the width chosen in the form
show_image("video2.mp4", display_width=displayWidth)
