In [1]:
import torch
import torchvision
from diffusers.utils import export_to_video
from diffusers import LTXPipeline
# Sanity-check the runtime: report the PyTorch build and what CUDA hardware
# (if any) is visible to this kernel before loading any model weights.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    device_label = torch.cuda.get_device_name(0)
else:
    device_label = "No CUDA device found"

print("Torch version:", torch.__version__)
print("CUDA available:", cuda_ready)
print("CUDA device count:", torch.cuda.device_count())
print("CUDA device name:", device_label)

Torch version: 2.6.0+cu118
CUDA available: True
CUDA device count: 1
CUDA device name: NVIDIA GeForce RTX 4070 Laptop GPU


In [2]:
# Local checkpoint directory for the LTX-Video weights. Kept in one named
# constant so the notebook can be pointed at a different location easily.
MODEL_DIR = "D:\\huggingface_models\\LTX-Video"

# Load the text-to-video pipeline in half precision to reduce memory use.
# NOTE(review): the LTX-Video examples in the diffusers docs load the model
# with torch.bfloat16 — confirm float16 is intended for this checkpoint.
pipe = LTXPipeline.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,  # ✅ Use float16 (uses less memory)
)

pipe.enable_attention_slicing()  # ✅ Reduces VRAM usage

# Do NOT call pipe.to("cuda") before enabling model CPU offload: offloading
# manages device placement itself (each component is moved to the GPU only
# while it runs). Moving the whole pipeline to CUDA first defeats the memory
# saving and can run out of VRAM on a laptop GPU before offload is enabled.
pipe.enable_model_cpu_offload()



Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [24]:
# Positive prompt: assembled sentence by sentence (adjacent string literals
# are concatenated by Python), describing the subject, outfit and look.
prompt = (
    "A stunning Asian woman with long, dark brown hair gracefully dances to a trending Tagalog song. "
    "She has a naturally fit and curvy figure with medium-sized breasts, well-proportioned thighs, and a smooth, radiant white skin tone. "
    "She wears an elegant, stylish swimsuit that highlights her beauty. "
    "The video captures her natural facial expressions, fluid body movements, and detailed skin texture, with professional lighting and a cinematic look."
)

# Negative prompt: a comma-separated list of artefacts the model should avoid.
negative_prompt = ", ".join(
    [
        "Worst quality",
        "low resolution",
        "distorted face",
        "unnatural skin texture",
        "blurry details",
        "stiff movement",
        "unrealistic body proportions",
        "exaggerated features",
        "unnatural lighting",
        "pixelated details",
        "deformed hands or fingers",
        "missing facial details",
        "creepy or horror-like elements.",
    ]
)


In [25]:
# Fixed seed so that Restart & Run All regenerates the same clip.
SEED = 0

# Run text-to-video generation.
# - width/height must be divisible by 32 for LTX-Video (1024 and 576 both are).
# - num_frames should have the form 8*k + 1 according to the LTX-Video docs;
#   the previous value of 300 does not, so the nearest valid count below it
#   (8*37 + 1 = 297) is requested explicitly instead.
# NOTE(review): the LTX-Video docs suggest the model works best at or below
# 257 frames — consider lowering num_frames if quality or VRAM is a problem.
video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=1024,
    height=576,  # 16:9 ratio for better clarity
    num_frames=297,
    num_inference_steps=40,
    # A CPU generator keeps seeding independent of where the offloaded
    # model components happen to live at call time.
    generator=torch.Generator(device="cpu").manual_seed(SEED),
).frames

  0%|          | 0/40 [00:00<?, ?it/s]

In [26]:
import numpy as np
import imageio

# Normalise the pipeline output into a flat list of per-frame arrays.
# `video` is either a one-element list wrapping the list of frames (a batch
# of one video) or already a flat list of frames.
if isinstance(video, list) and len(video) == 1 and isinstance(video[0], list):
    video_frames = [np.array(frame) for frame in video[0]]  # Unwrap the single-video batch
elif isinstance(video, list):
    video_frames = [np.array(frame) for frame in video]
else:
    raise ValueError(f"Unexpected video format: {type(video)}")

# Fail early with a clear message instead of an IndexError further down.
if not video_frames:
    raise ValueError("Pipeline returned no frames; nothing to export.")

# Debugging: check frame shapes
print(f"First frame shape after fixing: {video_frames[0].shape}")
print([frame.shape for frame in video_frames[:5]])  # Expect (height, width, 3), e.g. (576, 1024, 3)

# Define output path
output_path = "output.mp4"

# Export video using imageio.
# NOTE(review): LTXPipeline's frame_rate argument defaults to 25 per the
# diffusers docs, while this export uses 24 fps — confirm which is intended.
try:
    imageio.mimsave(output_path, video_frames, fps=24)
except Exception as e:
    # Print diagnostics about the raw pipeline output, then re-raise so the
    # failure is not silently swallowed (no file was written in this case).
    print(f"❌ Error exporting video: {e}")
    print(type(video), len(video))  # Check video structure
    print(video[0].shape if isinstance(video[0], np.ndarray) else type(video[0]))  # Check first element shape
    raise
else:
    print(f"✅ Video saved successfully as {output_path}!")


First frame shape after fixing: (576, 1024, 3)
[(576, 1024, 3), (576, 1024, 3), (576, 1024, 3), (576, 1024, 3), (576, 1024, 3)]
✅ Video saved successfully as output.mp4!
