In [1]:
import torch
from diffusers import TextToVideoZeroPipeline
import numpy as np
import cv2

In [2]:
# Output frame rate and clip duration; their product is the number of frames generated.
FPS = 4  # frames per second, also passed to the video writer when saving
SECONDS = 30  # clip length in seconds

In [3]:
# Load the checkpoint into the Text2Video-Zero pipeline in half precision,
# then move the whole pipeline onto the GPU.
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
pipe = TextToVideoZeroPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [4]:
seed = 0  # re-applied to the generator before every chunk
# Total number of frames to generate (for example, 24 frames at 4 fps would be 6 seconds).
video_length = FPS * SECONDS
# Frames per pipeline call, counting the anchor frame prepended to each chunk.
# The chunking loop advances by chunk_size - 1, so this must be at least 2.
# (An earlier variant used: SECONDS if SECONDS < FPS else FPS.)
chunk_size = 8

In [5]:
# Text prompt describing the scene; the same prompt is passed to the pipeline for every chunk.
prompt = '''In 3D Video Game,
A chrome-plated robot dog, its fur matted with Martian dust, stumbles through the wreckage of a terraforming project.
The robot's single, glowing red eye scans the desolate landscape, searching for survivors amidst crumbling hydroponics domes.
The Martian sky, a swirling vortex of red and purple, casts long, eerie shadows across the scene.
'''

In [None]:
# Render the clip one chunk at a time; each pipeline call also receives frame 0
# as an anchor, which is discarded from that call's output afterwards.
result = []
generator = torch.Generator(device="cuda")
chunk_ids = np.arange(0, video_length, chunk_size - 1)
num_chunks = len(chunk_ids)
# End of each chunk: the start of the following chunk, or the clip end for the last one.
chunk_ends = list(chunk_ids[1:]) + [video_length]

for chunk_no, (ch_start, ch_end) in enumerate(zip(chunk_ids, chunk_ends), start=1):
    print(f"Processing chunk {chunk_no} / {num_chunks}")
    # Prepend frame 0 so cross-frame attention always has the first frame to attend to
    frame_ids = [0, *range(ch_start, ch_end)]
    # Re-seed before every call so all chunks start from the same random state
    generator.manual_seed(seed)
    output = pipe(
        prompt=prompt,
        video_length=len(frame_ids),
        generator=generator,
        frame_ids=frame_ids,
    )
    # Skip the prepended anchor frame; keep only this chunk's own frames
    result.append(output.images[1:])

# Stitch the chunks together and scale each frame to 8-bit values
result = [(frame * 255).astype("uint8") for frame in np.concatenate(result)]

Processing chunk 1 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 2 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 3 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 4 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 5 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 6 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 7 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 8 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

Processing chunk 9 / 18


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

In [7]:
def save_video_from_frames(frames, output_filename, fps=24):
    """
    Save a sequence of frames as an MP4 video using OpenCV.

    Parameters:
    - frames: Sequence of frames (numpy arrays of shape (height, width, 3)),
      given either as a list or as one stacked numpy array. OpenCV's writer
      interprets the three channels as BGR, so RGB frames should be converted
      by the caller beforehand. The size of the first frame sets the video size.
    - output_filename: Output file name (e.g., 'output.mp4').
    - fps: Frames per second for the video.

    Returns:
    - None. Prints a short status message; nothing is written when `frames`
      is None or empty.

    Raises:
    - IOError: if OpenCV cannot open `output_filename` for writing (for
      example an unwritable path or an unavailable 'mp4v' encoder).
    """
    # Use len() rather than truthiness: `not frames` is ambiguous for numpy arrays.
    if frames is None or len(frames) == 0:
        print("No frames to save.")
        return

    # Get the size of the frames
    height, width, _ = frames[0].shape

    # Define the codec and create the VideoWriter object (size is width, height)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_filename, fourcc, fps, (width, height))

    # VideoWriter does not raise when it fails to open; without this check the
    # function would report success while producing no usable file.
    if not video_writer.isOpened():
        video_writer.release()
        raise IOError(f"Could not open {output_filename!r} for writing")

    try:
        for frame in frames:
            video_writer.write(frame)
    finally:
        # Always finalise and close the file, even if a write fails midway
        video_writer.release()

    print(f"Video saved as {output_filename}")

In [8]:
# Alternative (disabled): write the frames with imageio instead of OpenCV.
# import imageio
# imageio.mimsave("Output.mp4", result, format = 'mp4')

In [9]:
# Save the video.
# The diffusion pipeline yields RGB frames, whereas OpenCV's VideoWriter treats the
# channels as BGR; swap them first so red and blue are not exchanged in the output.
frames_bgr = [cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) for frame in result]
save_video_from_frames(frames_bgr, 'Output.mp4', fps=FPS)

Video saved as Output.mp4
