In [1]:
import requests
from bs4 import BeautifulSoup

def search_huggingface(query):
    """Searches Hugging Face for models matching the query."""
    url = f"https://huggingface.co/models?search={query}&library=all&sort=downloads"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    models = []
    for model_card in soup.select('article.mb-2.border.border-gray-200.rounded-lg'):
        title_tag = model_card.select_one('h4 a')
        if title_tag:
            title = title_tag.get_text(strip=True)
            link = f"https://huggingface.co{title_tag['href']}"
            models.append({'title': title, 'link': link})
    return models

# Search for text-to-video models on Hugging Face
text_to_video_models = search_huggingface("text-to-video")

print("Potential Text-to-Video Models/Libraries found on Hugging Face:")
if text_to_video_models:
    for model in text_to_video_models:
        print(f"- {model['title']}: {model['link']}")
else:
    print("No models found.")

# Note: Additional search on general web for open-source libraries might be needed if Hugging Face results are insufficient.
# However, given the prevalence of models on Hugging Face, starting here is a good approach.



Potential Text-to-Video Models/Libraries found on Hugging Face:
No models found.


In [2]:
import torch
from diffusers import DiffusionPipeline

# Specify the model identifier from Hugging Face
# Using a known text-to-video model as an example.
# This model was identified through manual research outside of the programmatic steps.
model_id = "cerspense/zeroscope_v2_576w"

# Determine the device to use (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the pre-trained text-to-video model
# Using from_pretrained with the model identifier and moving it to the selected device.
try:
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32)
    pipe = pipe.to(device)
    print(f"Model '{model_id}' loaded successfully on {device}.")
except Exception as e:
    print(f"Error loading model: {e}")
    pipe = None # Ensure pipe is None if loading fails




Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after 

Model 'cerspense/zeroscope_v2_576w' loaded successfully on cuda.


In [3]:
# Install necessary libraries
!pip install diffusers transformers accelerate imageio[ffmpeg] git-lfs

# Clone a potential repository (assuming a Stable Diffusion based approach as an example)
# This is a placeholder and might need to be replaced with a more specific repo if a model is identified later.
!git clone https://github.com/huggingface/diffusers.git

# Install git-lfs
!git lfs install

fatal: destination path 'diffusers' already exists and is not an empty directory.
Git LFS initialized.


In [4]:
# Install necessary libraries
!pip install diffusers transformers accelerate imageio[ffmpeg] git-lfs

# Clone a potential repository (assuming a Stable Diffusion based approach as an example)
# This is a placeholder and might need to be replaced with a more specific repo if a model is identified later.
!git clone https://github.com/huggingface/diffusers.git

# Install git-lfs
!git lfs install


fatal: destination path 'diffusers' already exists and is not an empty directory.
Git LFS initialized.


In [5]:
if pipe is not None:
    prompt = "a close up shot of a person's eye, with a galaxy reflected in it"

    # Generate the video
    try:
        video_frames = pipe(prompt, num_frames=25, num_inference_steps=50, generator=torch.manual_seed(42)).frames
        print("Video generation successful.")
    except Exception as e:
        print(f"Error during video generation: {e}")
        video_frames = None
else:
    print("Model was not loaded successfully. Cannot generate video.")
    video_frames = None


  0%|          | 0/50 [00:00<?, ?it/s]

Video generation successful.


In [6]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio

    output_filename = "generated_video.mp4"
    try:
        with imageio.get_writer(output_filename, fps=8) as writer:
            for frame in video_frames:
                writer.append_data(frame)
        print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")



Error saving video: Image must have 1, 2, 3 or 4 channels


In [7]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio
    import numpy as np

    output_filename = "generated_video.mp4"
    try:
        # Convert frames to uint8 and ensure 3 channels (RGB)
        processed_frames = []
        for frame in video_frames:
            # Convert float32 to uint8 and scale to 0-255
            frame_uint8 = (frame * 255).astype(np.uint8)
            # Ensure the frame has 3 channels (RGB) - handle potential alpha channel or grayscale
            if frame_uint8.shape[-1] == 4:
                frame_uint8 = frame_uint8[..., :3] # Drop alpha channel
            elif frame_uint8.ndim == 2:
                 frame_uint8 = np.stack([frame_uint8] * 3, axis=-1) # Convert grayscale to RGB

            processed_frames.append(frame_uint8)


        with imageio.get_writer(output_filename, fps=8) as writer:
            for frame in processed_frames:
                writer.append_data(frame)
        print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Error saving video: Image must have 1, 2, 3 or 4 channels


In [8]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio
    import numpy as np
    from PIL import Image

    output_filename = "generated_video.mp4"
    try:
        processed_frames = []
        for frame in video_frames:
            # Assuming the frame is a PyTorch Tensor or NumPy array in the range [0, 1] or [0, 255]
            # Convert to NumPy array if it's a Tensor
            if isinstance(frame, torch.Tensor):
                frame = frame.permute(1, 2, 0).cpu().numpy() # Convert from (C, H, W) to (H, W, C) and to numpy

            # Convert to uint8 if not already
            if frame.dtype != np.uint8:
                 # Scale to 0-255 if in range [0, 1]
                 if np.max(frame) <= 1.0:
                     frame = (frame * 255).astype(np.uint8)
                 else:
                     frame = frame.astype(np.uint8)


            # Ensure the frame has 3 channels (RGB)
            if frame.ndim == 2:
                 frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
            elif frame.shape[-1] == 1:
                 frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
            elif frame.shape[-1] == 4:
                frame = frame[..., :3] # Drop alpha channel

            processed_frames.append(frame)


        with imageio.get_writer(output_filename, fps=8) as writer:
            for frame in processed_frames:
                writer.append_data(frame)
        print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Error saving video: Image must have 1, 2, 3 or 4 channels


In [9]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio
    import numpy as np
    from PIL import Image # Keep import in case it's needed for conversion later
    import torch # Keep import in case it's needed for conversion later

    output_filename = "generated_video.mp4"
    try:
        processed_frames = []
        for i, frame in enumerate(video_frames):
            # Assuming the frame is a PyTorch Tensor or NumPy array in the range [0, 1] or [0, 255]
            # Convert to NumPy array if it's a Tensor
            if isinstance(frame, torch.Tensor):
                # Ensure the tensor is on CPU before converting to numpy
                frame = frame.detach().cpu().numpy()

            # Convert to uint8 if not already
            if frame.dtype != np.uint8:
                 # Scale to 0-255 if in range [0, 1]
                 if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                     frame = (frame * 255).astype(np.uint8)
                 else:
                    # If not in [0, 1] range, simply convert type, assuming it might be close to uint8 range
                    frame = frame.astype(np.uint8)


            # Ensure the frame has 3 channels (RGB)
            if frame.ndim == 2:
                 frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
            elif frame.shape[-1] == 1:
                 frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
            elif frame.shape[-1] == 4:
                frame = frame[..., :3] # Drop alpha channel

            processed_frames.append(frame)
            print(f"Processing frame {i}: Original shape {video_frames[i].shape if isinstance(video_frames[i], torch.Tensor) else video_frames[i].shape}, Processed shape {frame.shape}, Processed dtype {frame.dtype}")


        with imageio.get_writer(output_filename, fps=8) as writer:
            for i, frame in enumerate(processed_frames):
                 print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}")
                 writer.append_data(frame)

        print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Processing frame 0: Original shape (25, 256, 256, 3), Processed shape (25, 256, 256, 3), Processed dtype uint8
Appending frame 0: Shape (25, 256, 256, 3), Dtype uint8
Error saving video: Image must have 1, 2, 3 or 4 channels


In [10]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio
    import numpy as np
    # No need to import PIL or torch again if not explicitly converting from those types

    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a list of frames or a single numpy array/torch tensor where the first dimension is the frame index
        # If it's a single tensor/array, iterate through the first dimension
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             frames_to_save = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, assume each element is a frame
             frames_to_save = video_frames
        else:
             print("Unsupported format for video_frames.")
             frames_to_save = None


        if frames_to_save is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Iterate through the frames individually
                for i, frame in enumerate(frames_to_save):
                    # Ensure the frame is a NumPy array
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    # Convert to uint8 if not already
                    if frame.dtype != np.uint8:
                         # Scale to 0-255 if in range [0, 1]
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            # If not in [0, 1] range, simply convert type, assuming it might be close to uint8 range
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel
                    elif frame.shape[0] in [1, 2, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)

                    # Final check on shape before appending
                    if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         print(f"Skipping frame {i} due to incorrect shape: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [11]:
if 'video_frames' in locals() and video_frames is not None:
    import imageio
    import numpy as np
    # No need to import PIL or torch again if not explicitly converting from those types

    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a single numpy array or torch tensor where the first dimension is the frame index
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             # Iterate through the first dimension to get individual frames
             frames_to_save = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, assume each element is a frame
             frames_to_save = video_frames
        else:
             print("Unsupported format for video_frames.")
             frames_to_save = None


        if frames_to_save is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Correctly iterate through the individual frames
                for i in range(frames_to_save.shape[0] if isinstance(frames_to_save, (np.ndarray, torch.Tensor)) else len(frames_to_save)):
                    frame = frames_to_save[i]

                    # Ensure the frame is a NumPy array
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    # Convert to uint8 if not already
                    if frame.dtype != np.uint8:
                         # Scale to 0-255 if in range [0, 1]
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            # If not in [0, 1] range, simply convert type, assuming it might be close to uint8 range
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel


                    # Final check on shape before appending
                    if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         print(f"Skipping frame {i} due to incorrect shape: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [12]:
if 'video_frames' in locals() and video_frames is not None:
    # No need to import imageio, numpy, PIL, or torch again

    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a single numpy array or torch tensor where the first dimension is the frame index
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             frames_array = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, convert to a numpy array for consistent processing
             frames_array = np.array(video_frames)
        else:
             print("Unsupported format for video_frames.")
             frames_array = None


        if frames_array is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Correctly iterate through the individual frames by indexing the first dimension
                for i in range(frames_array.shape[0]):
                    frame = frames_array[i]

                    # Ensure the frame is a NumPy array and in uint8 format
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    if frame.dtype != np.uint8:
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel


                    # Final check on shape before appending
                    if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         print(f"Skipping frame {i} due to incorrect shape: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [13]:
if 'video_frames' in locals() and video_frames is not None:
    # No need to import imageio, numpy, PIL, or torch again

    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a single numpy array or torch tensor where the first dimension is the frame index
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             frames_array = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, convert to a numpy array for consistent processing
             frames_array = np.array(video_frames)
        else:
             print("Unsupported format for video_frames.")
             frames_array = None


        if frames_array is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Correctly iterate through the individual frames by indexing the first dimension
                for i in range(frames_array.shape[0]):
                    frame = frames_array[i]

                    # Ensure the frame is a NumPy array and in uint8 format
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    if frame.dtype != np.uint8:
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    # This logic should apply to the individual frame's shape
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel


                    # Final check on shape before appending
                    # This check should be against the shape of the individual frame
                    if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         # This print indicates an issue with the processing logic or the input frame format
                         print(f"Skipping frame {i} due to incorrect shape after processing: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape after processing: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [14]:
if 'video_frames' in locals() and video_frames is not None:
    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a single numpy array or torch tensor where the first dimension is the frame index
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             frames_array = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, convert to a numpy array for consistent processing
             frames_array = np.array(video_frames)
        else:
             print("Unsupported format for video_frames.")
             frames_array = None


        if frames_array is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Correctly iterate through the individual frames by indexing the first dimension
                for i in range(frames_array.shape[0]):
                    frame = frames_array[i]

                    # Ensure the frame is a NumPy array and in uint8 format
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    if frame.dtype != np.uint8:
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    # This logic should apply to the individual frame's shape
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel


                    # Final check on shape before appending
                    # This check should be against the shape of the individual frame
                    if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         # This print indicates an issue with the processing logic or the input frame format
                         print(f"Skipping frame {i} due to incorrect shape after processing: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape after processing: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [15]:
if 'video_frames' in locals() and video_frames is not None:
    output_filename = "generated_video.mp4"
    try:
        # Assume video_frames is a single numpy array or torch tensor where the first dimension is the frame index
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             frames_array = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, convert to a numpy array for consistent processing
             frames_array = np.array(video_frames)
        else:
             print("Unsupported format for video_frames.")
             frames_array = None


        if frames_array is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Iterate through the first dimension to get individual frames
                for i in range(frames_array.shape[0]):
                    # Extract the individual frame
                    frame = frames_array[i]

                    # Ensure the frame is a NumPy array and in uint8 format
                    if isinstance(frame, torch.Tensor):
                        frame = frame.detach().cpu().numpy()

                    if frame.dtype != np.uint8:
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            frame = frame.astype(np.uint8)

                    # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                    # This logic should apply to the individual frame's shape
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                    elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                         # If shape is (C, H, W), permute to (H, W, C)
                         frame = frame.transpose(1, 2, 0)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel

                    # Final check on shape before appending
                    # This check should be against the shape of the individual frame
                    if frame.ndim == 3 and frame.shape[-1] in [3]: # Only accept 3 channel RGB for saving
                         # print(f"Appending frame {i}: Shape {frame.shape}, Dtype {frame.dtype}") # Debugging print
                         writer.append_data(frame)
                    else:
                         # This print indicates an issue with the processing logic or the input frame format
                         print(f"Skipping frame {i} due to incorrect shape after processing: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape after processing: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [16]:
if 'video_frames' in locals() and video_frames is not None:
    output_filename = "generated_video.mp4"
    try:
        frames_to_process = None

        # Determine if video_frames is a list or a single array/tensor
        if isinstance(video_frames, (np.ndarray, torch.Tensor)):
             # If it's a single array/tensor, assume the first dimension is the frame index
             frames_to_process = video_frames
        elif isinstance(video_frames, list):
             # If it's a list, assume each element is a frame
             frames_to_process = video_frames
        else:
             print("Unsupported format for video_frames.")


        if frames_to_process is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # Iterate through the frames
                if isinstance(frames_to_process, (np.ndarray, torch.Tensor)):
                    # Iterate through the first dimension for numpy array/tensor
                    for i in range(frames_to_process.shape[0]):
                        frame = frames_to_process[i]

                        # Ensure the frame is a NumPy array and in uint8 format
                        if isinstance(frame, torch.Tensor):
                            frame = frame.detach().cpu().numpy()

                        if frame.dtype != np.uint8:
                            # Scale to 0-255 if in range [0, 1]
                            if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                                frame = (frame * 255).astype(np.uint8)
                            else:
                                frame = frame.astype(np.uint8)

                        # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                        if frame.ndim == 2:
                            frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                        elif frame.shape[-1] == 1:
                            frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                        elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                            # If shape is (C, H, W), permute to (H, W, C)
                            frame = frame.transpose(1, 2, 0)
                        elif frame.shape[-1] == 4:
                            frame = frame[..., :3] # Drop alpha channel


                        # Final check on shape before appending - should be (H, W, C) with C=1, 3, or 4
                        if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                            writer.append_data(frame)
                        else:
                            print(f"Skipping frame {i} due to incorrect shape before appending: {frame.shape}")

                elif isinstance(frames_to_process, list):
                    # Iterate through the list elements
                    for i, frame in enumerate(frames_to_process):
                         # Ensure the frame is a NumPy array and in uint8 format
                        if isinstance(frame, torch.Tensor):
                            frame = frame.detach().cpu().numpy()
                        elif not isinstance(frame, np.ndarray):
                             print(f"Skipping frame {i} due to unsupported type in list: {type(frame)}")
                             continue # Skip to the next frame if not a recognized type


                        if frame.dtype != np.uint8:
                            # Scale to 0-255 if in range [0, 1]
                            if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                                frame = (frame * 255).astype(np.uint8)
                            else:
                                frame = frame.astype(np.uint8)

                        # Ensure the frame has 3 channels (RGB) and is in (H, W, C) format
                        if frame.ndim == 2:
                            frame = np.stack([frame] * 3, axis=-1) # Convert grayscale to RGB
                        elif frame.shape[-1] == 1:
                            frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (with channel dim) to RGB
                        elif frame.shape[0] in [1, 3, 4] and frame.ndim == 3:
                            # If shape is (C, H, W), permute to (H, W, C)
                            frame = frame.transpose(1, 2, 0)
                        elif frame.shape[-1] == 4:
                            frame = frame[..., :3] # Drop alpha channel


                        # Final check on shape before appending - should be (H, W, C) with C=1, 3, or 4
                        if frame.ndim == 3 and frame.shape[-1] in [1, 3, 4]:
                            writer.append_data(frame)
                        else:
                            print(f"Skipping frame {i} due to incorrect shape before appending: {frame.shape}")


            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")

Skipping frame 0 due to incorrect shape before appending: (25, 256, 256, 3)
Video saved successfully to generated_video.mp4


In [17]:
import imageio
import numpy as np
import torch

if 'video_frames' in locals() and video_frames is not None:
    output_filename = "generated_video.mp4"
    try:
        frames_to_save = None

        # 1. Handle list containing a single array/tensor or direct array/tensor
        if isinstance(video_frames, list) and len(video_frames) == 1 and isinstance(video_frames[0], (np.ndarray, torch.Tensor)):
            frames_array = video_frames[0] # Extract the single array/tensor from the list
        elif isinstance(video_frames, (np.ndarray, torch.Tensor)):
             # If it's already a single array/tensor, use it directly
             frames_array = video_frames
        else:
             print("Unsupported format for video_frames. Expected a NumPy array, PyTorch Tensor, or a list containing one of these.")
             frames_array = None # Set to None if format is not supported


        if frames_array is not None:
             # Ensure frames_array is a numpy array
            if isinstance(frames_array, torch.Tensor):
                frames_array = frames_array.detach().cpu().numpy()

            # 4. Permute if in (num_frames, C, H, W) format
            # Check if the first dimension is likely channels based on a common range (1 to 4 channels)
            if frames_array.ndim == 4 and frames_array.shape[1] in [1, 3, 4] and frames_array.shape[-1] not in [1, 3, 4]:
                 print(f"Permuting shape from {frames_array.shape} (assuming NCHW) to NHWC.")
                 frames_array = frames_array.transpose(0, 2, 3, 1) # Permute to (num_frames, H, W, C)


            # 5. Ensure the resulting array has the shape (num_frames, H, W, C) with C being 1, 3, or 4.
            if not (frames_array.ndim == 4 and frames_array.shape[-1] in [1, 3, 4]):
                 print(f"Error: Unexpected shape for frames_array after processing: {frames_array.shape}. Expected (num_frames, H, W, C) with C=1, 3, or 4.")
                 frames_array = None # Set to None to prevent further processing


        if frames_array is not None:
            with imageio.get_writer(output_filename, fps=8) as writer:
                # 6. Iterate through the first dimension (frames).
                for i in range(frames_array.shape[0]):
                    frame = frames_array[i] # This should now be a (H, W, C) array

                    # 7. Ensure it is a NumPy array and in uint8 format, scaling if necessary.
                    if frame.dtype != np.uint8:
                         # Scale to 0-255 if in range [0, 1]
                         if np.max(frame) <= 1.0 and np.min(frame) >= 0.0:
                             frame = (frame * 255).astype(np.uint8)
                         else:
                            # If not in [0, 1] range, simply convert type, assuming it might be close to uint8 range
                            frame = frame.astype(np.uint8)

                    # 8. Ensure the individual frame has 3 channels (RGB), converting from grayscale or dropping alpha if needed.
                    if frame.ndim == 2:
                         frame = np.stack([frame] * 3, axis=-1) # Convert grayscale (H, W) to RGB (H, W, 3)
                    elif frame.shape[-1] == 1:
                         frame = np.concatenate([frame] * 3, axis=-1) # Convert grayscale (H, W, 1) to RGB (H, W, 3)
                    elif frame.shape[-1] == 4:
                        frame = frame[..., :3] # Drop alpha channel (H, W, 4) to RGB (H, W, 3)


                    # Final check before appending - must be (H, W, 3) for imageio writer when writing to mp4
                    if frame.ndim == 3 and frame.shape[-1] == 3:
                         writer.append_data(frame)
                    else:
                         # This print indicates an issue with the processing logic or the input frame format
                         print(f"Skipping frame {i} due to incorrect shape before appending: {frame.shape}. Expected (H, W, 3).")


            # 10. Print a success message if the video is saved
            print(f"Video saved successfully to {output_filename}")
    except Exception as e:
        # 10. Print an error message if any exception occurs.
        print(f"Error saving video: {e}")
else:
    print("No video frames available to save.")


Error: Unexpected shape for frames_array after processing: (1, 25, 256, 256, 3). Expected (num_frames, H, W, C) with C=1, 3, or 4.
