# **LTX-VIDEO WITH START & END FRAMES**

- You can run this on the free T4 GPU. For faster video generation, use a more powerful GPU.
- This notebook is mainly for generating animations from simple transitions between two images. It does not handle fast motion, such as walking or running, well.
- Use detailed prompts to generate good videos.

In [None]:
# @title Prepare Environment
# Upgrade PyTorch and its companion packages from the CUDA 11.8 wheel index.
!pip install --upgrade --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%cd /content
# Not read by any other code visible in this notebook.
Always_Load_Models_for_Inference = False
# Chooses which T5-XXL text-encoder file is downloaded below:
# the fp16 file when True, the scaled fp8 file when False.
Use_t5xxl_fp16 = False

# Python packages used by the ComfyUI sampling code, plus PyAV.
!pip install -q torchsde einops diffusers accelerate xformers
!pip install av
# Clone the ComfyUI fork into /content/ComfyUI and switch into it; the
# download targets below are folders inside that checkout.
!git clone https://github.com/Isi-dev/ComfyUI
%cd /content/ComfyUI
# aria2 performs the model downloads below; ffmpeg is installed alongside it.
!apt -y install -qq aria2 ffmpeg

# LTX-Video 2B v0.9.5 checkpoint -> models/checkpoints
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/Isi99999/LTX-Video/resolve/main/ltx-video-2b-v0.9.5.safetensors -d /content/ComfyUI/models/checkpoints -o ltx-video-2b-v0.9.5.safetensors
# Exactly one text-encoder file is downloaded -> models/text_encoders
if Use_t5xxl_fp16:
    !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/Isi99999/LTX-Video/resolve/main/t5xxl_fp16.safetensors -d /content/ComfyUI/models/text_encoders -o t5xxl_fp16.safetensors
else:
    !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/Isi99999/LTX-Video/resolve/main/t5xxl_fp8_e4m3fn_scaled.safetensors -d /content/ComfyUI/models/text_encoders -o t5xxl_fp8_e4m3fn_scaled.safetensors

import torch
import numpy as np
from PIL import Image
import gc
import sys
import random
import os
import imageio
from google.colab import files
from IPython.display import display, HTML
sys.path.insert(0, '/content/ComfyUI')

from comfy import model_management

from nodes import (
    CheckpointLoaderSimple,
    CLIPLoader,
    CLIPTextEncode,
    VAEDecode,
    LoadImage
)

from comfy_extras.nodes_custom_sampler import (
    KSamplerSelect,
    SamplerCustom
)

from comfy_extras.nodes_lt import (
    EmptyLTXVLatentVideo,
    LTXVPreprocess,
    LTXVAddGuide,
    LTXVScheduler,
    LTXVConditioning,
    LTXVCropGuides
)

# One shared instance of each ComfyUI node class imported above.
# generate_video() calls methods on these module-level objects by name.

# Model / text-encoder loading and prompt encoding.
checkpoint_loader = CheckpointLoaderSimple()
clip_loader = CLIPLoader()
clip_encode_positive = CLIPTextEncode()
clip_encode_negative = CLIPTextEncode()
# Image input and LTX-Video latent / guide preparation.
load_image = LoadImage()
empty_latent = EmptyLTXVLatentVideo()
preprocess = LTXVPreprocess()
add_guide = LTXVAddGuide()
# Scheduling, sampling and decoding.
scheduler = LTXVScheduler()
sampler_select = KSamplerSelect()
conditioning = LTXVConditioning()
sampler = SamplerCustom()
vae_decode = VAEDecode()
crop_guides = LTXVCropGuides()

def clear_gpu_memory():
    """Drop module-level tensors, then release cached GPU memory.

    Every name in this module's global namespace that is bound to a tensor,
    or to an object whose ``data`` attribute is a tensor, is unbound. After
    that the garbage collector runs and, when CUDA is available, the CUDA
    caching allocator is emptied and IPC handles are collected.
    """
    module_globals = globals()

    # Unbind by name. `del` on a loop variable only removes the loop's own
    # reference and leaves the global (and the memory it holds) alive.
    for name, value in list(module_globals.items()):
        if torch.is_tensor(value) or torch.is_tensor(getattr(value, "data", None)):
            del module_globals[name]

    # Collect first so the freed tensors' blocks are back in the allocator
    # cache before that cache is emptied.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

def upload_image():
    """Handle image upload in Colab and store in /content/ComfyUI/input/

    Opens the Colab upload dialog and places the first uploaded file in the
    ComfyUI input directory. Any further files selected in the same dialog
    are ignored.

    Returns:
        The destination path of the stored file, or None when the dialog
        returned no files.
    """
    from google.colab import files
    import os
    import shutil

    os.makedirs('/content/ComfyUI/input', exist_ok=True)

    uploaded = files.upload()

    for filename, content in uploaded.items():
        # Resolve the source against the current working directory instead
        # of assuming the notebook is still in /content/ComfyUI.
        src_path = os.path.join(os.getcwd(), filename)
        dest_path = f'/content/ComfyUI/input/{filename}'

        if os.path.exists(src_path):
            shutil.move(src_path, dest_path)
        else:
            # No file on disk at the expected place: fall back to the
            # uploaded bytes that files.upload() returned for this name.
            with open(dest_path, 'wb') as out_file:
                out_file.write(content)

        print(f"Image saved to: {dest_path}")
        return dest_path

    return None

def generate_video(
    image_path: str = None,
    guide_image_path: str = None,
    positive_prompt: str = "A red fox moving gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice.",
    negative_prompt: str = "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
    width: int = 768,
    height: int = 512,
    seed: int = 397166166231987,
    steps: int = 30,
    cfg_scale: float = 2.05,
    sampler_name: str = "euler",
    length: int = 97,
    fps: int = 24,
    guide_strength: float = 0.1,
    guide_frame: int = -1
):
    """Generate a video guided by two images and save it to /content/output.mp4.

    The text encoder, the diffusion model and the VAE are loaded one after
    another and each is released as soon as it is no longer needed. The
    finished video is written with imageio and shown with display_video().

    Args:
        image_path: Image added as a guide at frame index 0 with strength 1.
            When None, the user is asked to upload one.
        guide_image_path: Second guide image, added at ``guide_frame`` with
            ``guide_strength``. When None, the user is asked to upload one.
        positive_prompt: Prompt encoded as the positive conditioning.
        negative_prompt: Prompt encoded as the negative conditioning.
        width: Latent video width; must be divisible by 32.
        height: Latent video height; must be divisible by 32.
        seed: Noise seed. 0 selects a random seed.
        steps: Number of steps passed to the scheduler.
        cfg_scale: Passed as ``cfg`` to the sampler and also as the second
            positional argument of the scheduler.
        sampler_name: Sampler name passed to the sampler selector.
        length: Length passed to the empty-latent node.
        fps: Frame rate of the written MP4.
        guide_strength: Strength of the second guide image.
        guide_frame: Frame index of the second guide image.

    Raises:
        ValueError: If width or height is not divisible by 32, or if an
            image is still missing after the upload prompt.
    """
    # Validate before the text encoder is loaded so a bad size fails fast,
    # and with explicit raises so the check survives `python -O`.
    if width % 32 != 0:
        raise ValueError("Width must be divisible by 32")
    if height % 32 != 0:
        raise ValueError("Height must be divisible by 32")

    # Load the encoder file the setup cell actually downloaded. The flag is
    # looked up defensively so the function still works if it was never set.
    if globals().get("Use_t5xxl_fp16", False):
        text_encoder_name = "t5xxl_fp16.safetensors"
    else:
        text_encoder_name = "t5xxl_fp8_e4m3fn_scaled.safetensors"

    with torch.inference_mode():
        print("Loading Text_Encoder...")
        clip = clip_loader.load_clip(text_encoder_name, "ltxv", "default")[0]
        print("Loaded Text_Encoder!")

    try:
        positive = clip_encode_positive.encode(clip, positive_prompt)[0]
        negative = clip_encode_negative.encode(clip, negative_prompt)[0]

        # The encoder is only needed for the two prompts; release it before
        # the diffusion model is loaded.
        del clip
        torch.cuda.empty_cache()
        gc.collect()
        print("Text_Encoder removed from memory")

        if image_path is None:
            print("Please upload the main image file:")
            image_path = upload_image()
        if image_path is None:
            # Stop here instead of failing later inside the image loader.
            raise ValueError("No main image uploaded!")

        if guide_image_path is None:
            print("Please upload the guide image file:")
            guide_image_path = upload_image()
        if guide_image_path is None:
            raise ValueError("No guide image uploaded!")

        loaded_image = load_image.load_image(image_path)[0]
        processed_image = preprocess.preprocess(loaded_image, 35)[0]

        loaded_guide_image = load_image.load_image(guide_image_path)[0]
        processed_guide_image = preprocess.preprocess(loaded_guide_image, 40)[0]

        print("Loading model & VAE...")
        model, _, vae = checkpoint_loader.load_checkpoint("ltx-video-2b-v0.9.5.safetensors")
        print("Loaded model & VAE!")

        # Create empty latent video
        latent_video = empty_latent.generate(width, height, length)[0]

        # First guide pass: main image pinned to the first frame
        guided_positive, guided_negative, guided_latent_1 = add_guide.generate(
            positive=positive,
            negative=negative,
            vae=vae,
            latent=latent_video,
            image=processed_image,
            frame_idx=0,
            strength=1
        )

        # Second guide pass (from the other image)
        guided_positive, guided_negative, guided_latent = add_guide.generate(
            positive=guided_positive,
            negative=guided_negative,
            vae=vae,
            latent=guided_latent_1,
            image=processed_guide_image,
            frame_idx=guide_frame,
            strength=guide_strength
        )

        # Get sigmas for sampling
        # NOTE(review): the scheduler receives the latent from the first
        # guide pass and cfg_scale as its second argument - confirm intended.
        sigmas = scheduler.get_sigmas(steps, cfg_scale, 0.95, True, 0.1, guided_latent_1)[0]
        selected_sampler = sampler_select.get_sampler(sampler_name)[0]

        # Apply conditioning
        conditioned_positive, conditioned_negative = conditioning.append(
            guided_positive,
            guided_negative,
            25.0
        )

        print("Generating video...")

        # Sample the video
        sampled = sampler.sample(
            model=model,
            add_noise=True,
            noise_seed=seed if seed != 0 else random.randint(0, 2**32),
            cfg=cfg_scale,
            positive=conditioned_positive,
            negative=conditioned_negative,
            sampler=selected_sampler,
            sigmas=sigmas,
            latent_image=guided_latent
        )[0]

        # Crop guides if needed
        cropped_latent = crop_guides.crop(
            conditioned_positive,
            conditioned_negative,
            sampled
        )[2]

        del model
        torch.cuda.empty_cache()
        gc.collect()
        print("Model removed from memory")

        with torch.no_grad():
            try:
                print("Decoding Latents...")
                decoded = vae_decode.decode(vae, cropped_latent)[0].detach()
                print("Latents Decoded!")
                del vae
                torch.cuda.empty_cache()
                gc.collect()
                print("VAE removed from memory")
            except Exception as e:
                print(f"Error during decoding: {str(e)}")
                raise

        # Save as MP4
        output_path = "/content/output.mp4"
        frames_np = (decoded.cpu().numpy() * 255).astype(np.uint8)
        with imageio.get_writer(output_path, fps=fps) as writer:
            for frame in frames_np:
                writer.append_data(frame)

        print(f"\nVideo generation complete!")
        print(f"Saved {len(decoded)} frames to {output_path}")
        display_video(output_path)

    except Exception as e:
        print(f"Error during video generation: {str(e)}")
        raise
    finally:
        clear_gpu_memory()

def display_video(video_path):
    """Display video in Colab notebook with proper HTML5 player

    The file is read in full, base64-encoded into a data URL and embedded
    in a looping, auto-playing <video> element.

    Args:
        video_path: Path of the MP4 file to embed.
    """
    from IPython.display import HTML
    from base64 import b64encode

    # Context manager so the file handle is closed right after reading.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

    display(HTML(f"""
    <video width=512 controls autoplay loop>
        <source src="{data_url}" type="video/mp4">
    </video>
    """))



In [None]:
# @title Run Video Generation
# Colab form fields: each `# @param` comment turns the assignment on its
# line into an editable form widget. The values are forwarded unchanged to
# generate_video() below.
positive_prompt = "Flowers growing from the sides of a vase" # @param {"type":"string"}
negative_prompt = "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" # @param {"type":"string"}
# generate_video() requires width and height to be divisible by 32.
width = 512 # @param {"type":"number"}
height = 768 # @param {"type":"number"}
# A seed of 0 makes generate_video() pick a random seed.
seed = 397166166231987 # @param {"type":"integer"}
steps = 25 # @param {"type":"integer", "min":1, "max":100}
cfg_scale = 2.05 # @param {"type":"number", "min":1, "max":20}
sampler_name = "euler" # @param ["euler", "dpmpp_2m", "ddim", "lms"]
# Passed to generate_video() as `length`.
frames = 49 # @param {"type":"integer", "min":1, "max":120}
guide_strength = 1 # @param {"type":"number", "min":0, "max":1}
guide_frame = -1 # @param {"type":"integer"}

# @title Run Video Generation
print("Starting video generation workflow...")
with torch.inference_mode():
    generate_video(
        # None for both paths makes generate_video() prompt for uploads.
        image_path=None,
        guide_image_path=None,
        positive_prompt=positive_prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        seed=seed,
        steps=steps,
        cfg_scale=cfg_scale,
        sampler_name=sampler_name,
        length=frames,
        guide_strength=guide_strength,
        guide_frame=guide_frame
    )
# Only reached when generate_video() returns without raising.
clear_gpu_memory()